提交 a87b0b6d 编写于 作者: G gineshidalgo99

Speed verbose at runtime configurable by user

上级 d97114f3
......@@ -6,7 +6,7 @@ SET WGET_EXE=wget\wget.exe
:: Download temporary zip
echo ----- Downloading Caffe -----
SET FREEGLUT_FOLDER=caffe\
SET FREEGLUT_FOLDER=freeglut\
SET ZIP_NAME=freeglut_2018_01_14.zip
SET ZIP_FULL_PATH=%FREEGLUT_FOLDER%%ZIP_NAME%
%WGET_EXE% -c http://posefs1.perception.cs.cmu.edu/OpenPose/3rdparty/windows/%ZIP_NAME% -P %FREEGLUT_FOLDER%
......
......@@ -87,6 +87,9 @@ option(BUILD_DOCS "Build OpenPose documentation." OFF)
# Build as shared library
option(BUILD_SHARED_LIBS "Build as shared lib" ON)
# Speed profiler
option(PROFILER_ENABLED "If enabled, OpenPose will be able to print out speed information at runtime." OFF)
### FIND REQUIRED PACKAGES
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules")
......@@ -281,3 +284,10 @@ configure_file(
add_custom_target(uninstall
COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake)
### OPENPOSE FLAGS
# Set/disable profiler
if (PROFILER_ENABLED)
add_definitions(-DPROFILER_ENABLED)
endif (PROFILER_ENABLED)
......@@ -133,7 +133,7 @@ Each flag is divided into flag name, default value, and description.
1. Debugging/Other
- DEFINE_int32(logging_level, 3, "The logging level. Integer in the range [0, 255]. 0 will output any log() message, while 255 will not output any. Current OpenPose library messages are in the range 0-4: 1 for low priority messages and 4 for important ones.");
- DEFINE_bool(disable_multi_thread, false, "It would slightly reduce the frame rate in order to highly reduce the lag. Mainly useful for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the error.");
- DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some runtime statistics at this frame number.");
2. Producer
- DEFINE_int32(camera, -1, "The camera index for cv::VideoCapture. Integer in the range [0, 9]. Select a negative number (by default), to auto-detect and open the first available camera.");
- DEFINE_string(camera_resolution, "1280x720", "Size of the camera frames to ask for.");
......
......@@ -133,8 +133,8 @@ You just need to remove the OpenPose folder, by default called `openpose/`. E.g.
## Windows
### Installation - Demo
1. Download and unzip the [portable OpenPose demo 1.0.1](http://posefs1.perception.cs.cmu.edu/OpenPose/OpenPose_demo_1.0.1.zip).
### Installation - Demo and Binaries
1. Download and unzip the latest `openpose-X.X.X-win64-binaries.zip` Windows binary zip file from the [releases section](https://github.com/CMU-Perceptual-Computing-Lab/openpose/releases).
......
......@@ -176,3 +176,8 @@ OpenPose Library - Release Notes
4. Added freeglut download script (3-D reconstruction demo for Windows).
2. Main bugs fixed:
1. Slight speed up (~1%) for performing the non-maximum suppression stage only in the body part heatmaps channels, and not also in the PAF channels.
## All OpenPose Versions
Download and/or check any OpenPose version from [https://github.com/CMU-Perceptual-Computing-Lab/openpose/releases](https://github.com/CMU-Perceptual-Computing-Lab/openpose/releases).
......@@ -39,6 +39,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc
" for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with"
" low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the"
" error.");
DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some"
" runtime statistics at this frame number.");
// Producer
DEFINE_int32(camera, -1, "The camera index for cv::VideoCapture. Integer in the range [0, 9]. Select a negative"
" number (by default), to auto-detect and open the first available camera.");
......@@ -195,7 +197,12 @@ int openPoseDemo()
op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.",
__LINE__, __FUNCTION__, __FILE__);
op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level);
// op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages
op::Profiler::setDefaultX(FLAGS_profile_speed);
// // For debugging
// // Print all logging messages
// op::ConfigureLog::setPriorityThreshold(op::Priority::None);
// // Print out speed values faster
// op::Profiler::setDefaultX(100);
op::log("Starting pose estimation demo.", op::Priority::High);
const auto timerBegin = std::chrono::high_resolution_clock::now();
......
......@@ -38,7 +38,6 @@ int handFromJsonTest()
op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.",
__LINE__, __FUNCTION__, __FILE__);
op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level);
// op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages
op::log("Starting pose estimation demo.", op::Priority::High);
const auto timerBegin = std::chrono::high_resolution_clock::now();
......
......@@ -49,6 +49,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc
" for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with"
" low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the"
" error.");
DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some"
" runtime statistics at this frame number.");
// Producer
DEFINE_int32(camera, -1, "The camera index for cv::VideoCapture. Integer in the range [0, 9]. Select a negative"
" number (by default), to auto-detect and open the first available camera.");
......@@ -204,7 +206,7 @@ int openPoseTutorialWrapper4()
op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.",
__LINE__, __FUNCTION__, __FILE__);
op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level);
// op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages
op::Profiler::setDefaultX(FLAGS_profile_speed);
op::log("Starting pose estimation demo.", op::Priority::High);
const auto timerBegin = std::chrono::high_resolution_clock::now();
......
......@@ -39,6 +39,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc
" for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with"
" low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the"
" error.");
DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some"
" runtime statistics at this frame number.");
// Producer
DEFINE_int32(camera, -1, "The camera index for cv::VideoCapture. Integer in the range [0, 9]. Select a negative"
" number (by default), to auto-detect and open the first available camera.");
......@@ -280,7 +282,7 @@ int openPoseTutorialWrapper1()
op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.",
__LINE__, __FUNCTION__, __FILE__);
op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level);
// op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages
op::Profiler::setDefaultX(FLAGS_profile_speed);
op::log("Starting pose estimation demo.", op::Priority::High);
const auto timerBegin = std::chrono::high_resolution_clock::now();
......
......@@ -39,6 +39,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc
" for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with"
" low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the"
" error.");
DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some"
" runtime statistics at this frame number.");
// Producer
DEFINE_string(image_dir, "examples/media/", "Process a directory of images. Read all standard formats (jpg, png, bmp, etc.).");
// OpenPose
......@@ -363,7 +365,7 @@ int openPoseTutorialWrapper2()
op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.",
__LINE__, __FUNCTION__, __FILE__);
op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level);
// op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages
op::Profiler::setDefaultX(FLAGS_profile_speed);
op::log("Starting pose estimation demo.", op::Priority::High);
const auto timerBegin = std::chrono::high_resolution_clock::now();
......
......@@ -39,6 +39,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc
" for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with"
" low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the"
" error.");
DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some"
" runtime statistics at this frame number.");
// Producer
DEFINE_string(image_dir, "examples/media/", "Process a directory of images. Read all standard formats (jpg, png, bmp, etc.).");
// OpenPose
......@@ -321,7 +323,7 @@ int openPoseTutorialWrapper3()
op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.",
__LINE__, __FUNCTION__, __FILE__);
op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level);
// op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages
op::Profiler::setDefaultX(FLAGS_profile_speed);
op::log("Starting pose estimation demo.", op::Priority::High);
const auto timerBegin = std::chrono::high_resolution_clock::now();
......
......@@ -41,6 +41,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc
" for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with"
" low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the"
" error.");
DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some"
" runtime statistics at this frame number.");
// OpenPose
DEFINE_string(model_folder, "models/", "Folder path (absolute or relative) where the models (pose, face, ...) are located.");
DEFINE_string(output_resolution, "-1x-1", "The image resolution (display and output). Use \"-1x-1\" to force the program to use the"
......@@ -172,7 +174,7 @@ int openpose3d()
op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.",
__LINE__, __FUNCTION__, __FILE__);
op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level);
// op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages
op::Profiler::setDefaultX(FLAGS_profile_speed);
op::log("Starting pose estimation demo.", op::Priority::High);
const auto timerBegin = std::chrono::high_resolution_clock::now();
......
......@@ -66,7 +66,7 @@ namespace op
// Profiling speed
Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey,
__LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X);
__LINE__, __FUNCTION__, __FILE__);
// Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
}
......
......@@ -21,15 +21,22 @@ namespace op
class OP_API Profiler
{
public:
static const unsigned long long DEFAULT_X;
static unsigned long long DEFAULT_X;
// Non-thread safe, it must be performed at the beginning of the code before any parallelization occurs
static void setDefaultX(const unsigned long long defaultX);
static const std::string timerInit(const int line, const std::string& function, const std::string& file);
static void timerEnd(const std::string& key);
static void printAveragedTimeMsOnIterationX(const std::string& key, const int line, const std::string& function, const std::string& file, const unsigned long long x = DEFAULT_X);
static void printAveragedTimeMsOnIterationX(const std::string& key, const int line,
const std::string& function, const std::string& file,
const unsigned long long x = DEFAULT_X);
static void printAveragedTimeMsEveryXIterations(const std::string& key, const int line, const std::string& function, const std::string& file, const unsigned long long x = DEFAULT_X);
static void printAveragedTimeMsEveryXIterations(const std::string& key, const int line,
const std::string& function, const std::string& file,
const unsigned long long x = DEFAULT_X);
static void profileGpuMemory(const int line, const std::string& function, const std::string& file);
};
......
......@@ -175,8 +175,8 @@ namespace op
heatMapsGpuPtr, peaksGpuPtr);
#else
UNUSED(bottom);
UNUSED(top);
UNUSED(poseKeypoints);
UNUSED(poseScores);
error("OpenPose must be compiled with the `USE_CAFFE` & `USE_CUDA` macro definitions in order to run"
" this functionality.", __LINE__, __FUNCTION__, __FILE__);
#endif
......
......@@ -10,7 +10,7 @@
namespace op
{
const unsigned long long Profiler::DEFAULT_X = 1000;
unsigned long long Profiler::DEFAULT_X = 1000;
#ifdef PROFILER_ENABLED
......@@ -26,13 +26,23 @@ namespace op
return file + function + std::to_string(line) + threadId.str();
}
void printAveragedTimeMsCommon(const double timePast, const unsigned long long timeCounter, const int line, const std::string& function, const std::string& file)
void printAveragedTimeMsCommon(const double timePast, const unsigned long long timeCounter, const int line,
const std::string& function, const std::string& file)
{
const auto stringMessage = std::to_string( timePast / timeCounter / 1e6 ) + " msec at";
log(stringMessage, Priority::Max, line, function, file);
}
#endif
void Profiler::setDefaultX(const unsigned long long defaultX)
{
#ifdef PROFILER_ENABLED
DEFAULT_X = defaultX;
#else
UNUSED(defaultX);
#endif
}
const std::string Profiler::timerInit(const int line, const std::string& function, const std::string& file)
{
#ifdef PROFILER_ENABLED
......@@ -60,7 +70,9 @@ namespace op
{
auto tuple = sProfilerTuple[key];
// Time between init & end
const auto timeNs = (double)std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now() - std::get<2>(tuple)).count();
const auto timeNs = (double)std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::high_resolution_clock::now() - std::get<2>(tuple)
).count();
// Accumulate averaged time
std::get<0>(tuple) += timeNs;
std::get<1>(tuple)++;
......@@ -74,7 +86,8 @@ namespace op
#endif
}
void Profiler::printAveragedTimeMsOnIterationX(const std::string& key, const int line, const std::string& function, const std::string& file, const unsigned long long x)
void Profiler::printAveragedTimeMsOnIterationX(const std::string& key, const int line, const std::string& function,
const std::string& file, const unsigned long long x)
{
#ifdef PROFILER_ENABLED
std::unique_lock<std::mutex> lock{sMutexProfiler};
......@@ -88,7 +101,8 @@ namespace op
}
}
else
error("Profiler::printAveragedTimeMsOnIterationX called with a non-existing key.", __LINE__, __FUNCTION__, __FILE__);
error("Profiler::printAveragedTimeMsOnIterationX called with a non-existing key.",
__LINE__, __FUNCTION__, __FILE__);
#else
UNUSED(key);
UNUSED(line);
......@@ -98,7 +112,9 @@ namespace op
#endif
}
void Profiler::printAveragedTimeMsEveryXIterations(const std::string& key, const int line, const std::string& function, const std::string& file, const unsigned long long x)
void Profiler::printAveragedTimeMsEveryXIterations(const std::string& key, const int line,
const std::string& function, const std::string& file,
const unsigned long long x)
{
#ifdef PROFILER_ENABLED
std::unique_lock<std::mutex> lock{sMutexProfiler};
......@@ -118,7 +134,8 @@ namespace op
}
}
else
error("Profiler::printAveragedTimeMsOnIterationX called with a non-existing key.", __LINE__, __FUNCTION__, __FILE__);
error("Profiler::printAveragedTimeMsOnIterationX called with a non-existing key.",
__LINE__, __FUNCTION__, __FILE__);
#else
UNUSED(key);
UNUSED(line);
......@@ -135,7 +152,8 @@ namespace op
log("GPU usage.", Priority::Max, line, function, file);
// GPU info
const auto answer = std::system("nvidia-smi | grep \"Processes:\"") | std::system("nvidia-smi | grep \"Process name\"");
const auto answer = std::system("nvidia-smi | grep \"Processes:\"")
| std::system("nvidia-smi | grep \"Process name\"");
if (answer != 0)
log("Error on the nvidia-smi header. Please, inform us of this error.", Priority::Max);
else
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册