From a87b0b6d41d00d3c57bfa9c6eb3c4171ab7d9004 Mon Sep 17 00:00:00 2001 From: gineshidalgo99 Date: Tue, 16 Jan 2018 19:11:16 -0500 Subject: [PATCH] Speed verbose at runtime configurable by user --- 3rdparty/windows/getFreeglut.bat | 2 +- CMakeLists.txt | 10 ++++++ doc/demo_overview.md | 2 +- doc/installation.md | 4 +-- doc/release_notes.md | 5 +++ examples/openpose/openpose.cpp | 9 ++++- examples/tests/handFromJsonTest.cpp | 1 - .../1_custom_post_processing.cpp | 4 ++- .../1_user_asynchronous_output.cpp | 4 ++- .../tutorial_wrapper/2_user_synchronous.cpp | 4 ++- .../tutorial_wrapper/3_user_asynchronous.cpp | 4 ++- examples_beta/openpose3d/openpose3d.cpp | 4 ++- include/openpose/filestream/wHeatMapSaver.hpp | 2 +- include/openpose/utilities/profiler.hpp | 13 +++++-- src/openpose/pose/bodyPartConnectorCaffe.cpp | 2 +- src/openpose/utilities/profiler.cpp | 34 ++++++++++++++----- 16 files changed, 80 insertions(+), 24 deletions(-) diff --git a/3rdparty/windows/getFreeglut.bat b/3rdparty/windows/getFreeglut.bat index c54c2b6d..3cab40f4 100644 --- a/3rdparty/windows/getFreeglut.bat +++ b/3rdparty/windows/getFreeglut.bat @@ -6,7 +6,7 @@ SET WGET_EXE=wget\wget.exe :: Download temporary zip echo ----- Downloading Caffe ----- -SET FREEGLUT_FOLDER=caffe\ +SET FREEGLUT_FOLDER=freeglut\ SET ZIP_NAME=freeglut_2018_01_14.zip SET ZIP_FULL_PATH=%FREEGLUT_FOLDER%%ZIP_NAME% %WGET_EXE% -c http://posefs1.perception.cs.cmu.edu/OpenPose/3rdparty/windows/%ZIP_NAME% -P %FREEGLUT_FOLDER% diff --git a/CMakeLists.txt b/CMakeLists.txt index 0001b546..a9c9382e 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -87,6 +87,9 @@ option(BUILD_DOCS "Build OpenPose documentation." OFF) # Build as shared library option(BUILD_SHARED_LIBS "Build as shared lib" ON) +# Speed profiler +option(PROFILER_ENABLED "If enabled, OpenPose will be able to print out speed information at runtime." OFF) + ### FIND REQUIRED PACKAGES list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules") @@ -281,3 +284,10 @@ configure_file( add_custom_target(uninstall COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake) + +### OPENPOSE FLAGS + +# Set/disable profiler +if (PROFILER_ENABLED) + add_definitions(-DPROFILER_ENABLED) +endif (PROFILER_ENABLED) diff --git a/doc/demo_overview.md b/doc/demo_overview.md index c69413bd..515dcb04 100644 --- a/doc/demo_overview.md +++ b/doc/demo_overview.md @@ -133,7 +133,7 @@ Each flag is divided into flag name, default value, and description. 1. Debugging/Other - DEFINE_int32(logging_level, 3, "The logging level. Integer in the range [0, 255]. 0 will output any log() message, while 255 will not output any. Current OpenPose library messages are in the range 0-4: 1 for low priority messages and 4 for important ones."); - DEFINE_bool(disable_multi_thread, false, "It would slightly reduce the frame rate in order to highly reduce the lag. Mainly useful for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the error."); - +- DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some runtime statistics at this frame number."); 2. Producer - DEFINE_int32(camera, -1, "The camera index for cv::VideoCapture. Integer in the range [0, 9]. Select a negative number (by default), to auto-detect and open the first available camera."); - DEFINE_string(camera_resolution, "1280x720", "Size of the camera frames to ask for."); diff --git a/doc/installation.md b/doc/installation.md index 4f715d18..5641f5ff 100644 --- a/doc/installation.md +++ b/doc/installation.md @@ -133,8 +133,8 @@ You just need to remove the OpenPose folder, by default called `openpose/`. E.g. ## Windows -### Installation - Demo -1. Download and unzip the [portable OpenPose demo 1.0.1](http://posefs1.perception.cs.cmu.edu/OpenPose/OpenPose_demo_1.0.1.zip). +### Installation - Demo and Binaries +1. Download and unzip the latest `openpose-X.X.X-win64-binaries.zip` Windows binary zip file from the [releases section](https://github.com/CMU-Perceptual-Computing-Lab/openpose/releases). diff --git a/doc/release_notes.md b/doc/release_notes.md index 544094cc..52a7a580 100644 --- a/doc/release_notes.md +++ b/doc/release_notes.md @@ -176,3 +176,8 @@ OpenPose Library - Release Notes 4. Added freeglut download script (3-D reconstruction demo for Windows). 2. Main bugs fixed: 1. Slight speed up (~1%) for performing the non-maximum suppression stage only in the body part heatmaps channels, and not also in the PAF channels. + + + +## All OpenPose Versions +Download and/or check any OpenPose version from [https://github.com/CMU-Perceptual-Computing-Lab/openpose/releases](https://github.com/CMU-Perceptual-Computing-Lab/openpose/releases). diff --git a/examples/openpose/openpose.cpp b/examples/openpose/openpose.cpp index f8cbd8f3..b0cac67f 100755 --- a/examples/openpose/openpose.cpp +++ b/examples/openpose/openpose.cpp @@ -39,6 +39,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc " for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with" " low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the" " error."); +DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some" + " runtime statistics at this frame number."); // Producer DEFINE_int32(camera, -1, "The camera index for cv::VideoCapture. Integer in the range [0, 9]. Select a negative" " number (by default), to auto-detect and open the first available camera."); @@ -195,7 +197,12 @@ int openPoseDemo() op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.", __LINE__, __FUNCTION__, __FILE__); op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level); - // op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages + op::Profiler::setDefaultX(FLAGS_profile_speed); + // // For debugging + // // Print all logging messages + // op::ConfigureLog::setPriorityThreshold(op::Priority::None); + // // Print out speed values faster + // op::Profiler::setDefaultX(100); op::log("Starting pose estimation demo.", op::Priority::High); const auto timerBegin = std::chrono::high_resolution_clock::now(); diff --git a/examples/tests/handFromJsonTest.cpp b/examples/tests/handFromJsonTest.cpp index 69c9206b..a82ed245 100644 --- a/examples/tests/handFromJsonTest.cpp +++ b/examples/tests/handFromJsonTest.cpp @@ -38,7 +38,6 @@ int handFromJsonTest() op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.", __LINE__, __FUNCTION__, __FILE__); op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level); - // op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages op::log("Starting pose estimation demo.", op::Priority::High); const auto timerBegin = std::chrono::high_resolution_clock::now(); diff --git a/examples/tutorial_add_module/1_custom_post_processing.cpp b/examples/tutorial_add_module/1_custom_post_processing.cpp index 940afe96..79ed1513 100644 --- a/examples/tutorial_add_module/1_custom_post_processing.cpp +++ b/examples/tutorial_add_module/1_custom_post_processing.cpp @@ -49,6 +49,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc " for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with" " low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the" " error."); +DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some" + " runtime statistics at this frame number."); // Producer DEFINE_int32(camera, -1, "The camera index for cv::VideoCapture. Integer in the range [0, 9]. Select a negative" " number (by default), to auto-detect and open the first available camera."); @@ -204,7 +206,7 @@ int openPoseTutorialWrapper4() op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.", __LINE__, __FUNCTION__, __FILE__); op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level); - // op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages + op::Profiler::setDefaultX(FLAGS_profile_speed); op::log("Starting pose estimation demo.", op::Priority::High); const auto timerBegin = std::chrono::high_resolution_clock::now(); diff --git a/examples/tutorial_wrapper/1_user_asynchronous_output.cpp b/examples/tutorial_wrapper/1_user_asynchronous_output.cpp index 52b5124b..a4ed565a 100644 --- a/examples/tutorial_wrapper/1_user_asynchronous_output.cpp +++ b/examples/tutorial_wrapper/1_user_asynchronous_output.cpp @@ -39,6 +39,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc " for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with" " low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the" " error."); +DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some" + " runtime statistics at this frame number."); // Producer DEFINE_int32(camera, -1, "The camera index for cv::VideoCapture. Integer in the range [0, 9]. Select a negative" " number (by default), to auto-detect and open the first available camera."); @@ -280,7 +282,7 @@ int openPoseTutorialWrapper1() op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.", __LINE__, __FUNCTION__, __FILE__); op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level); - // op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages + op::Profiler::setDefaultX(FLAGS_profile_speed); op::log("Starting pose estimation demo.", op::Priority::High); const auto timerBegin = std::chrono::high_resolution_clock::now(); diff --git a/examples/tutorial_wrapper/2_user_synchronous.cpp b/examples/tutorial_wrapper/2_user_synchronous.cpp index e58140b2..3b473116 100644 --- a/examples/tutorial_wrapper/2_user_synchronous.cpp +++ b/examples/tutorial_wrapper/2_user_synchronous.cpp @@ -39,6 +39,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc " for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with" " low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the" " error."); +DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some" + " runtime statistics at this frame number."); // Producer DEFINE_string(image_dir, "examples/media/", "Process a directory of images. Read all standard formats (jpg, png, bmp, etc.)."); // OpenPose @@ -363,7 +365,7 @@ int openPoseTutorialWrapper2() op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.", __LINE__, __FUNCTION__, __FILE__); op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level); - // op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages + op::Profiler::setDefaultX(FLAGS_profile_speed); op::log("Starting pose estimation demo.", op::Priority::High); const auto timerBegin = std::chrono::high_resolution_clock::now(); diff --git a/examples/tutorial_wrapper/3_user_asynchronous.cpp b/examples/tutorial_wrapper/3_user_asynchronous.cpp index 6aea7383..9c33a403 100644 --- a/examples/tutorial_wrapper/3_user_asynchronous.cpp +++ b/examples/tutorial_wrapper/3_user_asynchronous.cpp @@ -39,6 +39,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc " for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with" " low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the" " error."); +DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some" + " runtime statistics at this frame number."); // Producer DEFINE_string(image_dir, "examples/media/", "Process a directory of images. Read all standard formats (jpg, png, bmp, etc.)."); // OpenPose @@ -321,7 +323,7 @@ int openPoseTutorialWrapper3() op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.", __LINE__, __FUNCTION__, __FILE__); op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level); - // op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages + op::Profiler::setDefaultX(FLAGS_profile_speed); op::log("Starting pose estimation demo.", op::Priority::High); const auto timerBegin = std::chrono::high_resolution_clock::now(); diff --git a/examples_beta/openpose3d/openpose3d.cpp b/examples_beta/openpose3d/openpose3d.cpp index 088544a0..2fe5ac92 100644 --- a/examples_beta/openpose3d/openpose3d.cpp +++ b/examples_beta/openpose3d/openpose3d.cpp @@ -41,6 +41,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc " for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with" " low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the" " error."); +DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some" + " runtime statistics at this frame number."); // OpenPose DEFINE_string(model_folder, "models/", "Folder path (absolute or relative) where the models (pose, face, ...) are located."); DEFINE_string(output_resolution, "-1x-1", "The image resolution (display and output). Use \"-1x-1\" to force the program to use the" @@ -172,7 +174,7 @@ int openpose3d() op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.", __LINE__, __FUNCTION__, __FILE__); op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level); - // op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages + op::Profiler::setDefaultX(FLAGS_profile_speed); op::log("Starting pose estimation demo.", op::Priority::High); const auto timerBegin = std::chrono::high_resolution_clock::now(); diff --git a/include/openpose/filestream/wHeatMapSaver.hpp b/include/openpose/filestream/wHeatMapSaver.hpp index fca54c06..ce25765c 100644 --- a/include/openpose/filestream/wHeatMapSaver.hpp +++ b/include/openpose/filestream/wHeatMapSaver.hpp @@ -66,7 +66,7 @@ namespace op // Profiling speed Profiler::timerEnd(profilerKey); Profiler::printAveragedTimeMsOnIterationX(profilerKey, - __LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X); + __LINE__, __FUNCTION__, __FILE__); // Debugging log dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__); } diff --git a/include/openpose/utilities/profiler.hpp b/include/openpose/utilities/profiler.hpp index 762feb45..276bdc8e 100644 --- a/include/openpose/utilities/profiler.hpp +++ b/include/openpose/utilities/profiler.hpp @@ -21,15 +21,22 @@ namespace op class OP_API Profiler { public: - static const unsigned long long DEFAULT_X; + static unsigned long long DEFAULT_X; + + // Non-thread safe, it must be performed at the beginning of the code before any parallelization occurs + static void setDefaultX(const unsigned long long defaultX); static const std::string timerInit(const int line, const std::string& function, const std::string& file); static void timerEnd(const std::string& key); - static void printAveragedTimeMsOnIterationX(const std::string& key, const int line, const std::string& function, const std::string& file, const unsigned long long x = DEFAULT_X); + static void printAveragedTimeMsOnIterationX(const std::string& key, const int line, + const std::string& function, const std::string& file, + const unsigned long long x = DEFAULT_X); - static void printAveragedTimeMsEveryXIterations(const std::string& key, const int line, const std::string& function, const std::string& file, const unsigned long long x = DEFAULT_X); + static void printAveragedTimeMsEveryXIterations(const std::string& key, const int line, + const std::string& function, const std::string& file, + const unsigned long long x = DEFAULT_X); static void profileGpuMemory(const int line, const std::string& function, const std::string& file); }; diff --git a/src/openpose/pose/bodyPartConnectorCaffe.cpp b/src/openpose/pose/bodyPartConnectorCaffe.cpp index 274dd64a..d1f8e427 100644 --- a/src/openpose/pose/bodyPartConnectorCaffe.cpp +++ b/src/openpose/pose/bodyPartConnectorCaffe.cpp @@ -175,8 +175,8 @@ namespace op heatMapsGpuPtr, peaksGpuPtr); #else UNUSED(bottom); - UNUSED(top); UNUSED(poseKeypoints); + UNUSED(poseScores); error("OpenPose must be compiled with the `USE_CAFFE` & `USE_CUDA` macro definitions in order to run" " this functionality.", __LINE__, __FUNCTION__, __FILE__); #endif diff --git a/src/openpose/utilities/profiler.cpp b/src/openpose/utilities/profiler.cpp index af6cc8ae..bb487f78 100644 --- a/src/openpose/utilities/profiler.cpp +++ b/src/openpose/utilities/profiler.cpp @@ -10,7 +10,7 @@ namespace op { - const unsigned long long Profiler::DEFAULT_X = 1000; + unsigned long long Profiler::DEFAULT_X = 1000; #ifdef PROFILER_ENABLED @@ -26,13 +26,23 @@ namespace op return file + function + std::to_string(line) + threadId.str(); } - void printAveragedTimeMsCommon(const double timePast, const unsigned long long timeCounter, const int line, const std::string& function, const std::string& file) + void printAveragedTimeMsCommon(const double timePast, const unsigned long long timeCounter, const int line, + const std::string& function, const std::string& file) { const auto stringMessage = std::to_string( timePast / timeCounter / 1e6 ) + " msec at"; log(stringMessage, Priority::Max, line, function, file); } #endif + void Profiler::setDefaultX(const unsigned long long defaultX) + { + #ifdef PROFILER_ENABLED + DEFAULT_X = defaultX; + #else + UNUSED(defaultX); + #endif + } + const std::string Profiler::timerInit(const int line, const std::string& function, const std::string& file) { #ifdef PROFILER_ENABLED @@ -60,7 +70,9 @@ namespace op { auto tuple = sProfilerTuple[key]; // Time between init & end - const auto timeNs = (double)std::chrono::duration_cast(std::chrono::high_resolution_clock::now() - std::get<2>(tuple)).count(); + const auto timeNs = (double)std::chrono::duration_cast( + std::chrono::high_resolution_clock::now() - std::get<2>(tuple) + ).count(); // Accumulate averaged time std::get<0>(tuple) += timeNs; std::get<1>(tuple)++; @@ -74,7 +86,8 @@ namespace op #endif } - void Profiler::printAveragedTimeMsOnIterationX(const std::string& key, const int line, const std::string& function, const std::string& file, const unsigned long long x) + void Profiler::printAveragedTimeMsOnIterationX(const std::string& key, const int line, const std::string& function, + const std::string& file, const unsigned long long x) { #ifdef PROFILER_ENABLED std::unique_lock lock{sMutexProfiler}; @@ -88,7 +101,8 @@ namespace op } } else - error("Profiler::printAveragedTimeMsOnIterationX called with a non-existing key.", __LINE__, __FUNCTION__, __FILE__); + error("Profiler::printAveragedTimeMsOnIterationX called with a non-existing key.", + __LINE__, __FUNCTION__, __FILE__); #else UNUSED(key); UNUSED(line); @@ -98,7 +112,9 @@ namespace op #endif } - void Profiler::printAveragedTimeMsEveryXIterations(const std::string& key, const int line, const std::string& function, const std::string& file, const unsigned long long x) + void Profiler::printAveragedTimeMsEveryXIterations(const std::string& key, const int line, + const std::string& function, const std::string& file, + const unsigned long long x) { #ifdef PROFILER_ENABLED std::unique_lock lock{sMutexProfiler}; @@ -118,7 +134,8 @@ namespace op } } else - error("Profiler::printAveragedTimeMsOnIterationX called with a non-existing key.", __LINE__, __FUNCTION__, __FILE__); + error("Profiler::printAveragedTimeMsOnIterationX called with a non-existing key.", + __LINE__, __FUNCTION__, __FILE__); #else UNUSED(key); UNUSED(line); @@ -135,7 +152,8 @@ namespace op log("GPU usage.", Priority::Max, line, function, file); // GPU info - const auto answer = std::system("nvidia-smi | grep \"Processes:\"") | std::system("nvidia-smi | grep \"Process name\""); + const auto answer = std::system("nvidia-smi | grep \"Processes:\"") + | std::system("nvidia-smi | grep \"Process name\""); if (answer != 0) log("Error on the nvidia-smi header. Please, inform us of this error.", Priority::Max); else -- GitLab