提交 a87b0b6d 编写于 作者: G gineshidalgo99

Speed verbose at runtime configurable by user

上级 d97114f3
...@@ -6,7 +6,7 @@ SET WGET_EXE=wget\wget.exe ...@@ -6,7 +6,7 @@ SET WGET_EXE=wget\wget.exe
:: Download temporary zip :: Download temporary zip
echo ----- Downloading Caffe ----- echo ----- Downloading Caffe -----
SET FREEGLUT_FOLDER=caffe\ SET FREEGLUT_FOLDER=freeglut\
SET ZIP_NAME=freeglut_2018_01_14.zip SET ZIP_NAME=freeglut_2018_01_14.zip
SET ZIP_FULL_PATH=%FREEGLUT_FOLDER%%ZIP_NAME% SET ZIP_FULL_PATH=%FREEGLUT_FOLDER%%ZIP_NAME%
%WGET_EXE% -c http://posefs1.perception.cs.cmu.edu/OpenPose/3rdparty/windows/%ZIP_NAME% -P %FREEGLUT_FOLDER% %WGET_EXE% -c http://posefs1.perception.cs.cmu.edu/OpenPose/3rdparty/windows/%ZIP_NAME% -P %FREEGLUT_FOLDER%
......
...@@ -87,6 +87,9 @@ option(BUILD_DOCS "Build OpenPose documentation." OFF) ...@@ -87,6 +87,9 @@ option(BUILD_DOCS "Build OpenPose documentation." OFF)
# Build as shared library # Build as shared library
option(BUILD_SHARED_LIBS "Build as shared lib" ON) option(BUILD_SHARED_LIBS "Build as shared lib" ON)
# Speed profiler
option(PROFILER_ENABLED "If enabled, OpenPose will be able to print out speed information at runtime." OFF)
### FIND REQUIRED PACKAGES ### FIND REQUIRED PACKAGES
list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules") list(APPEND CMAKE_MODULE_PATH "${CMAKE_CURRENT_SOURCE_DIR}/cmake/Modules")
...@@ -281,3 +284,10 @@ configure_file( ...@@ -281,3 +284,10 @@ configure_file(
add_custom_target(uninstall add_custom_target(uninstall
COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake) COMMAND ${CMAKE_COMMAND} -P ${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake)
### OPENPOSE FLAGS
# Set/disable profiler
if (PROFILER_ENABLED)
add_definitions(-DPROFILER_ENABLED)
endif (PROFILER_ENABLED)
...@@ -133,7 +133,7 @@ Each flag is divided into flag name, default value, and description. ...@@ -133,7 +133,7 @@ Each flag is divided into flag name, default value, and description.
1. Debugging/Other 1. Debugging/Other
- DEFINE_int32(logging_level, 3, "The logging level. Integer in the range [0, 255]. 0 will output any log() message, while 255 will not output any. Current OpenPose library messages are in the range 0-4: 1 for low priority messages and 4 for important ones."); - DEFINE_int32(logging_level, 3, "The logging level. Integer in the range [0, 255]. 0 will output any log() message, while 255 will not output any. Current OpenPose library messages are in the range 0-4: 1 for low priority messages and 4 for important ones.");
- DEFINE_bool(disable_multi_thread, false, "It would slightly reduce the frame rate in order to highly reduce the lag. Mainly useful for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the error."); - DEFINE_bool(disable_multi_thread, false, "It would slightly reduce the frame rate in order to highly reduce the lag. Mainly useful for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the error.");
- DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some runtime statistics at this frame number.");
2. Producer 2. Producer
- DEFINE_int32(camera, -1, "The camera index for cv::VideoCapture. Integer in the range [0, 9]. Select a negative number (by default), to auto-detect and open the first available camera."); - DEFINE_int32(camera, -1, "The camera index for cv::VideoCapture. Integer in the range [0, 9]. Select a negative number (by default), to auto-detect and open the first available camera.");
- DEFINE_string(camera_resolution, "1280x720", "Size of the camera frames to ask for."); - DEFINE_string(camera_resolution, "1280x720", "Size of the camera frames to ask for.");
......
...@@ -133,8 +133,8 @@ You just need to remove the OpenPose folder, by default called `openpose/`. E.g. ...@@ -133,8 +133,8 @@ You just need to remove the OpenPose folder, by default called `openpose/`. E.g.
## Windows ## Windows
### Installation - Demo ### Installation - Demo and Binaries
1. Download and unzip the [portable OpenPose demo 1.0.1](http://posefs1.perception.cs.cmu.edu/OpenPose/OpenPose_demo_1.0.1.zip). 1. Download and unzip the latest `openpose-X.X.X-win64-binaries.zip` Windows binary zip file from the [releases section](https://github.com/CMU-Perceptual-Computing-Lab/openpose/releases).
......
...@@ -176,3 +176,8 @@ OpenPose Library - Release Notes ...@@ -176,3 +176,8 @@ OpenPose Library - Release Notes
4. Added freeglut download script (3-D reconstruction demo for Windows). 4. Added freeglut download script (3-D reconstruction demo for Windows).
2. Main bugs fixed: 2. Main bugs fixed:
1. Slight speed up (~1%) for performing the non-maximum suppression stage only in the body part heatmaps channels, and not also in the PAF channels. 1. Slight speed up (~1%) for performing the non-maximum suppression stage only in the body part heatmaps channels, and not also in the PAF channels.
## All OpenPose Versions
Download and/or check any OpenPose version from [https://github.com/CMU-Perceptual-Computing-Lab/openpose/releases](https://github.com/CMU-Perceptual-Computing-Lab/openpose/releases).
...@@ -39,6 +39,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc ...@@ -39,6 +39,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc
" for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with" " for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with"
" low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the" " low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the"
" error."); " error.");
DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some"
" runtime statistics at this frame number.");
// Producer // Producer
DEFINE_int32(camera, -1, "The camera index for cv::VideoCapture. Integer in the range [0, 9]. Select a negative" DEFINE_int32(camera, -1, "The camera index for cv::VideoCapture. Integer in the range [0, 9]. Select a negative"
" number (by default), to auto-detect and open the first available camera."); " number (by default), to auto-detect and open the first available camera.");
...@@ -195,7 +197,12 @@ int openPoseDemo() ...@@ -195,7 +197,12 @@ int openPoseDemo()
op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.", op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.",
__LINE__, __FUNCTION__, __FILE__); __LINE__, __FUNCTION__, __FILE__);
op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level); op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level);
// op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages op::Profiler::setDefaultX(FLAGS_profile_speed);
// // For debugging
// // Print all logging messages
// op::ConfigureLog::setPriorityThreshold(op::Priority::None);
// // Print out speed values faster
// op::Profiler::setDefaultX(100);
op::log("Starting pose estimation demo.", op::Priority::High); op::log("Starting pose estimation demo.", op::Priority::High);
const auto timerBegin = std::chrono::high_resolution_clock::now(); const auto timerBegin = std::chrono::high_resolution_clock::now();
......
...@@ -38,7 +38,6 @@ int handFromJsonTest() ...@@ -38,7 +38,6 @@ int handFromJsonTest()
op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.", op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.",
__LINE__, __FUNCTION__, __FILE__); __LINE__, __FUNCTION__, __FILE__);
op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level); op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level);
// op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages
op::log("Starting pose estimation demo.", op::Priority::High); op::log("Starting pose estimation demo.", op::Priority::High);
const auto timerBegin = std::chrono::high_resolution_clock::now(); const auto timerBegin = std::chrono::high_resolution_clock::now();
......
...@@ -49,6 +49,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc ...@@ -49,6 +49,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc
" for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with" " for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with"
" low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the" " low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the"
" error."); " error.");
DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some"
" runtime statistics at this frame number.");
// Producer // Producer
DEFINE_int32(camera, -1, "The camera index for cv::VideoCapture. Integer in the range [0, 9]. Select a negative" DEFINE_int32(camera, -1, "The camera index for cv::VideoCapture. Integer in the range [0, 9]. Select a negative"
" number (by default), to auto-detect and open the first available camera."); " number (by default), to auto-detect and open the first available camera.");
...@@ -204,7 +206,7 @@ int openPoseTutorialWrapper4() ...@@ -204,7 +206,7 @@ int openPoseTutorialWrapper4()
op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.", op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.",
__LINE__, __FUNCTION__, __FILE__); __LINE__, __FUNCTION__, __FILE__);
op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level); op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level);
// op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages op::Profiler::setDefaultX(FLAGS_profile_speed);
op::log("Starting pose estimation demo.", op::Priority::High); op::log("Starting pose estimation demo.", op::Priority::High);
const auto timerBegin = std::chrono::high_resolution_clock::now(); const auto timerBegin = std::chrono::high_resolution_clock::now();
......
...@@ -39,6 +39,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc ...@@ -39,6 +39,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc
" for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with" " for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with"
" low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the" " low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the"
" error."); " error.");
DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some"
" runtime statistics at this frame number.");
// Producer // Producer
DEFINE_int32(camera, -1, "The camera index for cv::VideoCapture. Integer in the range [0, 9]. Select a negative" DEFINE_int32(camera, -1, "The camera index for cv::VideoCapture. Integer in the range [0, 9]. Select a negative"
" number (by default), to auto-detect and open the first available camera."); " number (by default), to auto-detect and open the first available camera.");
...@@ -280,7 +282,7 @@ int openPoseTutorialWrapper1() ...@@ -280,7 +282,7 @@ int openPoseTutorialWrapper1()
op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.", op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.",
__LINE__, __FUNCTION__, __FILE__); __LINE__, __FUNCTION__, __FILE__);
op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level); op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level);
// op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages op::Profiler::setDefaultX(FLAGS_profile_speed);
op::log("Starting pose estimation demo.", op::Priority::High); op::log("Starting pose estimation demo.", op::Priority::High);
const auto timerBegin = std::chrono::high_resolution_clock::now(); const auto timerBegin = std::chrono::high_resolution_clock::now();
......
...@@ -39,6 +39,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc ...@@ -39,6 +39,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc
" for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with" " for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with"
" low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the" " low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the"
" error."); " error.");
DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some"
" runtime statistics at this frame number.");
// Producer // Producer
DEFINE_string(image_dir, "examples/media/", "Process a directory of images. Read all standard formats (jpg, png, bmp, etc.)."); DEFINE_string(image_dir, "examples/media/", "Process a directory of images. Read all standard formats (jpg, png, bmp, etc.).");
// OpenPose // OpenPose
...@@ -363,7 +365,7 @@ int openPoseTutorialWrapper2() ...@@ -363,7 +365,7 @@ int openPoseTutorialWrapper2()
op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.", op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.",
__LINE__, __FUNCTION__, __FILE__); __LINE__, __FUNCTION__, __FILE__);
op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level); op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level);
// op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages op::Profiler::setDefaultX(FLAGS_profile_speed);
op::log("Starting pose estimation demo.", op::Priority::High); op::log("Starting pose estimation demo.", op::Priority::High);
const auto timerBegin = std::chrono::high_resolution_clock::now(); const auto timerBegin = std::chrono::high_resolution_clock::now();
......
...@@ -39,6 +39,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc ...@@ -39,6 +39,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc
" for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with" " for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with"
" low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the" " low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the"
" error."); " error.");
DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some"
" runtime statistics at this frame number.");
// Producer // Producer
DEFINE_string(image_dir, "examples/media/", "Process a directory of images. Read all standard formats (jpg, png, bmp, etc.)."); DEFINE_string(image_dir, "examples/media/", "Process a directory of images. Read all standard formats (jpg, png, bmp, etc.).");
// OpenPose // OpenPose
...@@ -321,7 +323,7 @@ int openPoseTutorialWrapper3() ...@@ -321,7 +323,7 @@ int openPoseTutorialWrapper3()
op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.", op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.",
__LINE__, __FUNCTION__, __FILE__); __LINE__, __FUNCTION__, __FILE__);
op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level); op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level);
// op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages op::Profiler::setDefaultX(FLAGS_profile_speed);
op::log("Starting pose estimation demo.", op::Priority::High); op::log("Starting pose estimation demo.", op::Priority::High);
const auto timerBegin = std::chrono::high_resolution_clock::now(); const auto timerBegin = std::chrono::high_resolution_clock::now();
......
...@@ -41,6 +41,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc ...@@ -41,6 +41,8 @@ DEFINE_bool(disable_multi_thread, false, "It would slightly reduc
" for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with" " for 1) Cases where it is needed a low latency (e.g. webcam in real-time scenarios with"
" low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the" " low-range GPU devices); and 2) Debugging OpenPose when it is crashing to locate the"
" error."); " error.");
DEFINE_int32(profile_speed, 1000, "If PROFILER_ENABLED was set in CMake or Makefile.config files, OpenPose will show some"
" runtime statistics at this frame number.");
// OpenPose // OpenPose
DEFINE_string(model_folder, "models/", "Folder path (absolute or relative) where the models (pose, face, ...) are located."); DEFINE_string(model_folder, "models/", "Folder path (absolute or relative) where the models (pose, face, ...) are located.");
DEFINE_string(output_resolution, "-1x-1", "The image resolution (display and output). Use \"-1x-1\" to force the program to use the" DEFINE_string(output_resolution, "-1x-1", "The image resolution (display and output). Use \"-1x-1\" to force the program to use the"
...@@ -172,7 +174,7 @@ int openpose3d() ...@@ -172,7 +174,7 @@ int openpose3d()
op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.", op::check(0 <= FLAGS_logging_level && FLAGS_logging_level <= 255, "Wrong logging_level value.",
__LINE__, __FUNCTION__, __FILE__); __LINE__, __FUNCTION__, __FILE__);
op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level); op::ConfigureLog::setPriorityThreshold((op::Priority)FLAGS_logging_level);
// op::ConfigureLog::setPriorityThreshold(op::Priority::None); // To print all logging messages op::Profiler::setDefaultX(FLAGS_profile_speed);
op::log("Starting pose estimation demo.", op::Priority::High); op::log("Starting pose estimation demo.", op::Priority::High);
const auto timerBegin = std::chrono::high_resolution_clock::now(); const auto timerBegin = std::chrono::high_resolution_clock::now();
......
...@@ -66,7 +66,7 @@ namespace op ...@@ -66,7 +66,7 @@ namespace op
// Profiling speed // Profiling speed
Profiler::timerEnd(profilerKey); Profiler::timerEnd(profilerKey);
Profiler::printAveragedTimeMsOnIterationX(profilerKey, Profiler::printAveragedTimeMsOnIterationX(profilerKey,
__LINE__, __FUNCTION__, __FILE__, Profiler::DEFAULT_X); __LINE__, __FUNCTION__, __FILE__);
// Debugging log // Debugging log
dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__); dLog("", Priority::Low, __LINE__, __FUNCTION__, __FILE__);
} }
......
...@@ -21,15 +21,22 @@ namespace op ...@@ -21,15 +21,22 @@ namespace op
class OP_API Profiler class OP_API Profiler
{ {
public: public:
static const unsigned long long DEFAULT_X; static unsigned long long DEFAULT_X;
// Non-thread safe, it must be performed at the beginning of the code before any parallelization occurs
static void setDefaultX(const unsigned long long defaultX);
static const std::string timerInit(const int line, const std::string& function, const std::string& file); static const std::string timerInit(const int line, const std::string& function, const std::string& file);
static void timerEnd(const std::string& key); static void timerEnd(const std::string& key);
static void printAveragedTimeMsOnIterationX(const std::string& key, const int line, const std::string& function, const std::string& file, const unsigned long long x = DEFAULT_X); static void printAveragedTimeMsOnIterationX(const std::string& key, const int line,
const std::string& function, const std::string& file,
const unsigned long long x = DEFAULT_X);
static void printAveragedTimeMsEveryXIterations(const std::string& key, const int line, const std::string& function, const std::string& file, const unsigned long long x = DEFAULT_X); static void printAveragedTimeMsEveryXIterations(const std::string& key, const int line,
const std::string& function, const std::string& file,
const unsigned long long x = DEFAULT_X);
static void profileGpuMemory(const int line, const std::string& function, const std::string& file); static void profileGpuMemory(const int line, const std::string& function, const std::string& file);
}; };
......
...@@ -175,8 +175,8 @@ namespace op ...@@ -175,8 +175,8 @@ namespace op
heatMapsGpuPtr, peaksGpuPtr); heatMapsGpuPtr, peaksGpuPtr);
#else #else
UNUSED(bottom); UNUSED(bottom);
UNUSED(top);
UNUSED(poseKeypoints); UNUSED(poseKeypoints);
UNUSED(poseScores);
error("OpenPose must be compiled with the `USE_CAFFE` & `USE_CUDA` macro definitions in order to run" error("OpenPose must be compiled with the `USE_CAFFE` & `USE_CUDA` macro definitions in order to run"
" this functionality.", __LINE__, __FUNCTION__, __FILE__); " this functionality.", __LINE__, __FUNCTION__, __FILE__);
#endif #endif
......
...@@ -10,7 +10,7 @@ ...@@ -10,7 +10,7 @@
namespace op namespace op
{ {
const unsigned long long Profiler::DEFAULT_X = 1000; unsigned long long Profiler::DEFAULT_X = 1000;
#ifdef PROFILER_ENABLED #ifdef PROFILER_ENABLED
...@@ -26,13 +26,23 @@ namespace op ...@@ -26,13 +26,23 @@ namespace op
return file + function + std::to_string(line) + threadId.str(); return file + function + std::to_string(line) + threadId.str();
} }
void printAveragedTimeMsCommon(const double timePast, const unsigned long long timeCounter, const int line, const std::string& function, const std::string& file) void printAveragedTimeMsCommon(const double timePast, const unsigned long long timeCounter, const int line,
const std::string& function, const std::string& file)
{ {
const auto stringMessage = std::to_string( timePast / timeCounter / 1e6 ) + " msec at"; const auto stringMessage = std::to_string( timePast / timeCounter / 1e6 ) + " msec at";
log(stringMessage, Priority::Max, line, function, file); log(stringMessage, Priority::Max, line, function, file);
} }
#endif #endif
void Profiler::setDefaultX(const unsigned long long defaultX)
{
#ifdef PROFILER_ENABLED
DEFAULT_X = defaultX;
#else
UNUSED(defaultX);
#endif
}
const std::string Profiler::timerInit(const int line, const std::string& function, const std::string& file) const std::string Profiler::timerInit(const int line, const std::string& function, const std::string& file)
{ {
#ifdef PROFILER_ENABLED #ifdef PROFILER_ENABLED
...@@ -60,7 +70,9 @@ namespace op ...@@ -60,7 +70,9 @@ namespace op
{ {
auto tuple = sProfilerTuple[key]; auto tuple = sProfilerTuple[key];
// Time between init & end // Time between init & end
const auto timeNs = (double)std::chrono::duration_cast<std::chrono::nanoseconds>(std::chrono::high_resolution_clock::now() - std::get<2>(tuple)).count(); const auto timeNs = (double)std::chrono::duration_cast<std::chrono::nanoseconds>(
std::chrono::high_resolution_clock::now() - std::get<2>(tuple)
).count();
// Accumulate averaged time // Accumulate averaged time
std::get<0>(tuple) += timeNs; std::get<0>(tuple) += timeNs;
std::get<1>(tuple)++; std::get<1>(tuple)++;
...@@ -74,7 +86,8 @@ namespace op ...@@ -74,7 +86,8 @@ namespace op
#endif #endif
} }
void Profiler::printAveragedTimeMsOnIterationX(const std::string& key, const int line, const std::string& function, const std::string& file, const unsigned long long x) void Profiler::printAveragedTimeMsOnIterationX(const std::string& key, const int line, const std::string& function,
const std::string& file, const unsigned long long x)
{ {
#ifdef PROFILER_ENABLED #ifdef PROFILER_ENABLED
std::unique_lock<std::mutex> lock{sMutexProfiler}; std::unique_lock<std::mutex> lock{sMutexProfiler};
...@@ -88,7 +101,8 @@ namespace op ...@@ -88,7 +101,8 @@ namespace op
} }
} }
else else
error("Profiler::printAveragedTimeMsOnIterationX called with a non-existing key.", __LINE__, __FUNCTION__, __FILE__); error("Profiler::printAveragedTimeMsOnIterationX called with a non-existing key.",
__LINE__, __FUNCTION__, __FILE__);
#else #else
UNUSED(key); UNUSED(key);
UNUSED(line); UNUSED(line);
...@@ -98,7 +112,9 @@ namespace op ...@@ -98,7 +112,9 @@ namespace op
#endif #endif
} }
void Profiler::printAveragedTimeMsEveryXIterations(const std::string& key, const int line, const std::string& function, const std::string& file, const unsigned long long x) void Profiler::printAveragedTimeMsEveryXIterations(const std::string& key, const int line,
const std::string& function, const std::string& file,
const unsigned long long x)
{ {
#ifdef PROFILER_ENABLED #ifdef PROFILER_ENABLED
std::unique_lock<std::mutex> lock{sMutexProfiler}; std::unique_lock<std::mutex> lock{sMutexProfiler};
...@@ -118,7 +134,8 @@ namespace op ...@@ -118,7 +134,8 @@ namespace op
} }
} }
else else
error("Profiler::printAveragedTimeMsOnIterationX called with a non-existing key.", __LINE__, __FUNCTION__, __FILE__); error("Profiler::printAveragedTimeMsOnIterationX called with a non-existing key.",
__LINE__, __FUNCTION__, __FILE__);
#else #else
UNUSED(key); UNUSED(key);
UNUSED(line); UNUSED(line);
...@@ -135,7 +152,8 @@ namespace op ...@@ -135,7 +152,8 @@ namespace op
log("GPU usage.", Priority::Max, line, function, file); log("GPU usage.", Priority::Max, line, function, file);
// GPU info // GPU info
const auto answer = std::system("nvidia-smi | grep \"Processes:\"") | std::system("nvidia-smi | grep \"Process name\""); const auto answer = std::system("nvidia-smi | grep \"Processes:\"")
| std::system("nvidia-smi | grep \"Process name\"");
if (answer != 0) if (answer != 0)
log("Error on the nvidia-smi header. Please, inform us of this error.", Priority::Max); log("Error on the nvidia-smi header. Please, inform us of this error.", Priority::Max);
else else
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册