提交 0fd438d8 编写于 作者: G gineshidalgo99

OpenCV 4.0 compatibility

上级 dcf98338
......@@ -3,7 +3,7 @@ OpenPose Demo - Overview
Forget about the OpenPose library code, just compile the library and use the demo `./build/examples/openpose/openpose.bin`.
In order to learn how to use it, run `./build/examples/openpose/openpose.bin --help` in your bash and read all the available flags (check only the flags for `examples/openpose/openpose.cpp` itself, i.e. the section `Flags from examples/openpose/openpose.cpp:`). We detail some of them in the following sections.
In order to learn how to use it, run `./build/examples/openpose/openpose.bin --help` in your bash and read all the available flags (check only the flags for `examples/openpose/openpose.cpp` itself, i.e., the section `Flags from examples/openpose/openpose.cpp:`). We detail some of them in the following sections.
......@@ -144,7 +144,7 @@ Each flag is divided into flag name, default value, and description.
- DEFINE_bool(flir_camera, false, "Whether to use FLIR (Point-Grey) stereo camera.");
- DEFINE_int32(flir_camera_index, -1, "Select -1 (default) to run on all detected flir cameras at once. Otherwise, select the flir camera index to run, where 0 corresponds to the detected flir camera with the lowest serial number, and `n` to the `n`-th lowest serial number camera.");
- DEFINE_string(ip_camera, "", "String with the IP camera URL. It supports protocols like RTSP and HTTP.");
- DEFINE_uint64(frame_first, 0, "Start on desired frame number. Indexes are 0-based, i.e. the first frame has index 0.");
- DEFINE_uint64(frame_first, 0, "Start on desired frame number. Indexes are 0-based, i.e., the first frame has index 0.");
- DEFINE_uint64(frame_step, 1, "Step or gap between processed frames. E.g., `--frame_step 5` would read and process frames 0, 5, 10, etc.");
- DEFINE_uint64(frame_last, -1, "Finish on desired frame number. Select -1 to disable. Indexes are 0-based, e.g., if set to 10, it will process 11 frames (0-10).");
- DEFINE_bool(frame_flip, false, "Flip/mirror each frame (e.g., for real time webcam demonstrations).");
......@@ -159,7 +159,7 @@ Each flag is divided into flag name, default value, and description.
- DEFINE_string(output_resolution, "-1x-1", "The image resolution (display and output). Use \"-1x-1\" to force the program to use the input image resolution.");
- DEFINE_int32(num_gpu, -1, "The number of GPU devices to use. If negative, it will use all the available GPUs in your machine.");
- DEFINE_int32(num_gpu_start, 0, "GPU device start number.");
- DEFINE_int32(keypoint_scale, 0, "Scaling of the (x,y) coordinates of the final pose data array, i.e. the scale of the (x,y) coordinates that will be saved with the `write_json` & `write_keypoint` flags. Select `0` to scale it to the original source resolution; `1`to scale it to the net output size (set with `net_resolution`); `2` to scale it to the final output size (set with `resolution`); `3` to scale it in the range [0,1], where (0,0) would be the top-left corner of the image, and (1,1) the bottom-right one; and 4 for range [-1,1], where (-1,-1) would be the top-left corner of the image, and (1,1) the bottom-right one. Non related with `scale_number` and `scale_gap`.");
- DEFINE_int32(keypoint_scale, 0, "Scaling of the (x,y) coordinates of the final pose data array, i.e., the scale of the (x,y) coordinates that will be saved with the `write_json` & `write_keypoint` flags. Select `0` to scale it to the original source resolution; `1` to scale it to the net output size (set with `net_resolution`); `2` to scale it to the final output size (set with `resolution`); `3` to scale it in the range [0,1], where (0,0) would be the top-left corner of the image, and (1,1) the bottom-right one; and 4 for range [-1,1], where (-1,-1) would be the top-left corner of the image, and (1,1) the bottom-right one. Not related to `scale_number` and `scale_gap`.");
- DEFINE_int32(number_people_max, -1, "This parameter will limit the maximum number of people detected, by keeping the people with top scores. The score is based on person area over the image, body part score, as well as joint score (between each pair of connected body parts). Useful if you know the exact number of people in the scene, so it can remove false positives (if all the people have been detected). However, it might also include false negatives by removing very small or highly occluded people. -1 will keep them all.");
4. OpenPose Body Pose
......@@ -174,7 +174,7 @@ Each flag is divided into flag name, default value, and description.
- DEFINE_bool(heatmaps_add_bkg, false, "Same functionality as `add_heatmaps_parts`, but adding the heatmap corresponding to background.");
- DEFINE_bool(heatmaps_add_PAFs, false, "Same functionality as `add_heatmaps_parts`, but adding the PAFs.");
- DEFINE_int32(heatmaps_scale, 2, "Set 0 to scale op::Datum::poseHeatMaps in the range [-1,1], 1 for [0,1]; 2 for integer rounded [0,255]; and 3 for no scaling.");
- DEFINE_bool(part_candidates, false, "Also enable `write_json` in order to save this information. If true, it will fill the op::Datum::poseCandidates array with the body part candidates. Candidates refer to all the detected body parts, before being assembled into people. Note that the number of candidates is equal or higher than the number of final body parts (i.e. after being assembled into people). The empty body parts are filled with 0s. Program speed will slightly decrease. Not required for OpenPose, enable it only if you intend to explicitly use this information.");
- DEFINE_bool(part_candidates, false, "Also enable `write_json` in order to save this information. If true, it will fill the op::Datum::poseCandidates array with the body part candidates. Candidates refer to all the detected body parts, before being assembled into people. Note that the number of candidates is equal or higher than the number of final body parts (i.e., after being assembled into people). The empty body parts are filled with 0s. Program speed will slightly decrease. Not required for OpenPose, enable it only if you intend to explicitly use this information.");
6. OpenPose Face
- DEFINE_bool(face, false, "Enables face keypoint detection. It will share some parameters from the body pose, e.g., `model_folder`. Note that this will considerably slow down the performance and increase the required GPU memory. In addition, the greater number of people on the image, the slower OpenPose will be.");
......@@ -185,7 +185,7 @@ Each flag is divided into flag name, default value, and description.
- DEFINE_string(hand_net_resolution, "368x368", "Multiples of 16 and squared. Analogous to `net_resolution` but applied to the hand keypoint detector.");
- DEFINE_int32(hand_scale_number, 1, "Analogous to `scale_number` but applied to the hand keypoint detector. Our best results were found with `hand_scale_number` = 6 and `hand_scale_range` = 0.4.");
- DEFINE_double(hand_scale_range, 0.4, "Analogous purpose than `scale_gap` but applied to the hand keypoint detector. Total range between smallest and biggest scale. The scales will be centered in ratio 1. E.g., if scaleRange = 0.4 and scalesNumber = 2, then there will be 2 scales, 0.8 and 1.2.");
- DEFINE_bool(hand_tracking, false, "Adding hand tracking might improve hand keypoints detection for webcam (if the frame rate is high enough, i.e. >7 FPS per GPU) and video. This is not person ID tracking, it simply looks for hands in positions at which hands were located in previous frames, but it does not guarantee the same person ID among frames.");
- DEFINE_bool(hand_tracking, false, "Adding hand tracking might improve hand keypoints detection for webcam (if the frame rate is high enough, i.e., >7 FPS per GPU) and video. This is not person ID tracking, it simply looks for hands in positions at which hands were located in previous frames, but it does not guarantee the same person ID among frames.");
8. OpenPose 3-D Reconstruction
- DEFINE_bool(3d, false, "Running OpenPose 3-D reconstruction demo: 1) Reading from a stereo camera system. 2) Performing 3-D reconstruction from the multiple views. 3) Displaying 3-D reconstruction results. Note that it will only display 1 person. If multiple people are present, it will fail.");
......@@ -197,8 +197,8 @@ Each flag is divided into flag name, default value, and description.
- DEFINE_bool(disable_blending, false, "If enabled, it will render the results (keypoint skeletons or heatmaps) on a black background, instead of being rendered into the original image. Related: `part_to_show`, `alpha_pose`, and `alpha_pose`.");
10. OpenPose Rendering Pose
- DEFINE_double(render_threshold, 0.05, "Only estimated keypoints whose score confidences are higher than this threshold will be rendered. Generally, a high threshold (> 0.5) will only render very clear body parts; while small thresholds (~0.1) will also output guessed and occluded keypoints, but also more false positives (i.e. wrong detections).");
- DEFINE_int32(render_pose, -1, "Set to 0 for no rendering, 1 for CPU rendering (slightly faster), and 2 for GPU rendering (slower but greater functionality, e.g., `alpha_X` flags). If -1, it will pick CPU if CPU_ONLY is enabled, or GPU if CUDA is enabled. If rendering is enabled, it will render both `outputData` and `cvOutputData` with the original image and desired body part to be shown (i.e. keypoints, heat maps or PAFs).");
- DEFINE_double(render_threshold, 0.05, "Only estimated keypoints whose score confidences are higher than this threshold will be rendered. Generally, a high threshold (> 0.5) will only render very clear body parts; while small thresholds (~0.1) will also output guessed and occluded keypoints, but also more false positives (i.e., wrong detections).");
- DEFINE_int32(render_pose, -1, "Set to 0 for no rendering, 1 for CPU rendering (slightly faster), and 2 for GPU rendering (slower but greater functionality, e.g., `alpha_X` flags). If -1, it will pick CPU if CPU_ONLY is enabled, or GPU if CUDA is enabled. If rendering is enabled, it will render both `outputData` and `cvOutputData` with the original image and desired body part to be shown (i.e., keypoints, heat maps or PAFs).");
- DEFINE_double(alpha_pose, 0.6, "Blending factor (range 0-1) for the body part rendering. 1 will show it completely, 0 will hide it. Only valid for GPU rendering.");
- DEFINE_double(alpha_heatmap, 0.7, "Blending factor (range 0-1) between heatmap and original frame. 1 will only show the heatmap, 0 will only show the frame. Only valid for GPU rendering.");
......
......@@ -12,18 +12,19 @@ OpenPose - Installation
8. [Uninstallation](#uninstallation)
9. [Optional Settings](#optional-settings)
1. [Profiling Speed](#profiling-speed)
2. [COCO and MPI Models](#coco-and-mpi-models)
3. [Python API](#python-api)
4. [CPU Version](#cpu-version)
5. [Mac OSX Version](#mac-osx-version)
6. [OpenCL Version](#opencl-version)
7. [3D Reconstruction Module](#3d-reconstruction-module)
8. [Calibration Module](#calibration-module)
9. [Compiling without cuDNN](#compiling-without-cudnn)
10. [Custom Caffe (Ubuntu Only)](#custom-caffe-ubuntu-only)
11. [Custom OpenCV (Ubuntu Only)](#custom-opencv-ubuntu-only)
12. [Doxygen Documentation Autogeneration (Ubuntu Only)](#doxygen-documentation-autogeneration-ubuntu-only)
13. [CMake Command Line Configuration (Ubuntu Only)](#cmake-command-line-configuration-ubuntu-only)
2. [Faster GUI Display](#faster-gui-display)
3. [COCO and MPI Models](#coco-and-mpi-models)
4. [Python API](#python-api)
5. [CPU Version](#cpu-version)
6. [Mac OSX Version](#mac-osx-version)
7. [OpenCL Version](#opencl-version)
8. [3D Reconstruction Module](#3d-reconstruction-module)
9. [Calibration Module](#calibration-module)
10. [Compiling without cuDNN](#compiling-without-cudnn)
11. [Custom Caffe (Ubuntu Only)](#custom-caffe-ubuntu-only)
12. [Custom OpenCV (Ubuntu Only)](#custom-opencv-ubuntu-only)
13. [Doxygen Documentation Autogeneration (Ubuntu Only)](#doxygen-documentation-autogeneration-ubuntu-only)
14. [CMake Command Line Configuration (Ubuntu Only)](#cmake-command-line-configuration-ubuntu-only)
......@@ -271,6 +272,11 @@ OpenPose displays the FPS in the basic GUI. However, more complex speed metrics
#### Faster GUI Display
Reduce the lag and increase the speed of displaying images by enabling the `WITH_OPENCV_WITH_OPENGL` flag. It tells OpenCV to render the images using OpenGL support. This speeds up rendering about 3x. E.g., it reduces from about 30 msec to about 10 msec the display time for HD resolution images. It requires OpenCV to be compiled with OpenGL support and it provokes a visual aspect-ratio artifact when rendering a folder with images of different resolutions.
#### COCO and MPI Models
By default, the body COCO and MPI models are not downloaded. You can download them by turning on the `DOWNLOAD_BODY_COCO_MODEL` or `DOWNLOAD_BODY_MPI_MODEL` flags. The MPI model is slightly faster but less accurate and has fewer keypoints than the COCO body model.
......
......@@ -148,7 +148,7 @@ Note: This installer will not incorporate any new features, we recommend to use
2. Change `Debug` by `Release` mode.
3. Compile it and run it with <kbd>F5</kbd> or the green play icon.
4. If you have a webcam connected, OpenPose will automatically start after being compiled.
5. In order to use the created exe file from the command line (i.e. outside Visual Studio), you have to:
5. In order to use the created exe file from the command line (i.e., outside Visual Studio), you have to:
1. Copy all the DLLs located on `{openpose_folder}\3rdparty\windows\caffe\bin\` on the exe folder: `{openpose_folder}\windows\x64\Release`.
2. Copy all the DLLs located on `{openpose_folder}\3rdparty\windows\opencv\x64\vc14\bin\` on the exe folder: `{openpose_folder}\windows\x64\Release`.
3. Open the Windows cmd (Windows button + <kbd>X</kbd>, then <kbd>A</kbd>).
......
......@@ -37,7 +37,7 @@ There are 4 different ways to allocate the memory:
3. The `reset(const std::vector<int>& size)` function: It allocates the memory indicated for size. The allocated memory equals the product of all elements in the size vector. Internally, it is saved as a 1-D std::shared_ptr<T[]>.
4. The `reset(const int size)` function: equivalent for 1-dimension data (i.e. vector).
4. The `reset(const int size)` function: equivalent for 1-dimension data (i.e., vector).
5. The `setFrom(const cv::Mat& cvMat)` function: It calls `reset()` and copies the data from `cvMat`.
......@@ -59,9 +59,9 @@ There are several functions to get information about the allocated data:
3. `int getSize(const int index)`: It returns the size of the `index` dimension.
4. `size_t getNumberDimensions()`: It returns the number of dimensions (i.e. getSize().size()).
4. `size_t getNumberDimensions()`: It returns the number of dimensions (i.e., getSize().size()).
5. `size_t getVolume()`: It returns the total internal number of T objects, i.e. the product of all dimensions size.
5. `size_t getVolume()`: It returns the total internal number of T objects, i.e., the product of all dimension sizes.
### Datum - The OpenPose Basic Piece of Information Between Threads
......@@ -147,17 +147,17 @@ Classes starting by the letter `W` + upper case letter (e.g., `WGui`) directly o
The easiest way to create your own Worker is to inherit Worker<T>, and implement the work() function such that it just calls a wrapper to your desired functionality (check the source code of some of our basic Workers). Since the Worker classes are templates, they are always compiled. Therefore, including your desired functionality in a different file will let you compile it only once. Otherwise, it would be compiled any time that any code which uses your worker is compiled.
All OpenPose Workers are templates, i.e. they are not only limited to work with the default op::Datum. However, if you intend to use some of our Workers, your custom `TDatums` class (the one substituting op::Datum) should implement the same variables and functions that those Workers use. The easiest solution is to inherit from `op::Datum` and extend its functionality.
All OpenPose Workers are templates, i.e., they are not only limited to work with the default op::Datum. However, if you intend to use some of our Workers, your custom `TDatums` class (the one substituting op::Datum) should implement the same variables and functions that those Workers use. The easiest solution is to inherit from `op::Datum` and extend its functionality.
### Creating New Workers
Users can directly implement their own `W` from Worker<T> or any other sub-inherited Worker[...]<T> class and add them to `ThreadManager`. For that, they just need to: inherit those classes from...
1. Inherit from `Worker<T>` and implement the functionality `work(T& tDatum)`, i.e. it will use and modify tDatum.
1. Inherit from `Worker<T>` and implement the functionality `work(T& tDatum)`, i.e., it will use and modify tDatum.
2. Inherit from `WorkerProducer<T>` and implement the functionality `T work()`, i.e. it will create and return tDatum.
2. Inherit from `WorkerProducer<T>` and implement the functionality `T work()`, i.e., it will create and return tDatum.
3. Inherit from `WorkerConsumer<T>` and implement the functionality `work(const T& tDatum)`, i.e. it will use but will not modify tDatum.
3. Inherit from `WorkerConsumer<T>` and implement the functionality `work(const T& tDatum)`, i.e., it will use but will not modify tDatum.
We suggest that users also start their inherited `Worker<T>` classes with the `W` letter for code clarity, which is required if they want to send us a pull request.
......
......@@ -65,7 +65,7 @@ There are 2 alternatives to save the OpenPose output.
}
```
2. (Deprecated) The `write_keypoint` flag uses the OpenCV cv::FileStorage default formats, i.e. JSON (available after OpenCV 3.0), XML, and YML. Note that it does not include any other information othern than keypoints.
2. (Deprecated) The `write_keypoint` flag uses the OpenCV cv::FileStorage default formats, i.e., JSON (available after OpenCV 3.0), XML, and YML. Note that it does not include any other information other than keypoints.
Both of them follow the keypoint ordering described in the [Keypoint Ordering](#keypoint-ordering) section.
......
......@@ -14,7 +14,7 @@ OpenPose - Quick Start
## Quick Start
Check that the library is working properly by running any of the following commands. Make sure that you are in the **root directory of the project** (i.e. in the OpenPose folder, not inside `build/` nor `windows/` nor `bin/`). In addition, `examples/media/video.avi` and `examples/media` do exist, no need to change the paths.
Check that the library is working properly by running any of the following commands. Make sure that you are in the **root directory of the project** (i.e., in the OpenPose folder, not inside `build/` nor `windows/` nor `bin/`). In addition, `examples/media/video.avi` and `examples/media` do exist, no need to change the paths.
### Running on Video
```
......
......@@ -76,7 +76,7 @@ OpenPose Library - Release Notes
## OpenPose 1.0.1 (Jul 11, 2017)
1. Main improvements:
1. Windows library turned into DLL dynamic library (i.e. portable).
1. Windows library turned into DLL dynamic library (i.e., portable).
2. Improved documentation.
2. Functions or parameters renamed:
1. `openpose/utilities/macros.hpp` moved to `openpose/core/macros.hpp`.
......@@ -282,14 +282,20 @@ OpenPose Library - Release Notes
4. Array::Array that takes as input a pointer, so it does not re-allocate memory.
12. Producer defined inside Wrapper rather than being defined on each example.
13. Reduced many Visual Studio warnings (e.g., uncontrolled conversions between types).
14. Added new keypoint-related auxiliary functions in `utilities/keypoints.hpp`.
15. Function `resizeFixedAspectRatio` can take already allocated memory (e.g., faster if target is an Array<T> object, no intermediate cv::Mat required).
16. Added compatibility for OpenCV 4.0, while preserving 2.4.X and 3.X compatibility.
2. Functions or parameters renamed:
1. By default, python example `tutorial_developer/python_2_pose_from_heatmaps.py` was using 2 scales starting at -1x736, changed to 1 scale at -1x368.
2. WrapperStructPose default parameters changed to match those of the OpenPose demo binary.
3. WrapperT.configure() changed from 1 function that requires all arguments to individual functions that take 1 argument each.
4. Added `Forward` to all net classes that automatically selects between CUDA, OpenCL, or CPU-only version depending on the defines.
3. Main bugs fixed:
1. CMake-GUI was forcing to Release mode, allowed Debug modes too.
2. NMS returns in index 0 the number of found peaks. However, while the number of peaks was truncated to a maximum of 127, this index 0 was saving the real number instead of the truncated one.
3. Template functions could not be imported in Windows for projects using the OpenPose library DLL.
4. Function `scaleKeypoints2d` was not working if any of the scales was 1 (e.g., fail if scaleX = 1 but scaleY != 1, or if any offset was not 0).
5. Fixed bug in `KeepTopNPeople` that could provoke segmentation fault for `number_people_max` > 1.
......
......@@ -11,7 +11,7 @@
# Read that script for details about all the paths and change them for your own paths.
# Careful:
# If you are using the NAS, please do not override my files, i.e. please change the output paths (corresponding to the ones indicated by `--write_json`, which is ).
# If you are using the NAS, please do not override my files, i.e., please change the output paths (corresponding to the ones indicated by `--write_json`, which is ).
# In order to generate the JSON output:
# Uncomment the commented lines starting by `--write_json` and `--display 0`
......
......@@ -18,7 +18,7 @@
// 2. The internal temporary function variable equivalent would be thisIsAVariable.
// 3. Every line cannot have more than 120 characters.
// 4. If extra classes and files are required, add those extra files inside the OpenPose include and src folders,
// under a new folder (i.e. `include/newMethod/` and `src/newMethod/`), including `namespace op` on those files.
// under a new folder (i.e., `include/newMethod/` and `src/newMethod/`), including `namespace op` on those files.
// This example is a sub-case of `tutorial_api_cpp/6_synchronous_custom_postprocessing.cpp`, where only custom post-processing is
// considered.
......
......@@ -12,7 +12,7 @@
// 1. `core` module:
// For the Array<float> class that the `pose` module needs
// For the Datum struct that the `thread` module sends between the queues
// 2. `utilities` module: for the error & logging functions, i.e. op::error & op::log respectively
// 2. `utilities` module: for the error & logging functions, i.e., op::error & op::log respectively
// This file should only be used for the user to take specific examples.
// Command-line user interface
......
......@@ -12,7 +12,7 @@
// 1. `core` module:
// For the Array<float> class that the `pose` module needs
// For the Datum struct that the `thread` module sends between the queues
// 2. `utilities` module: for the error & logging functions, i.e. op::error & op::log respectively
// 2. `utilities` module: for the error & logging functions, i.e., op::error & op::log respectively
// This file should only be used for the user to take specific examples.
// Command-line user interface
......
......@@ -13,7 +13,7 @@
// 1. `core` module:
// For the Array<float> class that the `pose` module needs
// For the Datum struct that the `thread` module sends between the queues
// 2. `utilities` module: for the error & logging functions, i.e. op::error & op::log respectively
// 2. `utilities` module: for the error & logging functions, i.e., op::error & op::log respectively
// This file should only be used for the user to take specific examples.
// Command-line user interface
......
......@@ -13,7 +13,7 @@
// 1. `core` module:
// For the Array<float> class that the `pose` module needs
// For the Datum struct that the `thread` module sends between the queues
// 2. `utilities` module: for the error & logging functions, i.e. op::error & op::log respectively
// 2. `utilities` module: for the error & logging functions, i.e., op::error & op::log respectively
// This file should only be used for the user to take specific examples.
// Command-line user interface
......
......@@ -13,7 +13,7 @@
// 1. `core` module:
// For the Array<float> class that the `pose` module needs
// For the Datum struct that the `thread` module sends between the queues
// 2. `utilities` module: for the error & logging functions, i.e. op::error & op::log respectively
// 2. `utilities` module: for the error & logging functions, i.e., op::error & op::log respectively
// This file should only be used for the user to take specific examples.
// Command-line user interface
......
......@@ -12,7 +12,7 @@
// 1. `core` module:
// For the Array<float> class that the `pose` module needs
// For the Datum struct that the `thread` module sends between the queues
// 2. `utilities` module: for the error & logging functions, i.e. op::error & op::log respectively
// 2. `utilities` module: for the error & logging functions, i.e., op::error & op::log respectively
// This file should only be used for the user to take specific examples.
// Command-line user interface
......
......@@ -6,7 +6,7 @@
// 4. Display the rendered pose (`gui` module)
// In addition to the previous OpenPose modules, we also need to use:
// 1. `core` module: for the Array<float> class that the `pose` module needs
// 2. `utilities` module: for the error & logging functions, i.e. op::error & op::log respectively
// 2. `utilities` module: for the error & logging functions, i.e., op::error & op::log respectively
// 3rdparty dependencies
// GFlags: DEFINE_bool, _int32, _int64, _uint64, _double, _string
......@@ -55,7 +55,7 @@ DEFINE_bool(disable_blending, false, "If enabled, it will ren
DEFINE_double(render_threshold, 0.05, "Only estimated keypoints whose score confidences are higher than this threshold will be"
" rendered. Generally, a high threshold (> 0.5) will only render very clear body parts;"
" while small thresholds (~0.1) will also output guessed and occluded keypoints, but also"
" more false positives (i.e. wrong detections).");
" more false positives (i.e., wrong detections).");
DEFINE_double(alpha_pose, 0.6, "Blending factor (range 0-1) for the body part rendering. 1 will show it completely, 0 will"
" hide it. Only valid for GPU rendering.");
......@@ -96,7 +96,7 @@ int tutorialDeveloperPose1()
(float)FLAGS_alpha_pose};
op::OpOutputToCvMat opOutputToCvMat;
op::FrameDisplayer frameDisplayer{"OpenPose Tutorial - Example 1", outputSize};
// Step 4 - Initialize resources on desired thread (in this case single thread, i.e. we init resources here)
// Step 4 - Initialize resources on desired thread (in this case single thread, i.e., we init resources here)
poseExtractorCaffe.initializationOnThread();
poseRenderer.initializationOnThread();
......
......@@ -6,7 +6,7 @@
// 4. Display the rendered pose or heatmap (`gui` module)
// In addition to the previous OpenPose modules, we also need to use:
// 1. `core` module: for the Array<float> class that the `pose` module needs
// 2. `utilities` module: for the error & logging functions, i.e. op::error & op::log respectively
// 2. `utilities` module: for the error & logging functions, i.e., op::error & op::log respectively
// 3rdparty dependencies
// GFlags: DEFINE_bool, _int32, _int64, _uint64, _double, _string
......@@ -58,7 +58,7 @@ DEFINE_bool(disable_blending, false, "If enabled, it will ren
DEFINE_double(render_threshold, 0.05, "Only estimated keypoints whose score confidences are higher than this threshold will be"
" rendered. Generally, a high threshold (> 0.5) will only render very clear body parts;"
" while small thresholds (~0.1) will also output guessed and occluded keypoints, but also"
" more false positives (i.e. wrong detections).");
" more false positives (i.e., wrong detections).");
DEFINE_double(alpha_pose, 0.6, "Blending factor (range 0-1) for the body part rendering. 1 will show it completely, 0 will"
" hide it. Only valid for GPU rendering.");
DEFINE_double(alpha_heatmap, 0.7, "Blending factor (range 0-1) between heatmap and original frame. 1 will only show the"
......@@ -103,7 +103,7 @@ int tutorialDeveloperPose2()
poseGpuRenderer.setElementToRender(FLAGS_part_to_show);
op::OpOutputToCvMat opOutputToCvMat;
op::FrameDisplayer frameDisplayer{"OpenPose Tutorial - Example 2", outputSize};
// Step 4 - Initialize resources on desired thread (in this case single thread, i.e. we init resources here)
// Step 4 - Initialize resources on desired thread (in this case single thread, i.e., we init resources here)
poseExtractorPtr->initializationOnThread();
poseGpuRenderer.initializationOnThread();
......
......@@ -5,7 +5,7 @@
// Everything in a multi-thread scenario (`thread` module)
// In addition to the previous OpenPose modules, we also need to use:
// 1. `core` module: for the Datum struct that the `thread` module sends between the queues
// 2. `utilities` module: for the error & logging functions, i.e. op::error & op::log respectively
// 2. `utilities` module: for the error & logging functions, i.e., op::error & op::log respectively
// 3rdparty dependencies
// GFlags: DEFINE_bool, _int32, _int64, _uint64, _double, _string
......
......@@ -6,7 +6,7 @@
// Everything in a multi-thread scenario (`thread` module)
// In addition to the previous OpenPose modules, we also need to use:
// 1. `core` module: for the Datum struct that the `thread` module sends between the queues
// 2. `utilities` module: for the error & logging functions, i.e. op::error & op::log respectively
// 2. `utilities` module: for the error & logging functions, i.e., op::error & op::log respectively
// 3rdparty dependencies
// GFlags: DEFINE_bool, _int32, _int64, _uint64, _double, _string
......
......@@ -6,7 +6,7 @@
// Everything in a multi-thread scenario (`thread` module)
// In addition to the previous OpenPose modules, we also need to use:
// 1. `core` module: for the Datum struct that the `thread` module sends between the queues
// 2. `utilities` module: for the error & logging functions, i.e. op::error & op::log respectively
// 2. `utilities` module: for the error & logging functions, i.e., op::error & op::log respectively
// 3rdparty dependencies
// GFlags: DEFINE_bool, _int32, _int64, _uint64, _double, _string
......
......@@ -6,7 +6,7 @@
// Everything in a multi-thread scenario (`thread` module)
// In addition to the previous OpenPose modules, we also need to use:
// 1. `core` module: for the Datum struct that the `thread` module sends between the queues
// 2. `utilities` module: for the error & logging functions, i.e. op::error & op::log respectively
// 2. `utilities` module: for the error & logging functions, i.e., op::error & op::log respectively
// 3rdparty dependencies
// GFlags: DEFINE_bool, _int32, _int64, _uint64, _double, _string
......
......@@ -271,7 +271,7 @@ namespace op
* This function is only implemented for Pybind11 usage.
* @return A raw pointer to the data.
*/
inline T* getPybindPtr() const
inline T* getPseudoConstPtr() const
{
return pData; // spData.get()
}
......
......@@ -22,7 +22,7 @@ const std::string OPEN_POSE_NAME_AND_VERSION = OPEN_POSE_NAME_STRING + " " + OPE
#define OP_API __declspec(dllimport)
#endif
//Disable some Windows Warnings
// Disable some Windows Warnings
#ifdef _WIN32
#pragma warning ( disable : 4251 ) // XXX needs to have dll-interface to be used by clients of class YYY
#pragma warning( disable: 4275 ) // non dll-interface structXXX used as base
......@@ -84,4 +84,36 @@ namespace boost
template <typename T> class shared_ptr; // E.g., boost::shared_ptr<caffe::Blob<float>>
}
// Compatibility for OpenCV 4.0 while preserving 2.4.X and 3.X compatibility
#if (defined(CV_MAJOR_VERSION) && CV_MAJOR_VERSION == 4)
#define CV_BGR2GRAY cv::COLOR_BGR2GRAY
#define CV_CALIB_CB_ADAPTIVE_THRESH cv::CALIB_CB_ADAPTIVE_THRESH
#define CV_CALIB_CB_NORMALIZE_IMAGE cv::CALIB_CB_NORMALIZE_IMAGE
#define CV_CALIB_CB_FILTER_QUADS cv::CALIB_CB_FILTER_QUADS
#define CV_CAP_PROP_FPS cv::CAP_PROP_FPS
#define CV_CAP_PROP_FRAME_COUNT cv::CAP_PROP_FRAME_COUNT
#define CV_CAP_PROP_FRAME_HEIGHT cv::CAP_PROP_FRAME_HEIGHT
#define CV_CAP_PROP_FRAME_WIDTH cv::CAP_PROP_FRAME_WIDTH
#define CV_CAP_PROP_POS_FRAMES cv::CAP_PROP_POS_FRAMES
#define CV_FOURCC cv::VideoWriter::fourcc
#define CV_GRAY2BGR cv::COLOR_GRAY2BGR
#define CV_HAAR_SCALE_IMAGE cv::CASCADE_SCALE_IMAGE
#define CV_IMWRITE_JPEG_QUALITY cv::IMWRITE_JPEG_QUALITY
#define CV_IMWRITE_PNG_COMPRESSION cv::IMWRITE_PNG_COMPRESSION
#define CV_INTER_CUBIC cv::INTER_CUBIC
#define CV_INTER_LINEAR cv::INTER_LINEAR
#define CV_L2 cv::NORM_L2
#define CV_LOAD_IMAGE_ANYDEPTH cv::IMREAD_ANYDEPTH
#define CV_LOAD_IMAGE_COLOR cv::IMREAD_COLOR
#define CV_LOAD_IMAGE_GRAYSCALE cv::IMREAD_GRAYSCALE
#define CV_TERMCRIT_EPS cv::TermCriteria::Type::EPS
#define CV_TERMCRIT_ITER cv::TermCriteria::Type::MAX_ITER
#define CV_WARP_INVERSE_MAP cv::WARP_INVERSE_MAP
#define CV_WINDOW_FULLSCREEN cv::WINDOW_FULLSCREEN
#define CV_WINDOW_KEEPRATIO cv::WINDOW_KEEPRATIO
#define CV_WINDOW_NORMAL cv::WINDOW_NORMAL
#define CV_WINDOW_OPENGL cv::WINDOW_OPENGL
#define CV_WND_PROP_FULLSCREEN cv::WND_PROP_FULLSCREEN
#endif
#endif // OPENPOSE_CORE_MACROS_HPP
......@@ -50,7 +50,7 @@ namespace op
/**
* This function returns the face keypoins. VERY IMPORTANT: use getFaceKeypoints().clone() if the keypoints are
* going to be edited in a different thread.
* @return A Array with all the face keypoints. It follows the pose structure, i.e. the first dimension
* @return A Array with all the face keypoints. It follows the pose structure, i.e., the first dimension
* corresponds to all the people in the image, the second to each specific keypoint, and the third one to
* (x, y, score).
*/
......
......@@ -46,7 +46,7 @@ DEFINE_int32(flir_camera_index, -1, "Select -1 (default) to
" camera index to run, where 0 corresponds to the detected flir camera with the lowest"
" serial number, and `n` to the `n`-th lowest serial number camera.");
DEFINE_string(ip_camera, "", "String with the IP camera URL. It supports protocols like RTSP and HTTP.");
DEFINE_uint64(frame_first, 0, "Start on desired frame number. Indexes are 0-based, i.e. the first frame has index 0.");
DEFINE_uint64(frame_first, 0, "Start on desired frame number. Indexes are 0-based, i.e., the first frame has index 0.");
DEFINE_uint64(frame_step, 1, "Step or gap between processed frames. E.g., `--frame_step 5` would read and process frames"
" 0, 5, 10, etc..");
DEFINE_uint64(frame_last, -1, "Finish on desired frame number. Select -1 to disable. Indexes are 0-based, e.g., if set to"
......@@ -68,7 +68,7 @@ DEFINE_string(output_resolution, "-1x-1", "The image resolution (d
DEFINE_int32(num_gpu, -1, "The number of GPU devices to use. If negative, it will use all the available GPUs in your"
" machine.");
DEFINE_int32(num_gpu_start, 0, "GPU device start number.");
DEFINE_int32(keypoint_scale, 0, "Scaling of the (x,y) coordinates of the final pose data array, i.e. the scale of the (x,y)"
DEFINE_int32(keypoint_scale, 0, "Scaling of the (x,y) coordinates of the final pose data array, i.e., the scale of the (x,y)"
" coordinates that will be saved with the `write_json` & `write_keypoint` flags."
" Select `0` to scale it to the original source resolution; `1`to scale it to the net output"
" size (set with `net_resolution`); `2` to scale it to the final output size (set with"
......@@ -113,7 +113,7 @@ DEFINE_int32(heatmaps_scale, 2, "Set 0 to scale op::Datu
DEFINE_bool(part_candidates, false, "Also enable `write_json` in order to save this information. If true, it will fill the"
" op::Datum::poseCandidates array with the body part candidates. Candidates refer to all"
" the detected body parts, before being assembled into people. Note that the number of"
" candidates is equal or higher than the number of final body parts (i.e. after being"
" candidates is equal or higher than the number of final body parts (i.e., after being"
" assembled into people). The empty body parts are filled with 0s. Program speed will"
" slightly decrease. Not required for OpenPose, enable it only if you intend to explicitly"
" use this information.");
......@@ -137,7 +137,7 @@ DEFINE_double(hand_scale_range, 0.4, "Analogous purpose than
" between smallest and biggest scale. The scales will be centered in ratio 1. E.g., if"
" scaleRange = 0.4 and scalesNumber = 2, then there will be 2 scales, 0.8 and 1.2.");
DEFINE_bool(hand_tracking, false, "Adding hand tracking might improve hand keypoints detection for webcam (if the frame rate"
" is high enough, i.e. >7 FPS per GPU) and video. This is not person ID tracking, it"
" is high enough, i.e., >7 FPS per GPU) and video. This is not person ID tracking, it"
" simply looks for hands in positions at which hands were located in previous frames, but"
" it does not guarantee the same person ID among frames.");
// OpenPose 3-D Reconstruction
......@@ -171,12 +171,12 @@ DEFINE_bool(disable_blending, false, "If enabled, it will ren
DEFINE_double(render_threshold, 0.05, "Only estimated keypoints whose score confidences are higher than this threshold will be"
" rendered. Generally, a high threshold (> 0.5) will only render very clear body parts;"
" while small thresholds (~0.1) will also output guessed and occluded keypoints, but also"
" more false positives (i.e. wrong detections).");
" more false positives (i.e., wrong detections).");
DEFINE_int32(render_pose, -1, "Set to 0 for no rendering, 1 for CPU rendering (slightly faster), and 2 for GPU rendering"
" (slower but greater functionality, e.g., `alpha_X` flags). If -1, it will pick CPU if"
" CPU_ONLY is enabled, or GPU if CUDA is enabled. If rendering is enabled, it will render"
" both `outputData` and `cvOutputData` with the original image and desired body part to be"
" shown (i.e. keypoints, heat maps or PAFs).");
" shown (i.e., keypoints, heat maps or PAFs).");
DEFINE_double(alpha_pose, 0.6, "Blending factor (range 0-1) for the body part rendering. 1 will show it completely, 0 will"
" hide it. Only valid for GPU rendering.");
DEFINE_double(alpha_heatmap, 0.7, "Blending factor (range 0-1) between heatmap and original frame. 1 will only show the"
......
......@@ -19,7 +19,7 @@ namespace op
* part of the window.
* @param initialWindowedSize const Point<int> with the initial window output resolution (width and height).
* @param fullScreen bool from which the FrameDisplayer::FullScreenMode property mFullScreenMode will be set,
* i.e. specifying the type of initial display (it can be changed later).
* i.e., specifying the type of initial display (it can be changed later).
*/
FrameDisplayer(const std::string& windowedName = OPEN_POSE_NAME_AND_VERSION,
const Point<int>& initialWindowedSize = Point<int>{}, const bool fullScreen = false);
......
......@@ -57,7 +57,7 @@ namespace op
* This function returns the hand keypoins. VERY IMPORTANT: use getHandKeypoints().clone() if the keypoints are
* going to be edited in a different thread.
* @return A std::array with all the left hand keypoints (index 0) and all the right ones (index 1). Each
* Array<float> follows the pose structure, i.e. the first dimension corresponds to all the people in the
* Array<float> follows the pose structure, i.e., the first dimension corresponds to all the people in the
* image, the second to each specific keypoint, and the third one to (x, y, score).
*/
std::array<Array<float>, 2> getHandKeypoints() const;
......
......@@ -33,6 +33,9 @@ namespace op
void setScaleNetToOutput(const T scaleNetToOutput);
virtual void Forward(const std::vector<caffe::Blob<T>*>& bottom, Array<T>& poseKeypoints,
Array<T>& poseScores);
virtual void Forward_cpu(const std::vector<caffe::Blob<T>*>& bottom, Array<T>& poseKeypoints,
Array<T>& poseScores);
......
......@@ -22,6 +22,8 @@ namespace op
virtual inline const char* type() const { return "Maximum"; }
virtual void Forward(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top);
virtual void Forward_cpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top);
virtual void Forward_gpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top);
......
......@@ -28,6 +28,8 @@ namespace op
// Empirically gives better results (copied from Matlab original code)
void setOffset(const Point<T>& offset);
virtual void Forward(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top);
virtual void Forward_cpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top);
virtual void Forward_gpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top);
......
......@@ -26,6 +26,8 @@ namespace op
void setScaleRatios(const std::vector<T>& scaleRatios);
virtual void Forward(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top);
virtual void Forward_cpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top);
virtual void Forward_gpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top);
......
......@@ -18,8 +18,7 @@ namespace op
void scaleKeypoints2d(Array<T>& keypoints, const T scaleX, const T scaleY);
template <typename T>
void scaleKeypoints2d(Array<T>& keypoints, const T scaleX, const T scaleY, const T offsetX,
const T offsetY);
void scaleKeypoints2d(Array<T>& keypoints, const T scaleX, const T scaleY, const T offsetX, const T offsetY);
template <typename T>
void renderKeypointsCpu(Array<T>& frameArray, const Array<T>& keypoints, const std::vector<unsigned int>& pairs,
......@@ -37,6 +36,23 @@ namespace op
template <typename T>
int getBiggestPerson(const Array<T>& keypoints, const T threshold);
template <typename T>
int getNonZeroKeypoints(const Array<T>& keypoints, const int person, const T threshold);
template <typename T>
T getDistanceAverage(const Array<T>& keypoints, const int personA, const int personB, const T threshold);
template <typename T>
T getDistanceAverage(const Array<T>& keypointsA, const int personA, const Array<T>& keypointsB, const int personB,
const T threshold);
template <typename T>
float getKeypointsROI(const Array<T>& keypoints, const int personA, const int personB, const T threshold);
template <typename T>
float getKeypointsROI(const Array<T>& keypointsA, const int personA, const Array<T>& keypointsB, const int personB,
const T threshold);
}
#endif // OPENPOSE_UTILITIES_KEYPOINT_HPP
......@@ -16,9 +16,11 @@ namespace op
OP_API double resizeGetScaleFactor(const Point<int>& initialSize, const Point<int>& targetSize);
OP_API cv::Mat resizeFixedAspectRatio(const cv::Mat& cvMat, const double scaleFactor, const Point<int>& targetSize,
const int borderMode = cv::BORDER_CONSTANT,
const cv::Scalar& borderValue = cv::Scalar{0,0,0});
OP_API void resizeFixedAspectRatio(
cv::Mat& resizedCvMat, const cv::Mat& cvMat, const double scaleFactor, const Point<int>& targetSize,
const int borderMode = cv::BORDER_CONSTANT, const cv::Scalar& borderValue = cv::Scalar{0,0,0});
OP_API void keepRoiInside(cv::Rect& roi, const int imageWidth, const int imageHeight);
}
#endif // OPENPOSE_UTILITIES_OPEN_CV_HPP
......@@ -48,7 +48,7 @@ namespace op
/**
* Rendering threshold. Only estimated keypoints whose score confidences are higher than this value will be
* rendered. Generally, a high threshold (> 0.5) will only render very clear body parts; while small thresholds
* (~0.1) will also output guessed and occluded keypoints, but also more false positives (i.e. wrong
* (~0.1) will also output guessed and occluded keypoints, but also more false positives (i.e., wrong
* detections).
*/
float renderThreshold;
......
......@@ -42,8 +42,8 @@ namespace op
/**
* Whether to add tracking between frames. Adding hand tracking might improve hand keypoints detection for
* webcam (if the frame rate is high enough, i.e. >7 FPS per GPU) and video. This is not person ID tracking, it
* simply looks for hands in positions at which hands were located in previous frames, but it does not
* webcam (if the frame rate is high enough, i.e., >7 FPS per GPU) and video. This is not person ID tracking,
* it simply looks for hands in positions at which hands were located in previous frames, but it does not
* guarantee the same person id among frames.
*/
bool tracking;
......@@ -70,7 +70,7 @@ namespace op
/**
* Rendering threshold. Only estimated keypoints whose score confidences are higher than this value will be
* rendered. Generally, a high threshold (> 0.5) will only render very clear body parts; while small thresholds
* (~0.1) will also output guessed and occluded keypoints, but also more false positives (i.e. wrong
* (~0.1) will also output guessed and occluded keypoints, but also more false positives (i.e., wrong
* detections).
*/
float renderThreshold;
......
......@@ -143,7 +143,7 @@ namespace op
/**
* Rendering threshold. Only estimated keypoints whose score confidences are higher than this value will be
* rendered. Generally, a high threshold (> 0.5) will only render very clear body parts; while small thresholds
* (~0.1) will also output guessed and occluded keypoints, but also more false positives (i.e. wrong
* (~0.1) will also output guessed and occluded keypoints, but also more false positives (i.e., wrong
* detections).
*/
float renderThreshold;
......
......@@ -122,7 +122,7 @@ public:
peaksBlob = { std::make_shared<caffe::Blob<float>>(1,1,1,1) };
bodyPartConnectorCaffe->setPoseModel(poseModel);
// Step 4 - Initialize resources on desired thread (in this case single thread, i.e. we init resources here)
// Step 4 - Initialize resources on desired thread (in this case single thread, i.e., we init resources here)
poseExtractorCaffe->initializationOnThread();
poseRenderer->initializationOnThread();
}
......
......@@ -314,7 +314,7 @@ namespace op
// If the difference between them is <= 180 degrees, then we just return the traditional average.
// Examples:
// - If both have the same signs, i.e. both in range [0, 180] or both in range (-180, 0)
// - If both have the same signs, i.e., both in range [0, 180] or both in range (-180, 0)
// - If one in range [0, 90] and the other in range [-90, 0]
// - Etc.
auto average = std::accumulate(anglesNormalized.begin(), anglesNormalized.end(), 0.)
......
......@@ -33,8 +33,8 @@ namespace op
{
inputNetData[i].reset({1, 3, netInputSizes.at(i).y, netInputSizes.at(i).x});
std::vector<double> scaleRatios(numberScales, 1.f);
const cv::Mat frameWithNetSize = resizeFixedAspectRatio(cvInputData, scaleInputToNetInputs[i],
netInputSizes[i]);
cv::Mat frameWithNetSize;
resizeFixedAspectRatio(frameWithNetSize, cvInputData, scaleInputToNetInputs[i], netInputSizes[i]);
// Fill inputNetData[i]
uCharCvMatToFloatPtr(inputNetData[i].getPtr(), frameWithNetSize,
(mPoseModel == PoseModel::BODY_19N ? 2 : 1));
......
......@@ -18,8 +18,8 @@ namespace op
if (outputResolution.x <= 0 || outputResolution.y <= 0)
error("Output resolution has 0 area.", __LINE__, __FUNCTION__, __FILE__);
// outputData - Reescale keeping aspect ratio and transform to float the output image
const cv::Mat frameWithOutputSize = resizeFixedAspectRatio(cvInputData, scaleInputToOutput,
outputResolution);
cv::Mat frameWithOutputSize;
resizeFixedAspectRatio(frameWithOutputSize, cvInputData, scaleInputToOutput, outputResolution);
Array<float> outputData({outputResolution.y, outputResolution.x, 3});
frameWithOutputSize.convertTo(outputData.getCvMat(), CV_32FC3);
// Return result
......
......@@ -17,7 +17,7 @@ namespace op
try
{
// Remove people if #people > mNumberPeopleMax
if (peopleArray.getSize(0) > (unsigned int)mNumberPeopleMax && mNumberPeopleMax > 0)
if (peopleArray.getSize(0) > mNumberPeopleMax && mNumberPeopleMax > 0)
{
// Sanity checks
if (poseScores.getVolume() != (unsigned int) poseScores.getSize(0))
......
......@@ -65,7 +65,7 @@ namespace op
{
const auto currentScale = 1. - i*mScaleGap;
if (currentScale < 0. || 1. < currentScale)
error("All scales must be in the range [0, 1], i.e. 0 <= 1-scale_number*scale_gap <= 1",
error("All scales must be in the range [0, 1], i.e., 0 <= 1-scale_number*scale_gap <= 1",
__LINE__, __FUNCTION__, __FILE__);
const auto targetWidth = fastTruncate(intRound(poseNetInputSize.x * currentScale) / 16 * 16, 1,
......
......@@ -55,7 +55,7 @@ namespace op
// factor * dist(neck, headNose)
if (neckScoreAbove && headNoseScoreAbove)
{
// If profile (i.e. only 1 eye and ear visible) --> avg(headNose, eye & ear position)
// If profile (i.e., only 1 eye and ear visible) --> avg(headNose, eye & ear position)
if ((lEyeScoreAbove) == (lEarScoreAbove)
&& (rEyeScoreAbove) == (rEarScoreAbove)
&& (lEyeScoreAbove) != (rEyeScoreAbove))
......
......@@ -251,30 +251,12 @@ namespace op
}
// 2. Resize heat maps + merge different scales
#ifdef USE_CUDA
upImpl->spResizeAndMergeCaffe->Forward_gpu({upImpl->spCaffeNetOutputBlob.get()},
{upImpl->spHeatMapsBlob.get()});
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#elif USE_OPENCL
upImpl->spResizeAndMergeCaffe->Forward_ocl({upImpl->spCaffeNetOutputBlob.get()},
{upImpl->spHeatMapsBlob.get()});
#else
upImpl->spResizeAndMergeCaffe->Forward_cpu({upImpl->spCaffeNetOutputBlob.get()},
{upImpl->spHeatMapsBlob.get()});
#endif
upImpl->spResizeAndMergeCaffe->Forward(
{upImpl->spCaffeNetOutputBlob.get()}, {upImpl->spHeatMapsBlob.get()});
// 3. Get peaks by Non-Maximum Suppression
#ifdef USE_CUDA
upImpl->spMaximumCaffe->Forward_gpu({upImpl->spHeatMapsBlob.get()},
{upImpl->spPeaksBlob.get()});
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#elif USE_OPENCL
// CPU Version is already very fast (4ms) and data is sent to connectKeypoints as CPU for now anyway
upImpl->spMaximumCaffe->Forward_cpu({upImpl->spHeatMapsBlob.get()}, {upImpl->spPeaksBlob.get()});
#else
upImpl->spMaximumCaffe->Forward_cpu({upImpl->spHeatMapsBlob.get()},
{upImpl->spPeaksBlob.get()});
#endif
upImpl->spMaximumCaffe->Forward(
{upImpl->spHeatMapsBlob.get()}, {upImpl->spPeaksBlob.get()});
const auto* facePeaksPtr = upImpl->spPeaksBlob->mutable_cpu_data();
for (auto part = 0 ; part < mFaceKeypoints.getSize(1) ; part++)
......@@ -294,14 +276,16 @@ namespace op
mFaceKeypoints[baseIndex+2] = score;
}
// HeatMaps: storing
if (!mHeatMapTypes.empty()){
#ifdef USE_CUDA
updateFaceHeatMapsForPerson(mHeatMaps, person, mHeatMapScaleMode,
upImpl->spHeatMapsBlob->gpu_data());
#else
updateFaceHeatMapsForPerson(mHeatMaps, person, mHeatMapScaleMode,
upImpl->spHeatMapsBlob->cpu_data());
#endif
if (!mHeatMapTypes.empty())
{
updateFaceHeatMapsForPerson(
mHeatMaps, person, mHeatMapScaleMode,
#ifdef USE_CUDA
upImpl->spHeatMapsBlob->gpu_data()
#else
upImpl->spHeatMapsBlob->cpu_data()
#endif
);
}
}
}
......@@ -310,6 +294,11 @@ namespace op
}
else
mFaceKeypoints.reset();
// 5. CUDA sanity check
#ifdef USE_CUDA
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#endif
#else
UNUSED(faceRectangles);
UNUSED(cvInputData);
......
......@@ -18,7 +18,7 @@ namespace op
{
try
{
// Free CUDA pointers - Note that if pointers are 0 (i.e. nullptr), no operation is performed.
// Free CUDA pointers - Note that if pointers are 0 (i.e., nullptr), no operation is performed.
#ifdef USE_CUDA
cudaFree(pGpuFace);
#endif
......
......@@ -71,7 +71,7 @@ namespace op
mJsonOfstream.plainText("1");
mJsonOfstream.comma();
// keypoints - i.e. poseKeypoints
// keypoints - i.e., poseKeypoints
mJsonOfstream.key("keypoints");
mJsonOfstream.arrayOpen();
std::vector<int> indexesInCocoOrder;
......
......@@ -414,32 +414,20 @@ namespace op
}
// 2. Resize heat maps + merge different scales
#ifdef USE_CUDA
upImpl->spResizeAndMergeCaffe->Forward_gpu({upImpl->spCaffeNetOutputBlob.get()},
{upImpl->spHeatMapsBlob.get()});
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#elif USE_OPENCL
upImpl->spResizeAndMergeCaffe->Forward_ocl({upImpl->spCaffeNetOutputBlob.get()},
{upImpl->spHeatMapsBlob.get()});
#else
upImpl->spResizeAndMergeCaffe->Forward_cpu({upImpl->spCaffeNetOutputBlob.get()},
{upImpl->spHeatMapsBlob.get()});
#endif
upImpl->spResizeAndMergeCaffe->Forward(
{upImpl->spCaffeNetOutputBlob.get()}, {upImpl->spHeatMapsBlob.get()});
// 3. Get peaks by Non-Maximum Suppression
#ifdef USE_CUDA
upImpl->spMaximumCaffe->Forward_gpu({upImpl->spHeatMapsBlob.get()}, {upImpl->spPeaksBlob.get()});
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#elif USE_OPENCL
// CPU Version is already very fast (4ms) and data is sent to connectKeypoints as CPU for now anyway
upImpl->spMaximumCaffe->Forward_cpu({upImpl->spHeatMapsBlob.get()}, {upImpl->spPeaksBlob.get()});
#else
upImpl->spMaximumCaffe->Forward_cpu({upImpl->spHeatMapsBlob.get()}, {upImpl->spPeaksBlob.get()});
#endif
upImpl->spMaximumCaffe->Forward({upImpl->spHeatMapsBlob.get()}, {upImpl->spPeaksBlob.get()});
// Estimate keypoint locations
connectKeypoints(handCurrent, person, affineMatrix,
upImpl->spPeaksBlob->mutable_cpu_data());
// 5. CUDA sanity check
#ifdef USE_CUDA
cudaCheck(__LINE__, __FUNCTION__, __FILE__);
#endif
#else
UNUSED(handCurrent);
UNUSED(person);
......
......@@ -18,7 +18,7 @@ namespace op
{
try
{
// Free CUDA pointers - Note that if pointers are 0 (i.e. nullptr), no operation is performed.
// Free CUDA pointers - Note that if pointers are 0 (i.e., nullptr), no operation is performed.
#ifdef USE_CUDA
cudaFree(pGpuHand);
#endif
......
......@@ -153,6 +153,26 @@ namespace op
}
}
// Device-dispatching entry point for the body-part connector layer:
// calls Forward_gpu when compiled with USE_CUDA, otherwise Forward_cpu.
// Any exception is reported through op::error instead of propagating.
// @param bottom Input Caffe blobs consumed by the selected implementation.
// @param poseKeypoints Output array filled by Forward_gpu/Forward_cpu.
// @param poseScores Output array filled by Forward_gpu/Forward_cpu.
template <typename T>
void BodyPartConnectorCaffe<T>::Forward(const std::vector<caffe::Blob<T>*>& bottom, Array<T>& poseKeypoints,
                                        Array<T>& poseScores)
{
    try
    {
        // CUDA
        #ifdef USE_CUDA
            Forward_gpu(bottom, poseKeypoints, poseScores);
        // OpenCL or CPU
        #else
            Forward_cpu(bottom, poseKeypoints, poseScores);
        #endif
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
template <typename T>
void BodyPartConnectorCaffe<T>::Forward_cpu(const std::vector<caffe::Blob<T>*>& bottom, Array<T>& poseKeypoints,
Array<T>& poseScores)
......
......@@ -85,6 +85,28 @@ namespace op
}
}
// Device-dispatching entry point for the maximum layer: calls Forward_gpu when
// compiled with USE_CUDA; otherwise (including OpenCL builds) it falls back to
// Forward_cpu, since no OpenCL path is dispatched here.
// Any exception is reported through op::error instead of propagating.
// @param bottom Input Caffe blobs.
// @param top Output Caffe blobs filled by the selected implementation.
template <typename T>
void MaximumCaffe<T>::Forward(const std::vector<caffe::Blob<T>*>& bottom,
                              const std::vector<caffe::Blob<T>*>& top)
{
    try
    {
        // CUDA
        #ifdef USE_CUDA
            Forward_gpu(bottom, top);
        // OpenCL or CPU
        #else
            // CPU Version is already very fast (4ms)
            Forward_cpu(bottom, top);
        #endif
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
template <typename T>
void MaximumCaffe<T>::Forward_cpu(const std::vector<caffe::Blob<T>*>& bottom,
const std::vector<caffe::Blob<T>*>& top)
......
......@@ -140,6 +140,28 @@ namespace op
}
}
// Device-dispatching entry point for the NMS layer: CUDA when USE_CUDA is
// defined, else OpenCL when USE_OPENCL evaluates true, else CPU.
// Any exception is reported through op::error instead of propagating.
// NOTE(review): `#elif USE_OPENCL` requires the macro to expand to a non-empty
// value (e.g. -DUSE_OPENCL=1); `#elif defined(USE_OPENCL)` would be more robust
// if the macro can ever be defined empty -- confirm against the build flags.
// @param bottom Input Caffe blobs.
// @param top Output Caffe blobs filled by the selected implementation.
template <typename T>
void NmsCaffe<T>::Forward(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top)
{
    try
    {
        // CUDA
        #ifdef USE_CUDA
            Forward_gpu(bottom, top);
        // OpenCL
        #elif USE_OPENCL
            Forward_ocl(bottom, top);
        // CPU
        #else
            Forward_cpu(bottom, top);
        #endif
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
template <typename T>
void NmsCaffe<T>::Forward_cpu(const std::vector<caffe::Blob<T>*>& bottom, const std::vector<caffe::Blob<T>*>& top)
{
......
......@@ -125,6 +125,29 @@ namespace op
}
}
// Device-dispatching entry point for the resize-and-merge layer: CUDA when
// USE_CUDA is defined, else OpenCL when USE_OPENCL evaluates true, else CPU.
// Any exception is reported through op::error instead of propagating.
// NOTE(review): as with NmsCaffe::Forward, `#elif USE_OPENCL` assumes the macro
// expands to a non-empty value -- confirm against the build flags.
// @param bottom Input Caffe blobs.
// @param top Output Caffe blobs filled by the selected implementation.
template <typename T>
void ResizeAndMergeCaffe<T>::Forward(const std::vector<caffe::Blob<T>*>& bottom,
                                     const std::vector<caffe::Blob<T>*>& top)
{
    try
    {
        // CUDA
        #ifdef USE_CUDA
            Forward_gpu(bottom, top);
        // OpenCL
        #elif USE_OPENCL
            Forward_ocl(bottom, top);
        // CPU
        #else
            Forward_cpu(bottom, top);
        #endif
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
template <typename T>
void ResizeAndMergeCaffe<T>::Forward_cpu(const std::vector<caffe::Blob<T>*>& bottom,
const std::vector<caffe::Blob<T>*>& top)
......
......@@ -30,7 +30,7 @@ namespace op
{
try
{
// Free CUDA pointers - Note that if pointers are 0 (i.e. nullptr), no operation is performed.
// Free CUDA pointers - Note that if pointers are 0 (i.e., nullptr), no operation is performed.
#ifdef USE_CUDA
cudaFree(pGpuPose);
#endif
......
......@@ -106,7 +106,7 @@ namespace op
else
{
check(fpsMode == ProducerFpsMode::RetrievalFps || get(CV_CAP_PROP_FPS) > 0,
"Selected to keep the source fps but get(CV_CAP_PROP_FPS) <= 0, i.e. the source did not set"
"Selected to keep the source fps but get(CV_CAP_PROP_FPS) <= 0, i.e., the source did not set"
" its fps property.", __LINE__, __FUNCTION__, __FILE__);
mProducerFpsMode = {fpsMode};
}
......@@ -263,7 +263,7 @@ namespace op
if (isOpened())
{
// OpenCV closing issue: OpenCV goes in the range [1, get(CV_CAP_PROP_FRAME_COUNT) - 1] in some
// videos (i.e. there is a frame missing), mNumberEmptyFrames allows the program to be properly
// videos (i.e., there is a frame missing), mNumberEmptyFrames allows the program to be properly
// closed keeping the 0-index frame counting
if (mNumberEmptyFrames > 2
|| (mType != ProducerType::FlirCamera && mType != ProducerType::IPCamera
......
......@@ -4,9 +4,11 @@
#include <cuda_runtime.h>
#include <cuda_runtime_api.h>
#include <opencv2/opencv.hpp>
// OpenCV 2.X
#if (defined(CV_VERSION_EPOCH) && CV_VERSION_EPOCH == 2)
#include <opencv2/gpu/gpu.hpp>
#define cvCuda cv::gpu
// OpenCV 3.X
#else
#include <opencv2/core/cuda.hpp>
#include <opencv2/cudaimgproc.hpp>
......
......@@ -107,7 +107,7 @@ namespace op
{
try
{
if (!keypoints.empty() && scaleX != T(1) && scaleY != T(1))
if (!keypoints.empty() && (scaleX != T(1) || scaleY != T(1)))
{
// Error check
if (keypoints.getSize(2) != 3)
......@@ -141,7 +141,7 @@ namespace op
{
try
{
if (!keypoints.empty() && scaleX != T(1) && scaleY != T(1))
if (!keypoints.empty() && (scaleX != T(1) || scaleY != T(1) || offsetX != T(0) || offsetY != T(0)))
{
// Error check
if (keypoints.getSize(2) != 3)
......@@ -402,4 +402,168 @@ namespace op
}
template OP_API int getBiggestPerson(const Array<float>& keypoints, const float threshold);
template OP_API int getBiggestPerson(const Array<double>& keypoints, const double threshold);
// Counts how many keypoints of `person` have a confidence score (third channel
// of each keypoint triplet) of at least `threshold`.
// @param keypoints Keypoint array laid out as (people, parts, {x, y, score}).
// @param person Index of the person to inspect; must be < keypoints.getSize(0).
// @param threshold Minimum score for a keypoint to be counted.
// @return The number of qualifying keypoints; 0 for an empty array or on error.
template <typename T>
int getNonZeroKeypoints(const Array<T>& keypoints, const int person, const T threshold)
{
    try
    {
        // Nothing to count on an empty array
        if (keypoints.empty())
            return 0;
        // Sanity check
        if (keypoints.getSize(0) <= person)
            error("Person index out of range.", __LINE__, __FUNCTION__, __FILE__);
        // Count the parts of `person` whose score reaches the threshold
        const auto personOffset = person * keypoints.getVolume(1,2);
        const auto numberParts = keypoints.getSize(1);
        int detectedParts = 0;
        for (auto part = 0 ; part < numberParts ; part++)
        {
            const auto score = keypoints[personOffset + 3*part + 2];
            if (score >= threshold)
                ++detectedParts;
        }
        return detectedParts;
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        return 0;
    }
}
template OP_API int getNonZeroKeypoints(const Array<float>& keypoints, const int person, const float threshold);
template OP_API int getNonZeroKeypoints(const Array<double>& keypoints, const int person, const double threshold);
// Convenience overload: average keypoint distance between two people stored in
// the SAME keypoint array; delegates to the two-array overload below.
// @param keypoints Keypoint array laid out as (people, parts, {x, y, score}).
// @param personA Index of the first person.
// @param personB Index of the second person.
// @param threshold Minimum score for a keypoint pair to contribute.
// @return The average distance (see the two-array overload); T(0) on error.
template <typename T>
T getDistanceAverage(const Array<T>& keypoints, const int personA, const int personB, const T threshold)
{
    try
    {
        return getDistanceAverage(keypoints, personA, keypoints, personB, threshold);
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        return T(0);
    }
}
template OP_API float getDistanceAverage(
    const Array<float>& keypoints, const int personA, const int personB, const float threshold);
template OP_API double getDistanceAverage(
    const Array<double>& keypoints, const int personA, const int personB, const double threshold);
// Average 2-D Euclidean distance between the keypoints of `personA` (in
// `keypointsA`) and `personB` (in `keypointsB`), computed only over the parts
// whose score reaches `threshold` in BOTH arrays.
// @param keypointsA First keypoint array, laid out as (people, parts, {x, y, score}).
// @param personA Person index into keypointsA; must be < keypointsA.getSize(0).
// @param keypointsB Second keypoint array with the same number of parts.
// @param personB Person index into keypointsB; must be < keypointsB.getSize(0).
// @param threshold Minimum score for a part to contribute to the average.
// @return The average distance, or T(0) when the two people share no part
//         above the threshold or on error.
template <typename T>
T getDistanceAverage(const Array<T>& keypointsA, const int personA, const Array<T>& keypointsB, const int personB,
                     const T threshold)
{
    try
    {
        // Sanity checks
        if (keypointsA.getSize(0) <= personA)
            error("PersonA index out of range.", __LINE__, __FUNCTION__, __FILE__);
        if (keypointsB.getSize(0) <= personB)
            error("PersonB index out of range.", __LINE__, __FUNCTION__, __FILE__);
        if (keypointsA.getSize(1) != keypointsB.getSize(1))
            error("Keypoints should have the same number of keypoints.", __LINE__, __FUNCTION__, __FILE__);
        // Get total distance over the parts visible (above threshold) in both people
        T totalDistance = 0;
        int nonZeroCounter = 0;
        const auto baseIndexA = personA * keypointsA.getVolume(1,2);
        const auto baseIndexB = personB * keypointsB.getVolume(1,2);
        for (auto part = 0 ; part < keypointsA.getSize(1) ; part++)
        {
            if (keypointsA[baseIndexA+3*part+2] >= threshold && keypointsB[baseIndexB+3*part+2] >= threshold)
            {
                const auto x = keypointsA[baseIndexA+3*part] - keypointsB[baseIndexB+3*part];
                const auto y = keypointsA[baseIndexA+3*part+1] - keypointsB[baseIndexB+3*part+1];
                totalDistance += T(std::sqrt(x*x+y*y));
                nonZeroCounter++;
            }
        }
        // Fixed: guard the division. Previously, when no part was above the
        // threshold in both people, this computed 0/0 and returned NaN.
        if (nonZeroCounter == 0)
            return T(0);
        // Get distance average
        return totalDistance / nonZeroCounter;
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        return T(0);
    }
}
template OP_API float getDistanceAverage(
    const Array<float>& keypointsA, const int personA, const Array<float>& keypointsB, const int personB,
    const float threshold);
template OP_API double getDistanceAverage(
    const Array<double>& keypointsA, const int personA, const Array<double>& keypointsB, const int personB,
    const double threshold);
// Convenience overload: intersection-over-union (IoU) of the keypoint bounding
// rectangles of two people stored in the SAME keypoint array; delegates to the
// two-array overload below.
// @param keypoints Keypoint array laid out as (people, parts, {x, y, score}).
// @param personA Index of the first person.
// @param personB Index of the second person.
// @param threshold Minimum score for a keypoint to enlarge the rectangles.
// @return IoU in [0, 1]; 0.f when there is no overlap or on error.
template <typename T>
float getKeypointsROI(const Array<T>& keypoints, const int personA, const int personB, const T threshold)
{
    try
    {
        return getKeypointsROI(keypoints, personA, keypoints, personB, threshold);
    }
    catch (const std::exception& e)
    {
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
        return 0.f;
    }
}
template OP_API float getKeypointsROI(
    const Array<float>& keypoints, const int personA, const int personB, const float threshold);
template OP_API float getKeypointsROI(
    const Array<double>& keypoints, const int personA, const int personB, const double threshold);
template <typename T>
float getKeypointsROI(const Array<T>& keypointsA, const int personA, const Array<T>& keypointsB, const int personB,
const T threshold)
{
try
{
// Sanity checks
if (keypointsA.getSize(0) <= personA)
error("PersonA index out of range.", __LINE__, __FUNCTION__, __FILE__);
if (keypointsB.getSize(0) <= personB)
error("PersonB index out of range.", __LINE__, __FUNCTION__, __FILE__);
if (keypointsA.getSize(1) != keypointsB.getSize(1))
error("Keypoints should have the same number of keypoints.", __LINE__, __FUNCTION__, __FILE__);
// Get ROI
const auto rectangleA = getKeypointsRectangle(keypointsA, personA, threshold);
const auto rectangleB = getKeypointsRectangle(keypointsB, personB, threshold);
const Point<T> pointAIntersection{
fastMax(rectangleA.x, rectangleB.x),
fastMax(rectangleA.y, rectangleB.y)
};
const Point<T> pointBIntersection{
fastMin(rectangleA.x+rectangleA.width, rectangleB.x+rectangleB.width),
fastMin(rectangleA.y+rectangleA.height, rectangleB.y+rectangleB.height)
};
// Make sure there is overlap
if (pointAIntersection.x < pointBIntersection.x && pointAIntersection.y < pointBIntersection.y)
{
const Rectangle<T> rectangleIntersection{
pointAIntersection.x,
pointAIntersection.y,
pointBIntersection.x-pointAIntersection.x,
pointBIntersection.y-pointAIntersection.y
};
const auto areaA = rectangleA.area();
const auto areaB = rectangleB.area();
const auto intersection = rectangleIntersection.area();
return float(intersection) / float(areaA + areaB - intersection);
}
// If non overlap --> Return 0
return 0.f;
}
catch (const std::exception& e)
{
error(e.what(), __LINE__, __FUNCTION__, __FILE__);
return 0.f;
}
}
template OP_API float getKeypointsROI(
const Array<float>& keypointsA, const int personA, const Array<float>& keypointsB, const int personB,
const float threshold);
template OP_API float getKeypointsROI(
const Array<double>& keypointsA, const int personA, const Array<double>& keypointsB, const int personB,
const double threshold);
}
......@@ -158,27 +158,54 @@ namespace op
}
}
cv::Mat resizeFixedAspectRatio(const cv::Mat& cvMat, const double scaleFactor, const Point<int>& targetSize,
const int borderMode, const cv::Scalar& borderValue)
void resizeFixedAspectRatio(cv::Mat& resizedCvMat, const cv::Mat& cvMat, const double scaleFactor,
const Point<int>& targetSize, const int borderMode, const cv::Scalar& borderValue)
{
try
{
const cv::Size cvTargetSize{targetSize.x, targetSize.y};
cv::Mat resultingCvMat;
cv::Mat M = cv::Mat::eye(2,3,CV_64F);
M.at<double>(0,0) = scaleFactor;
M.at<double>(1,1) = scaleFactor;
if (scaleFactor != 1. || cvTargetSize != cvMat.size())
cv::warpAffine(cvMat, resultingCvMat, M, cvTargetSize,
(scaleFactor < 1. ? cv::INTER_AREA : cv::INTER_CUBIC), borderMode, borderValue);
cv::warpAffine(cvMat, resizedCvMat, M, cvTargetSize,
(scaleFactor > 1. ? cv::INTER_CUBIC : cv::INTER_AREA), borderMode, borderValue);
else
resultingCvMat = cvMat.clone();
return resultingCvMat;
cvMat.copyTo(resizedCvMat);
}
catch (const std::exception& e)
{
error(e.what(), __LINE__, __FUNCTION__, __FILE__);
}
}
/**
 * Clips roi in place so it lies fully inside an imageWidth x imageHeight
 * image, shrinking it as needed; width/height never become negative.
 * @param roi         Rectangle to clamp (modified in place).
 * @param imageWidth  Image width in pixels.
 * @param imageHeight Image height in pixels.
 */
void keepRoiInside(cv::Rect& roi, const int imageWidth, const int imageHeight)
{
    try
    {
        // Negative top-left corner: move it to 0 and shrink the ROI by the
        // amount that was outside the image
        if (roi.x < 0)
        {
            roi.width += roi.x;
            roi.x = 0;
        }
        if (roi.y < 0)
        {
            roi.height += roi.y;
            roi.y = 0;
        }
        // ROI extends past the right/bottom border: shrink it to fit
        if (roi.width + roi.x >= imageWidth)
            roi.width = imageWidth - 1 - roi.x;
        if (roi.height + roi.y >= imageHeight)
            roi.height = imageHeight - 1 - roi.y;
        // ROI completely outside the image would leave negative dimensions;
        // clamp them to 0
        roi.width = fastMax(0, roi.width);
        roi.height = fastMax(0, roi.height);
    }
    catch (const std::exception& e)
    {
        // Void function: report and fall through (fix: removed a stray
        // `return cv::Mat();` left over from a cv::Mat-returning sibling,
        // which does not compile in a void function)
        error(e.what(), __LINE__, __FUNCTION__, __FILE__);
    }
}
}
......@@ -45,9 +45,9 @@ namespace op
wrapperStructOutput.writeHeatMapsFormat != "float"))
{
const auto message = "In order to save the heatmaps, you must either set"
" wrapperStructPose.heatMapScale to ScaleMode::UnsignedChar (i.e. range [0, 255])"
" or `--write_heatmaps_format` to `float` to storage floating numbers in binary"
" mode.";
" wrapperStructPose.heatMapScale to ScaleMode::UnsignedChar (i.e., range"
" [0, 255]) or `--write_heatmaps_format` to `float` to storage floating numbers"
" in binary mode.";
error(message, __LINE__, __FUNCTION__, __FILE__);
}
if (userOutputWsEmpty && threadManagerMode != ThreadManagerMode::Asynchronous
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册