Greedy LK matching + AVX/SSE4 optimizations (#597)

- Greedy/Matrix version of LucasKanade vs OpenPose matcher in identification - AVX and SSE4 optimizations flags.

Greedy LK matching + AVX/SSE4 optimizations (#597)
- Greedy/Matrix version of LucasKanade vs OpenPose matcher in identification - AVX and SSE4 optimizations flags.
d9e745a0 · fragalfernando · Gines · 825dfcae · d9e745a0 · d9e745a0
4 changed file
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -59,8 +59,23 @@ endif (WIN32)

 # C++ additional flags
 if (CMAKE_COMPILER_IS_GNUCXX)
+
+  # Select the Enhanced Instruction Set
+  set(INSTRUCTION_SET NONE CACHE STRING "Enable Enhanced Instruction Set")
+  set_property(CACHE INSTRUCTION_SET PROPERTY STRINGS NONE SSE4 AVX)
+
+  if (${INSTRUCTION_SET} MATCHES "SSE4")
+    add_definitions("-DWITH_SSE4")
+    set(SIMD_FLAGS "${SIMD_FLAGS} -msse4.1")
+  endif(${INSTRUCTION_SET} MATCHES "SSE4")
+
+  if (${INSTRUCTION_SET} MATCHES "AVX")
+    add_definitions("-DWITH_AVX")
+    set(SIMD_FLAGS "${SIMD_FLAGS} -mavx")
+  endif(${INSTRUCTION_SET} MATCHES "AVX")
+
  message(STATUS "GCC detected, adding compile flags")
-  set(OP_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp -Wpedantic -Wall -Wextra -Wfatal-errors")
+  set(OP_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${SIMD_FLAGS} -fopenmp -Wpedantic -Wall -Wextra -Wfatal-errors")
 endif (CMAKE_COMPILER_IS_GNUCXX)



--- a/include/openpose/experimental/tracking/personIdExtractor.hpp
+++ b/include/openpose/experimental/tracking/personIdExtractor.hpp
@@ -2,7 +2,9 @@
 #define OPENPOSE_TRACKING_PERSON_ID_EXTRACTOR_HPP

 #include <atomic>
+#include <tuple>
 #include <unordered_map>
+#include <unordered_set>
 #include <openpose/core/common.hpp>

 namespace op

--- a/src/openpose/experimental/tracking/personIdExtractor.cpp
+++ b/src/openpose/experimental/tracking/personIdExtractor.cpp
@@ -108,9 +108,9 @@ namespace op
    }

    void initializeLK(std::unordered_map<int,PersonEntry>& personEntries,
-                     long long& mNextPersonId,
-                     const Array<float>& poseKeypoints,
-                     const float confidenceThreshold)
+                      long long& mNextPersonId,
+                      const Array<float>& poseKeypoints,
+                      const float confidenceThreshold)
    {
        try
        {
@@ -142,36 +142,53 @@ namespace op
        }
    }

-    Array<long long> matchLKAndOP(std::unordered_map<int,PersonEntry>& personEntries,
-                                  long long& nextPersonId,
-                                  const std::vector<PersonEntry>& openposePersonEntries,
-                                  const cv::Mat& imagePrevious,
-                                  const float inlierRatioThreshold,
-                                  const float distanceThreshold)
+    bool compareCandidates(std::tuple<float, int, int> a, std::tuple<float, int, int> b)
+    {
+        return std::get<0>(a) > std::get<0>(b);
+    }
+
+    Array<long long> matchLKAndOPGreedy(std::unordered_map<int,PersonEntry>& personEntries,
+                                        long long& nextPersonId,
+                                        const std::vector<PersonEntry>& openposePersonEntries,
+                                        const cv::Mat& imagePrevious,
+                                        const float inlierRatioThreshold,
+                                        const float distanceThreshold)
    {
        try
        {
            Array<long long> poseIds{(int)openposePersonEntries.size(), -1};
            std::unordered_map<int, PersonEntry> pendingQueue;
+            std::vector<bool> processed((int)openposePersonEntries.size(), false);
+            std::unordered_set<int> used;
+            bool converged = false;

-            if (!openposePersonEntries.empty())
+            while (!openposePersonEntries.empty() && !converged)
            {
                const auto numberKeypoints = openposePersonEntries[0].keypoints.size();
+                std::vector<std::tuple<float, int, int>> candidates;
+                float bestScore = 0.0f;
+                converged = true;
+
                for (auto i = 0u; i < openposePersonEntries.size(); i++)
                {
-                    auto& poseId = poseIds.at(i);
+                    if (poseIds.at(i) != -1)
+                        continue;
+
                    const auto& openposePersonEntry = openposePersonEntries.at(i);
                    const auto personDistanceThreshold = fastMax(10.f,
                        distanceThreshold*float(std::sqrt(imagePrevious.cols*imagePrevious.rows)) / 960.f);

                    // Find best correspondance in the LK set
-                    auto bestMatch = -1ll;
-                    auto bestScore = 0.f;
                    for (const auto& personEntry : personEntries)
                    {
+                        if (used.find(personEntry.first) != used.end())
+                            continue;
+
                        const auto& element = personEntry.second;
                        auto inliers = 0;
                        auto active = 0;
+                        auto distance = 0.f;
+                        auto total_distance = 0.0f;

                        // Security checks
                        if (element.status.size() != numberKeypoints)
@@ -184,6 +201,7 @@ namespace op
                        if (openposePersonEntry.keypoints.size() != numberKeypoints)
                            error("openposePersonEntry.keypoints.size() != numberKeypoints",
                                  __LINE__, __FUNCTION__, __FILE__);
+
                        // Iterate through all keypoints
                        for (auto kp = 0u; kp < numberKeypoints; kp++)
                        {
@@ -191,8 +209,10 @@ namespace op
                            if (!element.status[kp] && !openposePersonEntry.status[kp])
                            {
                                active++;
-                                const auto distance = getEuclideanDistance(element.keypoints[kp],
-                                                                           openposePersonEntry.keypoints[kp]);
+                                distance = getEuclideanDistance(element.keypoints[kp],
+                                                                openposePersonEntry.keypoints[kp]);
+                                total_distance += distance;
+
                                if (distance < personDistanceThreshold)
                                    inliers++;
                            }
@@ -201,26 +221,46 @@ namespace op
                        if (active > 0)
                        {
                            const auto score = inliers / (float)active;
-                            if (score > bestScore && score >= inlierRatioThreshold)
+
+                            if (score == bestScore && score >= inlierRatioThreshold)
+                            {
+                                candidates.push_back(std::make_tuple(total_distance,i, personEntry.first));
+                                bestScore = score;
+                            }
+                            else if (score > bestScore && score >= inlierRatioThreshold)
                            {
                                bestScore = score;
-                                bestMatch = personEntry.first;
+                                candidates.clear();
+                                candidates.push_back(std::make_tuple(total_distance, i, personEntry.first));
                            }
                        }
                    }
-                    // Found a best match, update LK table and poseIds
-                    if (bestMatch != -1)
-                        poseId = bestMatch;
-                    else
-                        poseId = nextPersonId++;
+                }
+                std::sort(candidates.begin(), candidates.end(), compareCandidates);
+
+                while (candidates.size())
+                {
+                    auto top_candidate = candidates.back();
+                    candidates.pop_back();
+                    auto idx_lk = std::get<2>(top_candidate);
+                    auto idx_op = std::get<1>(top_candidate);
+
+                    if (used.find(idx_lk) != used.end())
+                        continue;
+
+                    poseIds[idx_op] = idx_lk;
+                    used.insert(idx_lk);
+                    converged = false;

-                    pendingQueue[poseId] = openposePersonEntry;
                }
            }
-
-            // Update LK table with pending queue
-            for (auto& pendingQueueEntry: pendingQueue)
-                personEntries[pendingQueueEntry.first] = pendingQueueEntry.second;
+            for (auto i = 0u; i < openposePersonEntries.size(); i++)
+            {
+                if (poseIds[i] == -1)
+                    poseIds[i] = nextPersonId++;
+                const auto& openposePersonEntry = openposePersonEntries.at(i);
+                personEntries[poseIds[i]] = openposePersonEntry;
+            }

            return poseIds;
        }
@@ -231,6 +271,97 @@ namespace op
        }
    }

+
+    // Array<long long> matchLKAndOP(std::unordered_map<int,PersonEntry>& personEntries,
+    //                               long long& nextPersonId,
+    //                               const std::vector<PersonEntry>& openposePersonEntries,
+    //                               const cv::Mat& imagePrevious,
+    //                               const float inlierRatioThreshold,
+    //                               const float distanceThreshold)
+    // {
+    //     try
+    //     {
+    //         Array<long long> poseIds{(int)openposePersonEntries.size(), -1};
+    //         std::unordered_map<int, PersonEntry> pendingQueue;
+
+    //         if (!openposePersonEntries.empty())
+    //         {
+    //             const auto numberKeypoints = openposePersonEntries[0].keypoints.size();
+    //             for (auto i = 0u; i < openposePersonEntries.size(); i++)
+    //             {
+    //                 auto& poseId = poseIds.at(i);
+
+    //                 const auto& openposePersonEntry = openposePersonEntries.at(i);
+    //                 const auto personDistanceThreshold = fastMax(10.f,
+    //                     distanceThreshold*float(std::sqrt(imagePrevious.cols*imagePrevious.rows)) / 960.f);
+
+    //                 // Find best correspondance in the LK set
+    //                 auto bestMatch = -1ll;
+    //                 auto bestScore = 0.f;
+    //                 for (const auto& personEntry : personEntries)
+    //                 {
+    //                     const auto& element = personEntry.second;
+    //                     auto inliers = 0;
+    //                     auto active = 0;
+
+    //                     // Security checks
+    //                     if (element.status.size() != numberKeypoints)
+    //                         error("element.status.size() != numberKeypoints ||", __LINE__, __FUNCTION__, __FILE__);
+    //                     if (openposePersonEntry.status.size() != numberKeypoints)
+    //                         error("openposePersonEntry.status.size() != numberKeypoints",
+    //                               __LINE__, __FUNCTION__, __FILE__);
+    //                     if (element.keypoints.size() != numberKeypoints)
+    //                         error("element.keypoints.size() != numberKeypoints ||", __LINE__, __FUNCTION__, __FILE__);
+    //                     if (openposePersonEntry.keypoints.size() != numberKeypoints)
+    //                         error("openposePersonEntry.keypoints.size() != numberKeypoints",
+    //                               __LINE__, __FUNCTION__, __FILE__);
+    //                     // Iterate through all keypoints
+    //                     for (auto kp = 0u; kp < numberKeypoints; kp++)
+    //                     {
+    //                         // If enough threshold
+    //                         if (!element.status[kp] && !openposePersonEntry.status[kp])
+    //                         {
+    //                             active++;
+    //                             const auto distance = getEuclideanDistance(element.keypoints[kp],
+    //                                                                        openposePersonEntry.keypoints[kp]);
+    //                             if (distance < personDistanceThreshold)
+    //                                 inliers++;
+    //                         }
+    //                     }
+
+    //                     if (active > 0)
+    //                     {
+    //                         const auto score = inliers / (float)active;
+    //                         if (score > bestScore && score >= inlierRatioThreshold)
+    //                         {
+    //                             bestScore = score;
+    //                             bestMatch = personEntry.first;
+    //                         }
+    //                     }
+    //                 }
+    //                 // Found a best match, update LK table and poseIds
+    //                 if (bestMatch != -1)
+    //                     poseId = bestMatch;
+    //                 else
+    //                     poseId = nextPersonId++;
+
+    //                 pendingQueue[poseId] = openposePersonEntry;
+    //             }
+    //         }
+
+    //         // Update LK table with pending queue
+    //         for (auto& pendingQueueEntry: pendingQueue)
+    //             personEntries[pendingQueueEntry.first] = pendingQueueEntry.second;
+
+    //         return poseIds;
+    //     }
+    //     catch (const std::exception& e)
+    //     {
+    //         error(e.what(), __LINE__, __FUNCTION__, __FILE__);
+    //         return Array<long long>{};
+    //     }
+    // }
+
    PersonIdExtractor::PersonIdExtractor(const float confidenceThreshold, const float inlierRatioThreshold,
                                         const float distanceThreshold, const int numberFramesToDeletePerson) :
        mConfidenceThreshold{confidenceThreshold},
@@ -290,8 +421,10 @@ namespace op
            }

            // Get poseIds and update LKset according to OpenPose set
-            poseIds = matchLKAndOP(mPersonEntries, mNextPersonId, openposePersonEntries, mImagePrevious,
-                                   mInlierRatioThreshold, mDistanceThreshold);
+            // poseIds = matchLKAndOP(
+            poseIds = matchLKAndOPGreedy(
+                mPersonEntries, mNextPersonId, openposePersonEntries, mImagePrevious, mInlierRatioThreshold,
+                mDistanceThreshold);

            return poseIds;
        }

--- a/src/openpose/experimental/tracking/pyramidalLK.cpp
+++ b/src/openpose/experimental/tracking/pyramidalLK.cpp
@@ -5,12 +5,12 @@
 #include <openpose/utilities/profiler.hpp>
 #include <openpose/experimental/tracking/pyramidalLK.hpp>

-#if defined ( __SSE4_1__)
+#if defined (WITH_SSE4)
 #include <emmintrin.h>
 #include "smmintrin.h"
 #endif

-#if defined (__AVX__)
+#if defined (WITH_AVX)
 #include <immintrin.h>
 #endif

@@ -32,7 +32,7 @@

 namespace op
 {
-#if defined ( __SSE4_1__)
+#if defined (WITH_SSE4)
    float sse_dot_product(std::vector<float> &av, std::vector<float> &bv)
    {

@@ -68,7 +68,7 @@ namespace op
    }
 #endif

-#if defined (__AVX__)
+#if defined (WITH_AVX)

    float avx_dot_product(std::vector<float> &av, std::vector<float> &bv)
    {
@@ -114,13 +114,13 @@ namespace op
            auto sumYT = 0.f;
            auto sumXY = 0.f;

-#if defined (__AVX__)
+#if defined (WITH_AVX)
            sumXX = avx_dot_product(ix,ix);
            sumYY = avx_dot_product(iy,iy);
            sumXY = avx_dot_product(ix,iy);
            sumXT = avx_dot_product(ix,it);
            sumYT = avx_dot_product(iy,it);
-#elif defined ( __SSE4_1__)
+#elif defined (WITH_SSE4)
            sumXX = sse_dot_product(ix,ix);
            sumYY = sse_dot_product(iy,iy);
            sumXY = sse_dot_product(ix,iy);