Merge pull request #20957 from sturkmen72:update-documentation

Update documentation * Update DNN-based Face Detection And Recognition tutorial * samples(dnn/face): update face_detect.cpp * final changes Co-authored-by: N Alexander Alekhin <alexander.a.alekhin@gmail.com>

Merge pull request #20957 from sturkmen72:update-documentation
Update documentation * Update DNN-based Face Detection And Recognition tutorial * samples(dnn/face): update face_detect.cpp * final changes Co-authored-by: N Alexander Alekhin <alexander.a.alekhin@gmail.com>
a97f21ba · Suleyman TURKMEN · GitHub · b594ed99 · a97f21ba · a97f21ba
5 changed files
--- a/doc/tutorials/dnn/dnn_face/dnn_face.markdown
+++ b/doc/tutorials/dnn/dnn_face/dnn_face.markdown
@@ -36,14 +36,34 @@ There are two models (ONNX format) pre-trained and required for this module:

 ### DNNFaceDetector

-```cpp
-// Initialize FaceDetectorYN
-Ptr<FaceDetectorYN> faceDetector = FaceDetectorYN::create(onnx_path, "", image.size(), score_thresh, nms_thresh, top_k);
+@add_toggle_cpp
+-   **Downloadable code**: Click
+    [here](https://github.com/opencv/opencv/tree/master/samples/dnn/face_detect.cpp)

-// Forward
-Mat faces;
-faceDetector->detect(image, faces);
-```
+-   **Code at glance:**
+    @include samples/dnn/face_detect.cpp
+@end_toggle
+
+@add_toggle_python
+-   **Downloadable code**: Click
+    [here](https://github.com/opencv/opencv/tree/master/samples/dnn/face_detect.py)
+
+-   **Code at glance:**
+    @include samples/dnn/face_detect.py
+@end_toggle
+
+Explanation
+-----------
+
+@add_toggle_cpp
+@snippet dnn/face_detect.cpp initialize_FaceDetectorYN
+@snippet dnn/face_detect.cpp inference
+@end_toggle
+
+@add_toggle_python
+@snippet dnn/face_detect.py initialize_FaceDetectorYN
+@snippet dnn/face_detect.py inference
+@end_toggle

 The detection output `faces` is a two-dimensional array of type CV_32F, whose rows are the detected face instances and whose columns are the location of a face and 5 facial landmarks. The format of each row is as follows:

@@ -57,28 +77,25 @@ x1, y1, w, h, x_re, y_re, x_le, y_le, x_nt, y_nt, x_rcm, y_rcm, x_lcm, y_lcm

 Following Face Detection, run the code below to extract face features from a facial image.

-```cpp
-// Initialize FaceRecognizerSF with model path (cv::String)
-Ptr<FaceRecognizerSF> faceRecognizer = FaceRecognizerSF::create(model_path, "");
-
-// Aligning and cropping facial image through the first face of faces detected by dnn_face::DNNFaceDetector
-Mat aligned_face;
-faceRecognizer->alignCrop(image, faces.row(0), aligned_face);
+@add_toggle_cpp
+@snippet dnn/face_detect.cpp initialize_FaceRecognizerSF
+@snippet dnn/face_detect.cpp facerecognizer
+@end_toggle

-// Run feature extraction with given aligned_face (cv::Mat)
-Mat feature;
-faceRecognizer->feature(aligned_face, feature);
-feature = feature.clone();
-```
+@add_toggle_python
+@snippet dnn/face_detect.py initialize_FaceRecognizerSF
+@snippet dnn/face_detect.py facerecognizer
+@end_toggle

 After obtaining face features *feature1* and *feature2* of two facial images, run the code below to calculate the identity discrepancy between the two faces.

-```cpp
-// Calculating the discrepancy between two face features by using cosine distance.
-double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizer::DisType::COSINE);
-// Calculating the discrepancy between two face features by using normL2 distance.
-double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizer::DisType::NORM_L2);
-```
+@add_toggle_cpp
+@snippet dnn/face_detect.cpp match
+@end_toggle
+
+@add_toggle_python
+@snippet dnn/face_detect.py match
+@end_toggle

 For example, two faces have the same identity if the cosine similarity is greater than or equal to 0.363, or the normL2 distance is less than or equal to 1.128.


--- a/samples/dnn/face_detect.cpp
+++ b/samples/dnn/face_detect.cpp
@@ -8,125 +8,272 @@
 using namespace cv;
 using namespace std;

-static Mat visualize(Mat input, Mat faces, int thickness=2)
+static
+void visualize(Mat& input, int frame, Mat& faces, double fps, int thickness = 2)
 {
-    Mat output = input.clone();
+    std::string fpsString = cv::format("FPS : %.2f", (float)fps);
+    if (frame >= 0)
+        cout << "Frame " << frame << ", ";
+    cout << "FPS: " << fpsString << endl;
    for (int i = 0; i < faces.rows; i++)
    {
        // Print results
        cout << "Face " << i
             << ", top-left coordinates: (" << faces.at<float>(i, 0) << ", " << faces.at<float>(i, 1) << "), "
             << "box width: " << faces.at<float>(i, 2)  << ", box height: " << faces.at<float>(i, 3) << ", "
-             << "score: " << faces.at<float>(i, 14) << "\n";
+             << "score: " << cv::format("%.2f", faces.at<float>(i, 14))
+             << endl;

        // Draw bounding box
-        rectangle(output, Rect2i(int(faces.at<float>(i, 0)), int(faces.at<float>(i, 1)), int(faces.at<float>(i, 2)), int(faces.at<float>(i, 3))), Scalar(0, 255, 0), thickness);
+        rectangle(input, Rect2i(int(faces.at<float>(i, 0)), int(faces.at<float>(i, 1)), int(faces.at<float>(i, 2)), int(faces.at<float>(i, 3))), Scalar(0, 255, 0), thickness);
        // Draw landmarks
-        circle(output, Point2i(int(faces.at<float>(i, 4)),  int(faces.at<float>(i, 5))),  2, Scalar(255,   0,   0), thickness);
-        circle(output, Point2i(int(faces.at<float>(i, 6)),  int(faces.at<float>(i, 7))),  2, Scalar(  0,   0, 255), thickness);
-        circle(output, Point2i(int(faces.at<float>(i, 8)),  int(faces.at<float>(i, 9))),  2, Scalar(  0, 255,   0), thickness);
-        circle(output, Point2i(int(faces.at<float>(i, 10)), int(faces.at<float>(i, 11))), 2, Scalar(255,   0, 255), thickness);
-        circle(output, Point2i(int(faces.at<float>(i, 12)), int(faces.at<float>(i, 13))), 2, Scalar(  0, 255, 255), thickness);
+        circle(input, Point2i(int(faces.at<float>(i, 4)), int(faces.at<float>(i, 5))), 2, Scalar(255, 0, 0), thickness);
+        circle(input, Point2i(int(faces.at<float>(i, 6)), int(faces.at<float>(i, 7))), 2, Scalar(0, 0, 255), thickness);
+        circle(input, Point2i(int(faces.at<float>(i, 8)), int(faces.at<float>(i, 9))), 2, Scalar(0, 255, 0), thickness);
+        circle(input, Point2i(int(faces.at<float>(i, 10)), int(faces.at<float>(i, 11))), 2, Scalar(255, 0, 255), thickness);
+        circle(input, Point2i(int(faces.at<float>(i, 12)), int(faces.at<float>(i, 13))), 2, Scalar(0, 255, 255), thickness);
    }
-    return output;
+    putText(input, fpsString, Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0), 2);
 }

-int main(int argc, char ** argv)
+int main(int argc, char** argv)
 {
    CommandLineParser parser(argc, argv,
-        "{help  h           |            | Print this message.}"
-        "{input i           |            | Path to the input image. Omit for detecting on default camera.}"
-        "{model m           | yunet.onnx | Path to the model. Download yunet.onnx in https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx.}"
-        "{score_threshold   | 0.9        | Filter out faces of score < score_threshold.}"
-        "{nms_threshold     | 0.3        | Suppress bounding boxes of iou >= nms_threshold.}"
-        "{top_k             | 5000       | Keep top_k bounding boxes before NMS.}"
-        "{save  s           | false      | Set true to save results. This flag is invalid when using camera.}"
-        "{vis   v           | true       | Set true to open a window for result visualization. This flag is invalid when using camera.}"
+        "{help  h           |            | Print this message}"
+        "{image1 i1         |            | Path to the input image1. Omit for detecting through VideoCapture}"
+        "{image2 i2         |            | Path to the input image2. When image1 and image2 parameters given then the program try to find a face on both images and runs face recognition algorithm}"
+        "{video v           | 0          | Path to the input video}"
+        "{scale sc          | 1.0        | Scale factor used to resize input video frames}"
+        "{fd_model fd       | yunet.onnx | Path to the model. Download yunet.onnx in https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx }"
+        "{fr_model fr       | face_recognizer_fast.onnx | Path to the face recognition model. Download the model at https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view}"
+        "{score_threshold   | 0.9        | Filter out faces of score < score_threshold}"
+        "{nms_threshold     | 0.3        | Suppress bounding boxes of iou >= nms_threshold}"
+        "{top_k             | 5000       | Keep top_k bounding boxes before NMS}"
+        "{save s            | false      | Set true to save results. This flag is invalid when using camera}"
    );
-    if (argc == 1 || parser.has("help"))
+    if (parser.has("help"))
    {
        parser.printMessage();
-        return -1;
+        return 0;
    }

-    String modelPath = parser.get<String>("model");
+    String fd_modelPath = parser.get<String>("fd_model");
+    String fr_modelPath = parser.get<String>("fr_model");

    float scoreThreshold = parser.get<float>("score_threshold");
    float nmsThreshold = parser.get<float>("nms_threshold");
    int topK = parser.get<int>("top_k");

    bool save = parser.get<bool>("save");
-    bool vis = parser.get<bool>("vis");

+    double cosine_similar_thresh = 0.363;
+    double l2norm_similar_thresh = 1.128;
+
+    //! [initialize_FaceDetectorYN]
    // Initialize FaceDetectorYN
-    Ptr<FaceDetectorYN> detector = FaceDetectorYN::create(modelPath, "", Size(320, 320), scoreThreshold, nmsThreshold, topK);
+    Ptr<FaceDetectorYN> detector = FaceDetectorYN::create(fd_modelPath, "", Size(320, 320), scoreThreshold, nmsThreshold, topK);
+    //! [initialize_FaceDetectorYN]
+
+    TickMeter tm;

    // If input is an image
-    if (parser.has("input"))
+    if (parser.has("image1"))
    {
-        String input = parser.get<String>("input");
-        Mat image = imread(input);
+        String input1 = parser.get<String>("image1");
+        Mat image1 = imread(samples::findFile(input1));
+        if (image1.empty())
+        {
+            std::cerr << "Cannot read image: " << input1 << std::endl;
+            return 2;
+        }

+        tm.start();
+
+        //! [inference]
        // Set input size before inference
-        detector->setInputSize(image.size());
+        detector->setInputSize(image1.size());

-        // Inference
-        Mat faces;
-        detector->detect(image, faces);
+        Mat faces1;
+        detector->detect(image1, faces1);
+        if (faces1.rows < 1)
+        {
+            std::cerr << "Cannot find a face in " << input1 << std::endl;
+            return 1;
+        }
+        //! [inference]

+        tm.stop();
        // Draw results on the input image
-        Mat result = visualize(image, faces);
+        visualize(image1, -1, faces1, tm.getFPS());

        // Save results if save is true
-        if(save)
+        if (save)
        {
-            cout << "Results saved to result.jpg\n";
-            imwrite("result.jpg", result);
+            cout << "Saving result.jpg...\n";
+            imwrite("result.jpg", image1);
        }

        // Visualize results
-        if (vis)
+        imshow("image1", image1);
+        pollKey();  // handle UI events to show content
+
+        if (parser.has("image2"))
        {
-            namedWindow(input, WINDOW_AUTOSIZE);
-            imshow(input, result);
-            waitKey(0);
+            String input2 = parser.get<String>("image2");
+            Mat image2 = imread(samples::findFile(input2));
+            if (image2.empty())
+            {
+                std::cerr << "Cannot read image2: " << input2 << std::endl;
+                return 2;
+            }
+
+            tm.reset();
+            tm.start();
+            detector->setInputSize(image2.size());
+
+            Mat faces2;
+            detector->detect(image2, faces2);
+            if (faces2.rows < 1)
+            {
+                std::cerr << "Cannot find a face in " << input2 << std::endl;
+                return 1;
+            }
+            tm.stop();
+            visualize(image2, -1, faces2, tm.getFPS());
+            if (save)
+            {
+                cout << "Saving result2.jpg...\n";
+                imwrite("result2.jpg", image2);
+            }
+            imshow("image2", image2);
+            pollKey();
+
+            //! [initialize_FaceRecognizerSF]
+            // Initialize FaceRecognizerSF
+            Ptr<FaceRecognizerSF> faceRecognizer = FaceRecognizerSF::create(fr_modelPath, "");
+            //! [initialize_FaceRecognizerSF]
+
+
+            //! [facerecognizer]
+            // Aligning and cropping facial image through the first face of faces detected.
+            Mat aligned_face1, aligned_face2;
+            faceRecognizer->alignCrop(image1, faces1.row(0), aligned_face1);
+            faceRecognizer->alignCrop(image2, faces2.row(0), aligned_face2);
+
+            // Run feature extraction with given aligned_face
+            Mat feature1, feature2;
+            faceRecognizer->feature(aligned_face1, feature1);
+            feature1 = feature1.clone();
+            faceRecognizer->feature(aligned_face2, feature2);
+            feature2 = feature2.clone();
+            //! [facerecognizer]
+
+            //! [match]
+            double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_COSINE);
+            double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_NORM_L2);
+            //! [match]
+
+            if (cos_score >= cosine_similar_thresh)
+            {
+                std::cout << "They have the same identity;";
+            }
+            else
+            {
+                std::cout << "They have different identities;";
+            }
+            std::cout << " Cosine Similarity: " << cos_score << ", threshold: " << cosine_similar_thresh << ". (higher value means higher similarity, max 1.0)\n";
+
+            if (L2_score <= l2norm_similar_thresh)
+            {
+                std::cout << "They have the same identity;";
+            }
+            else
+            {
+                std::cout << "They have different identities.";
+            }
+            std::cout << " NormL2 Distance: " << L2_score << ", threshold: " << l2norm_similar_thresh << ". (lower value means higher similarity, min 0.0)\n";
        }
+        cout << "Press any key to exit..." << endl;
+        waitKey(0);
    }
    else
    {
-        int deviceId = 0;
-        VideoCapture cap;
-        cap.open(deviceId, CAP_ANY);
-        int frameWidth = int(cap.get(CAP_PROP_FRAME_WIDTH));
-        int frameHeight = int(cap.get(CAP_PROP_FRAME_HEIGHT));
+        int frameWidth, frameHeight;
+        float scale = parser.get<float>("scale");
+        VideoCapture capture;
+        std::string video = parser.get<string>("video");
+        if (video.size() == 1 && isdigit(video[0]))
+            capture.open(parser.get<int>("video"));
+        else
+            capture.open(samples::findFileOrKeep(video));  // keep GStreamer pipelines
+        if (capture.isOpened())
+        {
+            frameWidth = int(capture.get(CAP_PROP_FRAME_WIDTH) * scale);
+            frameHeight = int(capture.get(CAP_PROP_FRAME_HEIGHT) * scale);
+            cout << "Video " << video
+                << ": width=" << frameWidth
+                << ", height=" << frameHeight
+                << endl;
+        }
+        else
+        {
+            cout << "Could not initialize video capturing: " << video << "\n";
+            return 1;
+        }
+
        detector->setInputSize(Size(frameWidth, frameHeight));

-        Mat frame;
-        TickMeter tm;
-        String msg = "FPS: ";
-        while(waitKey(1) < 0) // Press any key to exit
+        cout << "Press 'SPACE' to save frame, any other key to exit..." << endl;
+        int nFrame = 0;
+        for (;;)
        {
            // Get frame
-            if (!cap.read(frame))
+            Mat frame;
+            if (!capture.read(frame))
            {
-                cerr << "No frames grabbed!\n";
+                cerr << "Can't grab frame! Stop\n";
                break;
            }

+            resize(frame, frame, Size(frameWidth, frameHeight));
+
            // Inference
            Mat faces;
            tm.start();
            detector->detect(frame, faces);
            tm.stop();

+            Mat result = frame.clone();
            // Draw results on the input image
-            Mat result = visualize(frame, faces);
-            putText(result, msg + to_string(tm.getFPS()), Point(0, 15), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0));
+            visualize(result, nFrame, faces, tm.getFPS());

            // Visualize results
            imshow("Live", result);

-            tm.reset();
+            int key = waitKey(1);
+            bool saveFrame = save;
+            if (key == ' ')
+            {
+                saveFrame = true;
+                key = 0;  // handled
+            }
+
+            if (saveFrame)
+            {
+                std::string frame_name = cv::format("frame_%05d.png", nFrame);
+                std::string result_name = cv::format("result_%05d.jpg", nFrame);
+                cout << "Saving '" << frame_name << "' and '" << result_name << "' ...\n";
+                imwrite(frame_name, frame);
+                imwrite(result_name, result);
+            }
+
+            ++nFrame;
+
+            if (key > 0)
+                break;
        }
+        cout << "Processed " << nFrame << " frames" << endl;
    }
-}
\ No newline at end of file
+    cout << "Done." << endl;
+    return 0;
+}
--- a/samples/dnn/face_detect.py
+++ b/samples/dnn/face_detect.py
@@ -12,90 +12,144 @@ def str2bool(v):
        raise NotImplementedError

 parser = argparse.ArgumentParser()
-parser.add_argument('--input', '-i', type=str, help='Path to the input image.')
-parser.add_argument('--model', '-m', type=str, default='yunet.onnx', help='Path to the model. Download the model at https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx.')
+parser.add_argument('--image1', '-i1', type=str, help='Path to the input image1. Omit for detecting on default camera.')
+parser.add_argument('--image2', '-i2', type=str, help='Path to the input image2. When image1 and image2 parameters given then the program try to find a face on both images and runs face recognition algorithm.')
+parser.add_argument('--video', '-v', type=str, help='Path to the input video.')
+parser.add_argument('--scale', '-sc', type=float, default=1.0, help='Scale factor used to resize input video frames.')
+parser.add_argument('--face_detection_model', '-fd', type=str, default='yunet.onnx', help='Path to the face detection model. Download the model at https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx.')
+parser.add_argument('--face_recognition_model', '-fr', type=str, default='face_recognizer_fast.onnx', help='Path to the face recognition model. Download the model at https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view.')
 parser.add_argument('--score_threshold', type=float, default=0.9, help='Filtering out faces of score < score_threshold.')
 parser.add_argument('--nms_threshold', type=float, default=0.3, help='Suppress bounding boxes of iou >= nms_threshold.')
 parser.add_argument('--top_k', type=int, default=5000, help='Keep top_k bounding boxes before NMS.')
 parser.add_argument('--save', '-s', type=str2bool, default=False, help='Set true to save results. This flag is invalid when using camera.')
-parser.add_argument('--vis', '-v', type=str2bool, default=True, help='Set true to open a window for result visualization. This flag is invalid when using camera.')
 args = parser.parse_args()

-def visualize(input, faces, thickness=2):
-    output = input.copy()
+def visualize(input, faces, fps, thickness=2):
    if faces[1] is not None:
        for idx, face in enumerate(faces[1]):
            print('Face {}, top-left coordinates: ({:.0f}, {:.0f}), box width: {:.0f}, box height {:.0f}, score: {:.2f}'.format(idx, face[0], face[1], face[2], face[3], face[-1]))

            coords = face[:-1].astype(np.int32)
-            cv.rectangle(output, (coords[0], coords[1]), (coords[0]+coords[2], coords[1]+coords[3]), (0, 255, 0), 2)
-            cv.circle(output, (coords[4], coords[5]), 2, (255, 0, 0), 2)
-            cv.circle(output, (coords[6], coords[7]), 2, (0, 0, 255), 2)
-            cv.circle(output, (coords[8], coords[9]), 2, (0, 255, 0), 2)
-            cv.circle(output, (coords[10], coords[11]), 2, (255, 0, 255), 2)
-            cv.circle(output, (coords[12], coords[13]), 2, (0, 255, 255), 2)
-    return output
+            cv.rectangle(input, (coords[0], coords[1]), (coords[0]+coords[2], coords[1]+coords[3]), (0, 255, 0), thickness)
+            cv.circle(input, (coords[4], coords[5]), 2, (255, 0, 0), thickness)
+            cv.circle(input, (coords[6], coords[7]), 2, (0, 0, 255), thickness)
+            cv.circle(input, (coords[8], coords[9]), 2, (0, 255, 0), thickness)
+            cv.circle(input, (coords[10], coords[11]), 2, (255, 0, 255), thickness)
+            cv.circle(input, (coords[12], coords[13]), 2, (0, 255, 255), thickness)
+    cv.putText(input, 'FPS: {:.2f}'.format(fps), (1, 16), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)

 if __name__ == '__main__':

-    # Instantiate FaceDetectorYN
+    ## [initialize_FaceDetectorYN]
    detector = cv.FaceDetectorYN.create(
-        args.model,
+        args.face_detection_model,
        "",
        (320, 320),
        args.score_threshold,
        args.nms_threshold,
        args.top_k
    )
+    ## [initialize_FaceDetectorYN]
+
+    tm = cv.TickMeter()

    # If input is an image
-    if args.input is not None:
-        image = cv.imread(args.input)
+    if args.image1 is not None:
+        img1 = cv.imread(cv.samples.findFile(args.image1))

+        tm.start()
+        ## [inference]
        # Set input size before inference
-        detector.setInputSize((image.shape[1], image.shape[0]))
+        detector.setInputSize((img1.shape[1], img1.shape[0]))
+
+        faces1 = detector.detect(img1)
+        ## [inference]

-        # Inference
-        faces = detector.detect(image)
+        tm.stop()
+        assert faces1[1] is not None, 'Cannot find a face in {}'.format(args.image1)

        # Draw results on the input image
-        result = visualize(image, faces)
+        visualize(img1, faces1, tm.getFPS())

        # Save results if save is true
        if args.save:
-            print('Resutls saved to result.jpg\n')
-            cv.imwrite('result.jpg', result)
+            print('Results saved to result.jpg\n')
+            cv.imwrite('result.jpg', img1)

        # Visualize results in a new window
-        if args.vis:
-            cv.namedWindow(args.input, cv.WINDOW_AUTOSIZE)
-            cv.imshow(args.input, result)
-            cv.waitKey(0)
+        cv.imshow("image1", img1)
+
+        if args.image2 is not None:
+            img2 = cv.imread(cv.samples.findFile(args.image2))
+
+            tm.reset()
+            tm.start()
+            detector.setInputSize((img2.shape[1], img2.shape[0]))
+            faces2 = detector.detect(img2)
+            tm.stop()
+            assert faces2[1] is not None, 'Cannot find a face in {}'.format(args.image2)
+            visualize(img2, faces2, tm.getFPS())
+            cv.imshow("image2", img2)
+
+            ## [initialize_FaceRecognizerSF]
+            recognizer = cv.FaceRecognizerSF.create(
+            args.face_recognition_model,"")
+            ## [initialize_FaceRecognizerSF]
+
+            ## [facerecognizer]
+            # Align faces
+            face1_align = recognizer.alignCrop(img1, faces1[1][0])
+            face2_align = recognizer.alignCrop(img2, faces2[1][0])
+
+            # Extract features
+            face1_feature = recognizer.feature(face1_align)
+            face2_feature = recognizer.feature(face2_align)
+            ## [facerecognizer]
+
+            cosine_similarity_threshold = 0.363
+            l2_similarity_threshold = 1.128
+
+            ## [match]
+            cosine_score = recognizer.match(face1_feature, face2_feature, cv.FaceRecognizerSF_FR_COSINE)
+            l2_score = recognizer.match(face1_feature, face2_feature, cv.FaceRecognizerSF_FR_NORM_L2)
+            ## [match]
+
+            msg = 'different identities'
+            if cosine_score >= cosine_similarity_threshold:
+                msg = 'the same identity'
+            print('They have {}. Cosine Similarity: {}, threshold: {} (higher value means higher similarity, max 1.0).'.format(msg, cosine_score, cosine_similarity_threshold))
+
+            msg = 'different identities'
+            if l2_score <= l2_similarity_threshold:
+                msg = 'the same identity'
+            print('They have {}. NormL2 Distance: {}, threshold: {} (lower value means higher similarity, min 0.0).'.format(msg, l2_score, l2_similarity_threshold))
+        cv.waitKey(0)
    else: # Omit input to call default camera
-        deviceId = 0
+        if args.video is not None:
+            deviceId = args.video
+        else:
+            deviceId = 0
        cap = cv.VideoCapture(deviceId)
-        frameWidth = int(cap.get(cv.CAP_PROP_FRAME_WIDTH))
-        frameHeight = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT))
+        frameWidth = int(cap.get(cv.CAP_PROP_FRAME_WIDTH)*args.scale)
+        frameHeight = int(cap.get(cv.CAP_PROP_FRAME_HEIGHT)*args.scale)
        detector.setInputSize([frameWidth, frameHeight])

-        tm = cv.TickMeter()
        while cv.waitKey(1) < 0:
            hasFrame, frame = cap.read()
            if not hasFrame:
                print('No frames grabbed!')
                break

+            frame = cv.resize(frame, (frameWidth, frameHeight))
+
            # Inference
            tm.start()
            faces = detector.detect(frame) # faces is a tuple
            tm.stop()

            # Draw results on the input image
-            frame = visualize(frame, faces)
+            visualize(frame, faces, tm.getFPS())

-            cv.putText(frame, 'FPS: {}'.format(tm.getFPS()), (0, 15), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))
-
-            # Visualize results in a new Window
+            # Visualize results
            cv.imshow('Live', frame)
-
-            tm.reset()
\ No newline at end of file
+    cv.destroyAllWindows()
--- a/samples/dnn/face_match.cpp
+++ b/samples/dnn/face_match.cpp
-// This file is part of OpenCV project.
-// It is subject to the license terms in the LICENSE file found in the top-level directory
-// of this distribution and at http://opencv.org/license.html.
-
-#include "opencv2/dnn.hpp"
-#include "opencv2/imgproc.hpp"
-#include "opencv2/highgui.hpp"
-
-#include <iostream>
-
-#include "opencv2/objdetect.hpp"
-
-
-using namespace cv;
-using namespace std;
-
-
-int main(int argc, char ** argv)
-{
-    if (argc != 5)
-    {
-        std::cerr << "Usage " << argv[0] << ": "
-                  << "<det_onnx_path> "
-                  << "<reg_onnx_path> "
-                  << "<image1>"
-                  << "<image2>\n";
-        return -1;
-    }
-
-    String det_onnx_path = argv[1];
-    String reg_onnx_path = argv[2];
-    String image1_path = argv[3];
-    String image2_path = argv[4];
-    std::cout<<image1_path<<" "<<image2_path<<std::endl;
-    Mat image1 = imread(image1_path);
-    Mat image2 = imread(image2_path);
-
-    float score_thresh = 0.9f;
-    float nms_thresh = 0.3f;
-    double cosine_similar_thresh = 0.363;
-    double l2norm_similar_thresh = 1.128;
-    int top_k = 5000;
-
-    // Initialize FaceDetector
-    Ptr<FaceDetectorYN> faceDetector;
-
-    faceDetector = FaceDetectorYN::create(det_onnx_path, "", image1.size(), score_thresh, nms_thresh, top_k);
-    Mat faces_1;
-    faceDetector->detect(image1, faces_1);
-    if (faces_1.rows < 1)
-    {
-        std::cerr << "Cannot find a face in " << image1_path << "\n";
-        return -1;
-    }
-
-    faceDetector = FaceDetectorYN::create(det_onnx_path, "", image2.size(), score_thresh, nms_thresh, top_k);
-    Mat faces_2;
-    faceDetector->detect(image2, faces_2);
-    if (faces_2.rows < 1)
-    {
-        std::cerr << "Cannot find a face in " << image2_path << "\n";
-        return -1;
-    }
-
-    // Initialize FaceRecognizerSF
-    Ptr<FaceRecognizerSF> faceRecognizer = FaceRecognizerSF::create(reg_onnx_path, "");
-
-
-    Mat aligned_face1, aligned_face2;
-    faceRecognizer->alignCrop(image1, faces_1.row(0), aligned_face1);
-    faceRecognizer->alignCrop(image2, faces_2.row(0), aligned_face2);
-
-    Mat feature1, feature2;
-    faceRecognizer->feature(aligned_face1, feature1);
-    feature1 = feature1.clone();
-    faceRecognizer->feature(aligned_face2, feature2);
-    feature2 = feature2.clone();
-
-    double cos_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_COSINE);
-    double L2_score = faceRecognizer->match(feature1, feature2, FaceRecognizerSF::DisType::FR_NORM_L2);
-
-    if(cos_score >= cosine_similar_thresh)
-    {
-        std::cout << "They have the same identity;";
-    }
-    else
-    {
-        std::cout << "They have different identities;";
-    }
-    std::cout << " Cosine Similarity: " << cos_score << ", threshold: " << cosine_similar_thresh << ". (higher value means higher similarity, max 1.0)\n";
-
-    if(L2_score <= l2norm_similar_thresh)
-    {
-        std::cout << "They have the same identity;";
-    }
-    else
-    {
-        std::cout << "They have different identities.";
-    }
-    std::cout << " NormL2 Distance: " << L2_score << ", threshold: " << l2norm_similar_thresh << ". (lower value means higher similarity, min 0.0)\n";
-
-    return 0;
-}
--- a/samples/dnn/face_match.py
+++ b/samples/dnn/face_match.py
-import argparse
-
-import numpy as np
-import cv2 as cv
-
-parser = argparse.ArgumentParser()
-parser.add_argument('--input1', '-i1', type=str, help='Path to the input image1.')
-parser.add_argument('--input2', '-i2', type=str, help='Path to the input image2.')
-parser.add_argument('--face_detection_model', '-fd', type=str, help='Path to the face detection model. Download the model at https://github.com/ShiqiYu/libfacedetection.train/tree/master/tasks/task1/onnx.')
-parser.add_argument('--face_recognition_model', '-fr', type=str, help='Path to the face recognition model. Download the model at https://drive.google.com/file/d/1ClK9WiB492c5OZFKveF3XiHCejoOxINW/view.')
-args = parser.parse_args()
-
-# Read the input image
-img1 = cv.imread(args.input1)
-img2 = cv.imread(args.input2)
-
-# Instantiate face detector and recognizer
-detector = cv.FaceDetectorYN.create(
-    args.face_detection_model,
-    "",
-    (img1.shape[1], img1.shape[0])
-)
-recognizer = cv.FaceRecognizerSF.create(
-    args.face_recognition_model,
-    ""
-)
-
-# Detect face
-detector.setInputSize((img1.shape[1], img1.shape[0]))
-face1 = detector.detect(img1)
-detector.setInputSize((img2.shape[1], img2.shape[0]))
-face2 = detector.detect(img2)
-assert face1[1].shape[0] > 0, 'Cannot find a face in {}'.format(args.input1)
-assert face2[1].shape[0] > 0, 'Cannot find a face in {}'.format(args.input2)
-
-# Align faces
-face1_align = recognizer.alignCrop(img1, face1[1][0])
-face2_align = recognizer.alignCrop(img2, face2[1][0])
-
-# Extract features
-face1_feature = recognizer.feature(face1_align)
-face2_feature = recognizer.feature(face2_align)
-
-# Calculate distance (0: cosine, 1: L2)
-cosine_similarity_threshold = 0.363
-cosine_score = recognizer.match(face1_feature, face2_feature, 0)
-msg = 'different identities'
-if cosine_score >= cosine_similarity_threshold:
-    msg = 'the same identity'
-print('They have {}. Cosine Similarity: {}, threshold: {} (higher value means higher similarity, max 1.0).'.format(msg, cosine_score, cosine_similarity_threshold))
-
-l2_similarity_threshold = 1.128
-l2_score = recognizer.match(face1_feature, face2_feature, 1)
-msg = 'different identities'
-if l2_score <= l2_similarity_threshold:
-    msg = 'the same identity'
-print('They have {}. NormL2 Distance: {}, threshold: {} (lower value means higher similarity, min 0.0).'.format(msg, l2_score, l2_similarity_threshold))