better ncnn2table multithreading, print parsed parameters, print progress

71bc617a · nihui · 2f7635b9 · 71bc617a
隐藏空白更改
内联并排

Showing with 137 additions and 8 deletions

tools/quantize/ncnn2table.cpp tools/quantize/ncnn2table.cpp +137 -8

未找到文件。
--- a/tools/quantize/ncnn2table.cpp
+++ b/tools/quantize/ncnn2table.cpp
@@ -224,6 +224,9 @@ int QuantNet::quantize_KL()
    const int num_histogram_bins = 2048;
+    std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
+    std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);
    // initialize conv weight scales
    #pragma omp parallel for num_threads(quantize_num_threads)
    for (int i = 0; i < conv_layer_count; i++)
@@ -323,11 +326,20 @@ int QuantNet::quantize_KL()
    }
    // count the absmax
-    #pragma omp parallel for num_threads(quantize_num_threads)
+    #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
    for (int i = 0; i < image_count; i++)
    {
+        if (i % 100 == 0)
+        {
+            fprintf(stderr, "count the absmax %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
+        }
        ncnn::Extractor ex = create_extractor();
+        const int thread_num = ncnn::get_omp_thread_num();
+        ex.set_blob_allocator(&blob_allocators[thread_num]);
+        ex.set_workspace_allocator(&workspace_allocators[thread_num]);
        for (int j = 0; j < input_blob_count; j++)
        {
            const std::string& imagepath = listspaths[j][i];
@@ -393,11 +405,20 @@ int QuantNet::quantize_KL()
    }
    // build histogram
-    #pragma omp parallel for num_threads(quantize_num_threads)
+    #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
    for (int i = 0; i < image_count; i++)
    {
+        if (i % 100 == 0)
+        {
+            fprintf(stderr, "build histogram %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
+        }
        ncnn::Extractor ex = create_extractor();
+        const int thread_num = ncnn::get_omp_thread_num();
+        ex.set_blob_allocator(&blob_allocators[thread_num]);
+        ex.set_workspace_allocator(&workspace_allocators[thread_num]);
        for (int j = 0; j < input_blob_count; j++)
        {
            const std::string& imagepath = listspaths[j][i];
@@ -675,6 +696,9 @@ int QuantNet::quantize_ACIQ()
    const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
    const int image_count = (int)listspaths[0].size();
+    std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
+    std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);
    // initialize conv weight scales
    #pragma omp parallel for num_threads(quantize_num_threads)
    for (int i = 0; i < conv_layer_count; i++)
@@ -777,12 +801,21 @@ int QuantNet::quantize_ACIQ()
        }
    }
-    // count the absmax abssum
+    // count the absmax
-    #pragma omp parallel for num_threads(quantize_num_threads)
+    #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
    for (int i = 0; i < image_count; i++)
    {
+        if (i % 100 == 0)
+        {
+            fprintf(stderr, "count the absmax %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
+        }
        ncnn::Extractor ex = create_extractor();
+        const int thread_num = ncnn::get_omp_thread_num();
+        ex.set_blob_allocator(&blob_allocators[thread_num]);
+        ex.set_workspace_allocator(&workspace_allocators[thread_num]);
        for (int j = 0; j < input_blob_count; j++)
        {
            const std::string& imagepath = listspaths[j][i];
@@ -991,6 +1024,9 @@ int QuantNet::quantize_EQ()
    const int conv_layer_count = (int)conv_layers.size();
    const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
+    std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
+    std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);
    // max 50 images for EQ
    const int image_count = std::min((int)listspaths[0].size(), 50);
@@ -1015,11 +1051,20 @@ int QuantNet::quantize_EQ()
            std::vector<double> avgsims(search_steps, 0.0);
-            #pragma omp parallel for num_threads(quantize_num_threads)
+            #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
            for (int ii = 0; ii < image_count; ii++)
            {
+                if (ii % 100 == 0)
+                {
+                    fprintf(stderr, "search weight scale %.2f%% [ %d / %d ] for %d / %d of %d / %d\n", ii * 100.f / image_count, ii, image_count, j, weight_scale.w, i, conv_layer_count);
+                }
                ncnn::Extractor ex = create_extractor();
+                const int thread_num = ncnn::get_omp_thread_num();
+                ex.set_blob_allocator(&blob_allocators[thread_num]);
+                ex.set_workspace_allocator(&workspace_allocators[thread_num]);
                for (int jj = 0; jj < input_blob_count; jj++)
                {
                    const std::string& imagepath = listspaths[jj][ii];
@@ -1121,11 +1166,20 @@ int QuantNet::quantize_EQ()
            std::vector<double> avgsims(search_steps, 0.0);
-            #pragma omp parallel for num_threads(quantize_num_threads)
+            #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
            for (int ii = 0; ii < image_count; ii++)
            {
+                if (ii % 100 == 0)
+                {
+                    fprintf(stderr, "search bottom blob scale %.2f%% [ %d / %d ] for %d / %d of %d / %d\n", ii * 100.f / image_count, ii, image_count, j, bottom_blob_scale.w, i, conv_layer_count);
+                }
                ncnn::Extractor ex = create_extractor();
+                const int thread_num = ncnn::get_omp_thread_num();
+                ex.set_blob_allocator(&blob_allocators[thread_num]);
+                ex.set_workspace_allocator(&workspace_allocators[thread_num]);
                for (int jj = 0; jj < input_blob_count; jj++)
                {
                    const std::string& imagepath = listspaths[jj][ii];
@@ -1454,6 +1508,64 @@ static std::vector<int> parse_comma_pixel_type_list(char* s)
    return aps;
 }
+// Write a list of float arrays to stderr in the form "[a,b],[c,d]".
+// Values are formatted with "%f"; no spaces and no trailing newline are emitted.
+static void print_float_array_list(const std::vector<std::vector<float> >& list)
+{
+    const size_t array_count = list.size();
+    for (size_t n = 0; n < array_count; n++)
+    {
+        // separator goes between arrays, never before the first one
+        if (n > 0)
+            fprintf(stderr, ",");
+        const std::vector<float>& values = list[n];
+        const size_t value_count = values.size();
+        fprintf(stderr, "[");
+        for (size_t k = 0; k < value_count; k++)
+        {
+            // same rule for the values inside one array
+            if (k > 0)
+                fprintf(stderr, ",");
+            fprintf(stderr, "%f", values[k]);
+        }
+        fprintf(stderr, "]");
+    }
+}
+// Write a list of int arrays to stderr in the form "[1,2],[3,4]".
+// Values are formatted with "%d"; no spaces and no trailing newline are emitted.
+static void print_int_array_list(const std::vector<std::vector<int> >& list)
+{
+    // empty before the first array, "," before every later one
+    const char* array_sep = "";
+    for (size_t n = 0; n < list.size(); n++)
+    {
+        const std::vector<int>& values = list[n];
+        fprintf(stderr, "%s[", array_sep);
+        array_sep = ",";
+        // same separator scheme for the values inside one array
+        const char* value_sep = "";
+        for (size_t k = 0; k < values.size(); k++)
+        {
+            fprintf(stderr, "%s%d", value_sep, values[k]);
+            value_sep = ",";
+        }
+        fprintf(stderr, "]");
+    }
+}
+// Write a comma-separated list of pixel type names to stderr, without a trailing newline.
+// A value that matches none of the known types contributes no name,
+// but the separating comma is still written.
+static void print_pixel_type_list(const std::vector<int>& list)
+{
+    // value -> printed name; -233 is the value shown as RAW
+    const struct
+    {
+        int type;
+        const char* name;
+    } known_types[] = {
+        {-233, "RAW"},
+        {ncnn::Mat::PIXEL_RGB, "RGB"},
+        {ncnn::Mat::PIXEL_BGR, "BGR"},
+        {ncnn::Mat::PIXEL_GRAY, "GRAY"},
+        {ncnn::Mat::PIXEL_RGBA, "RGBA"},
+        {ncnn::Mat::PIXEL_BGRA, "BGRA"},
+    };
+    const size_t known_count = sizeof(known_types) / sizeof(known_types[0]);
+    const size_t type_count = list.size();
+    for (size_t n = 0; n < type_count; n++)
+    {
+        // scan the whole table without early exit so every matching entry is printed
+        for (size_t k = 0; k < known_count; k++)
+        {
+            if (list[n] == known_types[k].type)
+                fputs(known_types[k].name, stderr);
+        }
+        // comma after every element except the last
+        if (n + 1 != type_count)
+            fputs(",", stderr);
+    }
+}
 static void show_usage()
 {
    fprintf(stderr, "Usage: ncnn2table [ncnnparam] [ncnnbin] [list,...] [ncnntable] [(key=value)...]\n");
@@ -1523,8 +1635,6 @@ int main(int argc, char** argv)
        const char* key = kv;
        char* value = eqs + 1;
-        fprintf(stderr, "%s = %s\n", key, value);
        // load mean norm shape
        if (memcmp(key, "mean", 4) == 0)
            net.means = parse_comma_float_array_list(value);
@@ -1573,6 +1683,25 @@ int main(int argc, char** argv)
        return -1;
    }
+    // print quantnet config
+    {
+        fprintf(stderr, "mean = ");
+        print_float_array_list(net.means);
+        fprintf(stderr, "\n");
+        fprintf(stderr, "norm = ");
+        print_float_array_list(net.norms);
+        fprintf(stderr, "\n");
+        fprintf(stderr, "shape = ");
+        print_int_array_list(net.shapes);
+        fprintf(stderr, "\n");
+        fprintf(stderr, "pixel = ");
+        print_pixel_type_list(net.type_to_pixels);
+        fprintf(stderr, "\n");
+        fprintf(stderr, "thread = %d\n", net.quantize_num_threads);
+        fprintf(stderr, "method = %s\n", method.c_str());
+        fprintf(stderr, "---------------------------------------\n");
+    }
    if (method == "kl")
    {
        net.quantize_KL();