提交 71bc617a 编写于 作者: N nihui

better ncnn2table multithreading, print parsed parameters, print progress

上级 2f7635b9
...@@ -224,6 +224,9 @@ int QuantNet::quantize_KL() ...@@ -224,6 +224,9 @@ int QuantNet::quantize_KL()
const int num_histogram_bins = 2048; const int num_histogram_bins = 2048;
std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);
// initialize conv weight scales // initialize conv weight scales
#pragma omp parallel for num_threads(quantize_num_threads) #pragma omp parallel for num_threads(quantize_num_threads)
for (int i = 0; i < conv_layer_count; i++) for (int i = 0; i < conv_layer_count; i++)
...@@ -323,11 +326,20 @@ int QuantNet::quantize_KL() ...@@ -323,11 +326,20 @@ int QuantNet::quantize_KL()
} }
// count the absmax // count the absmax
#pragma omp parallel for num_threads(quantize_num_threads) #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
for (int i = 0; i < image_count; i++) for (int i = 0; i < image_count; i++)
{ {
if (i % 100 == 0)
{
fprintf(stderr, "count the absmax %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
}
ncnn::Extractor ex = create_extractor(); ncnn::Extractor ex = create_extractor();
const int thread_num = ncnn::get_omp_thread_num();
ex.set_blob_allocator(&blob_allocators[thread_num]);
ex.set_workspace_allocator(&workspace_allocators[thread_num]);
for (int j = 0; j < input_blob_count; j++) for (int j = 0; j < input_blob_count; j++)
{ {
const std::string& imagepath = listspaths[j][i]; const std::string& imagepath = listspaths[j][i];
...@@ -393,11 +405,20 @@ int QuantNet::quantize_KL() ...@@ -393,11 +405,20 @@ int QuantNet::quantize_KL()
} }
// build histogram // build histogram
#pragma omp parallel for num_threads(quantize_num_threads) #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
for (int i = 0; i < image_count; i++) for (int i = 0; i < image_count; i++)
{ {
if (i % 100 == 0)
{
fprintf(stderr, "build histogram %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
}
ncnn::Extractor ex = create_extractor(); ncnn::Extractor ex = create_extractor();
const int thread_num = ncnn::get_omp_thread_num();
ex.set_blob_allocator(&blob_allocators[thread_num]);
ex.set_workspace_allocator(&workspace_allocators[thread_num]);
for (int j = 0; j < input_blob_count; j++) for (int j = 0; j < input_blob_count; j++)
{ {
const std::string& imagepath = listspaths[j][i]; const std::string& imagepath = listspaths[j][i];
...@@ -675,6 +696,9 @@ int QuantNet::quantize_ACIQ() ...@@ -675,6 +696,9 @@ int QuantNet::quantize_ACIQ()
const int conv_bottom_blob_count = (int)conv_bottom_blobs.size(); const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
const int image_count = (int)listspaths[0].size(); const int image_count = (int)listspaths[0].size();
std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);
// initialize conv weight scales // initialize conv weight scales
#pragma omp parallel for num_threads(quantize_num_threads) #pragma omp parallel for num_threads(quantize_num_threads)
for (int i = 0; i < conv_layer_count; i++) for (int i = 0; i < conv_layer_count; i++)
...@@ -777,12 +801,21 @@ int QuantNet::quantize_ACIQ() ...@@ -777,12 +801,21 @@ int QuantNet::quantize_ACIQ()
} }
} }
// count the absmax abssum // count the absmax
#pragma omp parallel for num_threads(quantize_num_threads) #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
for (int i = 0; i < image_count; i++) for (int i = 0; i < image_count; i++)
{ {
if (i % 100 == 0)
{
fprintf(stderr, "count the absmax %.2f%% [ %d / %d ]\n", i * 100.f / image_count, i, image_count);
}
ncnn::Extractor ex = create_extractor(); ncnn::Extractor ex = create_extractor();
const int thread_num = ncnn::get_omp_thread_num();
ex.set_blob_allocator(&blob_allocators[thread_num]);
ex.set_workspace_allocator(&workspace_allocators[thread_num]);
for (int j = 0; j < input_blob_count; j++) for (int j = 0; j < input_blob_count; j++)
{ {
const std::string& imagepath = listspaths[j][i]; const std::string& imagepath = listspaths[j][i];
...@@ -991,6 +1024,9 @@ int QuantNet::quantize_EQ() ...@@ -991,6 +1024,9 @@ int QuantNet::quantize_EQ()
const int conv_layer_count = (int)conv_layers.size(); const int conv_layer_count = (int)conv_layers.size();
const int conv_bottom_blob_count = (int)conv_bottom_blobs.size(); const int conv_bottom_blob_count = (int)conv_bottom_blobs.size();
std::vector<ncnn::UnlockedPoolAllocator> blob_allocators(quantize_num_threads);
std::vector<ncnn::UnlockedPoolAllocator> workspace_allocators(quantize_num_threads);
// max 50 images for EQ // max 50 images for EQ
const int image_count = std::min((int)listspaths[0].size(), 50); const int image_count = std::min((int)listspaths[0].size(), 50);
...@@ -1015,11 +1051,20 @@ int QuantNet::quantize_EQ() ...@@ -1015,11 +1051,20 @@ int QuantNet::quantize_EQ()
std::vector<double> avgsims(search_steps, 0.0); std::vector<double> avgsims(search_steps, 0.0);
#pragma omp parallel for num_threads(quantize_num_threads) #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
for (int ii = 0; ii < image_count; ii++) for (int ii = 0; ii < image_count; ii++)
{ {
if (ii % 100 == 0)
{
fprintf(stderr, "search weight scale %.2f%% [ %d / %d ] for %d / %d of %d / %d\n", ii * 100.f / image_count, ii, image_count, j, weight_scale.w, i, conv_layer_count);
}
ncnn::Extractor ex = create_extractor(); ncnn::Extractor ex = create_extractor();
const int thread_num = ncnn::get_omp_thread_num();
ex.set_blob_allocator(&blob_allocators[thread_num]);
ex.set_workspace_allocator(&workspace_allocators[thread_num]);
for (int jj = 0; jj < input_blob_count; jj++) for (int jj = 0; jj < input_blob_count; jj++)
{ {
const std::string& imagepath = listspaths[jj][ii]; const std::string& imagepath = listspaths[jj][ii];
...@@ -1121,11 +1166,20 @@ int QuantNet::quantize_EQ() ...@@ -1121,11 +1166,20 @@ int QuantNet::quantize_EQ()
std::vector<double> avgsims(search_steps, 0.0); std::vector<double> avgsims(search_steps, 0.0);
#pragma omp parallel for num_threads(quantize_num_threads) #pragma omp parallel for num_threads(quantize_num_threads) schedule(static, 1)
for (int ii = 0; ii < image_count; ii++) for (int ii = 0; ii < image_count; ii++)
{ {
if (ii % 100 == 0)
{
fprintf(stderr, "search bottom blob scale %.2f%% [ %d / %d ] for %d / %d of %d / %d\n", ii * 100.f / image_count, ii, image_count, j, bottom_blob_scale.w, i, conv_layer_count);
}
ncnn::Extractor ex = create_extractor(); ncnn::Extractor ex = create_extractor();
const int thread_num = ncnn::get_omp_thread_num();
ex.set_blob_allocator(&blob_allocators[thread_num]);
ex.set_workspace_allocator(&workspace_allocators[thread_num]);
for (int jj = 0; jj < input_blob_count; jj++) for (int jj = 0; jj < input_blob_count; jj++)
{ {
const std::string& imagepath = listspaths[jj][ii]; const std::string& imagepath = listspaths[jj][ii];
...@@ -1454,6 +1508,64 @@ static std::vector<int> parse_comma_pixel_type_list(char* s) ...@@ -1454,6 +1508,64 @@ static std::vector<int> parse_comma_pixel_type_list(char* s)
return aps; return aps;
} }
static void print_float_array_list(const std::vector<std::vector<float> >& list)
{
for (size_t i = 0; i < list.size(); i++)
{
const std::vector<float>& array = list[i];
fprintf(stderr, "[");
for (size_t j = 0; j < array.size(); j++)
{
fprintf(stderr, "%f", array[j]);
if (j != array.size() - 1)
fprintf(stderr, ",");
}
fprintf(stderr, "]");
if (i != list.size() - 1)
fprintf(stderr, ",");
}
}
static void print_int_array_list(const std::vector<std::vector<int> >& list)
{
for (size_t i = 0; i < list.size(); i++)
{
const std::vector<int>& array = list[i];
fprintf(stderr, "[");
for (size_t j = 0; j < array.size(); j++)
{
fprintf(stderr, "%d", array[j]);
if (j != array.size() - 1)
fprintf(stderr, ",");
}
fprintf(stderr, "]");
if (i != list.size() - 1)
fprintf(stderr, ",");
}
}
static void print_pixel_type_list(const std::vector<int>& list)
{
for (size_t i = 0; i < list.size(); i++)
{
const int type = list[i];
if (type == -233)
fprintf(stderr, "RAW");
if (type == ncnn::Mat::PIXEL_RGB)
fprintf(stderr, "RGB");
if (type == ncnn::Mat::PIXEL_BGR)
fprintf(stderr, "BGR");
if (type == ncnn::Mat::PIXEL_GRAY)
fprintf(stderr, "GRAY");
if (type == ncnn::Mat::PIXEL_RGBA)
fprintf(stderr, "RGBA");
if (type == ncnn::Mat::PIXEL_BGRA)
fprintf(stderr, "BGRA");
if (i != list.size() - 1)
fprintf(stderr, ",");
}
}
static void show_usage() static void show_usage()
{ {
fprintf(stderr, "Usage: ncnn2table [ncnnparam] [ncnnbin] [list,...] [ncnntable] [(key=value)...]\n"); fprintf(stderr, "Usage: ncnn2table [ncnnparam] [ncnnbin] [list,...] [ncnntable] [(key=value)...]\n");
...@@ -1523,8 +1635,6 @@ int main(int argc, char** argv) ...@@ -1523,8 +1635,6 @@ int main(int argc, char** argv)
const char* key = kv; const char* key = kv;
char* value = eqs + 1; char* value = eqs + 1;
fprintf(stderr, "%s = %s\n", key, value);
// load mean norm shape // load mean norm shape
if (memcmp(key, "mean", 4) == 0) if (memcmp(key, "mean", 4) == 0)
net.means = parse_comma_float_array_list(value); net.means = parse_comma_float_array_list(value);
...@@ -1573,6 +1683,25 @@ int main(int argc, char** argv) ...@@ -1573,6 +1683,25 @@ int main(int argc, char** argv)
return -1; return -1;
} }
// print quantnet config
{
fprintf(stderr, "mean = ");
print_float_array_list(net.means);
fprintf(stderr, "\n");
fprintf(stderr, "norm = ");
print_float_array_list(net.norms);
fprintf(stderr, "\n");
fprintf(stderr, "shape = ");
print_int_array_list(net.shapes);
fprintf(stderr, "\n");
fprintf(stderr, "pixel = ");
print_pixel_type_list(net.type_to_pixels);
fprintf(stderr, "\n");
fprintf(stderr, "thread = %d\n", net.quantize_num_threads);
fprintf(stderr, "method = %s\n", method.c_str());
fprintf(stderr, "---------------------------------------\n");
}
if (method == "kl") if (method == "kl")
{ {
net.quantize_KL(); net.quantize_KL();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册