未验证 提交 3e71f042 编写于 作者: B BowShotDS 提交者: GitHub

update DFQ/EQ/Evaluate int8 perchannel quant tool (#1112)

* update DFQ/EQ/Evaluate int8 perchannel quant tool

* apply code-format changes
Co-authored-by: Your Name <you@example.com>
Co-authored-by: BowShotDS <BowShotDS@users.noreply.github.com>
上级 f6761750
...@@ -30,6 +30,8 @@ IF (${TENGINE_TARGET_PROCESSOR} MATCHES "X86") ...@@ -30,6 +30,8 @@ IF (${TENGINE_TARGET_PROCESSOR} MATCHES "X86")
ADD_EXECUTABLE( ADD_EXECUTABLE(
${name} ${name}
./quant_save_graph.cpp ./quant_save_graph.cpp
./algorithm/quant_dfq.cpp
./algorithm/quant_eq.cpp
./quant_utils.cpp ./quant_utils.cpp
../save_graph/save_graph.cpp ../save_graph/save_graph.cpp
../save_graph/tm2_op_save.cpp ../save_graph/tm2_op_save.cpp
......
...@@ -76,7 +76,7 @@ Status : int8, per-channel, symmetric ...@@ -76,7 +76,7 @@ Status : int8, per-channel, symmetric
Before use the quant tool, **you need Float32 tmfile and Calibration Dataset**, the image num of calibration dataset we suggest to use 500-1000. Before use the quant tool, **you need Float32 tmfile and Calibration Dataset**, the image num of calibration dataset we suggest to use 500-1000.
``` ```
$ .quant_tool_int8 -m ./mobilenet_fp32.tmfile -i ./dataset -o ./mobilenet_int8.tmfile -g 3,224,224 -w 104.007,116.669,122.679 -s 0.017,0.017,0.017 $ .quant_tool_int8 -m ./mobilenet_fp32.tmfile -i ./dataset -o ./mobilenet_int8.tmfile -g 3,224,224 -w 104.007,116.669,122.679 -s 0.017,0.017,0.017 -z 1
---- Tengine Post Training Quantization Tool ---- ---- Tengine Post Training Quantization Tool ----
...@@ -111,6 +111,38 @@ Thread num : 1 ...@@ -111,6 +111,38 @@ Thread num : 1
[Quant Tools Info]: Step 4, quantize activation tensor done. [Quant Tools Info]: Step 4, quantize activation tensor done.
[Quant Tools Info]: Step 5, quantize weight tensor done. [Quant Tools Info]: Step 5, quantize weight tensor done.
[Quant Tools Info]: Step 6, save Int8 tmfile done, ./mobilenet_int8.tmfile [Quant Tools Info]: Step 6, save Int8 tmfile done, ./mobilenet_int8.tmfile
[Quant Tools Info]: Step Evaluate, evaluate quantitative losses
cosin 0 32 avg 0.995317 ### 0.000000 0.953895 0.998249 0.969256 ...
cosin 1 32 avg 0.982403 ### 0.000000 0.902383 0.964436 0.873998 ...
cosin 2 64 avg 0.976753 ### 0.952854 0.932301 0.982766 0.958503 ...
cosin 3 64 avg 0.981889 ### 0.976637 0.981754 0.987276 0.970671 ...
cosin 4 128 avg 0.979728 ### 0.993999 0.991858 0.990438 0.992766 ...
cosin 5 128 avg 0.970351 ### 0.772556 0.989541 0.986996 0.989563 ...
cosin 6 128 avg 0.954545 ### 0.950125 0.922964 0.946804 0.972852 ...
cosin 7 128 avg 0.977192 ### 0.994728 0.972071 0.995353 0.992700 ...
cosin 8 256 avg 0.977426 ### 0.968429 0.991248 0.991274 0.994450 ...
cosin 9 256 avg 0.962224 ### 0.985255 0.969171 0.958762 0.967461 ...
cosin 10 256 avg 0.954253 ### 0.984353 0.935643 0.656188 0.929778 ...
cosin 11 256 avg 0.971987 ### 0.997596 0.967681 0.476525 0.999115 ...
cosin 12 512 avg 0.972861 ### 0.968920 0.905907 0.993918 0.622953 ...
cosin 13 512 avg 0.959161 ### 0.935686 0.000000 0.642560 0.994388 ...
cosin 14 512 avg 0.963903 ### 0.979613 0.957169 0.976440 0.902512 ...
cosin 15 512 avg 0.963226 ### 0.977065 0.965819 0.998149 0.905297 ...
cosin 16 512 avg 0.960935 ### 0.861674 0.972926 0.950579 0.987609 ...
cosin 17 512 avg 0.961057 ### 0.738472 0.987884 0.999124 0.995397 ...
cosin 18 512 avg 0.960127 ### 0.935455 0.968909 0.970831 0.981240 ...
cosin 19 512 avg 0.963755 ### 0.972628 0.992305 0.999518 0.799737 ...
cosin 20 512 avg 0.949364 ### 0.922776 0.896038 0.945079 0.971338 ...
cosin 21 512 avg 0.961256 ### 0.902256 0.896438 0.923361 0.973974 ...
cosin 22 512 avg 0.946552 ### 0.963806 0.982075 0.878965 0.929992 ...
cosin 23 512 avg 0.953677 ### 0.953880 0.996364 0.936540 0.930796 ...
cosin 24 1024 avg 0.941197 ### 0.000000 0.992507 1.000000 0.994460 ...
cosin 25 1024 avg 0.973546 ### 1.000000 0.889181 0.000000 0.998084 ...
cosin 26 1024 avg 0.869351 ### 0.522966 0.000000 0.987009 0.000000 ...
cosin 27 1 avg 0.974982 ### 0.974982
cosin 28 1 avg 0.974982 ### 0.974982
cosin 29 1 avg 0.974982 ### 0.974982
cosin 30 1 avg 0.978486 ### 0.978486
---- Tengine Int8 tmfile create success, best wish for your INT8 inference has a low accuracy loss...\(^0^)/ ---- ---- Tengine Int8 tmfile create success, best wish for your INT8 inference has a low accuracy loss...\(^0^)/ ----
``` ```
......
此差异已折叠。
此差异已折叠。
...@@ -505,6 +505,11 @@ int save_graph_i8_perchannel(const char* model_file, const char* scale_file, con ...@@ -505,6 +505,11 @@ int save_graph_i8_perchannel(const char* model_file, const char* scale_file, con
if (internal) if (internal)
{ {
// TODO // TODO
for (int ch = 0; ch < channel_num; ch++)
{
weight_scale_list[ch] = weight_tensor->scale_list[ch];
weight_zp_list[ch] = 0;
}
} }
else else
{ {
......
...@@ -22,9 +22,14 @@ ...@@ -22,9 +22,14 @@
* Author: hhchen@openailab.com * Author: hhchen@openailab.com
*/ */
#pragma once
#include <string> #include <string>
#include <vector> #include <vector>
#include <unordered_map> #include <unordered_map>
#include <fstream>
#include <cstring>
#include <algorithm>
extern "C" { extern "C" {
#include "api/c_api.h" #include "api/c_api.h"
...@@ -34,11 +39,40 @@ extern "C" { ...@@ -34,11 +39,40 @@ extern "C" {
#include "graph/tensor.h" #include "graph/tensor.h"
#include "utility/sys_port.h" #include "utility/sys_port.h"
#include "utility/utils.h" #include "utility/utils.h"
#include "utility/log.h"
#include "utility/vector.h"
#include "../source/device/cpu/cpu_node.h"
#include "../source/device/cpu/cpu_graph.h"
#include "convolution_param.h"
#include "fc_param.h"
#include "pooling_param.h"
#include "relu_param.h"
} }
#include "quant_utils.hpp"
#include "quant_save_graph.hpp"
// Shorthand aliases for the lookup tables used throughout the quant tool.
// Modern `using` alias-declarations instead of C-style `typedef` (same types,
// clearer read direction: alias = type).
using dict_str2int = std::unordered_map<std::string, int>;                     // layer name -> flag/count
using dict_str2float = std::unordered_map<std::string, float>;                 // layer name -> scale / zero-point
using dict_uint2uint = std::unordered_map<uint32_t, uint32_t>;                 // index -> index mapping
using dict_uint2vecuint = std::unordered_map<uint32_t, std::vector<uint32_t> >; // index -> list of indices
using dict_uint2str = std::unordered_map<uint32_t, std::string>;               // index -> name
using dict_uint2doublex = std::unordered_map<uint32_t, std::vector<double> >;  // index -> per-channel values
#define ALGORITHM_MIN_MAX 0 #define ALGORITHM_MIN_MAX 0
#define ALGORITHM_KL 1 #define ALGORITHM_KL 1
#define ALGORITHM_ACIQ 2 #define ALGORITHM_ACIQ 2
#define ALGORITHM_DFQ 3
#define ALGORITHM_MM_EQ 4
// Per-node adjacency record for the IR graph, used while walking the graph
// during quantization. NOTE(review): usage is not visible in this chunk —
// field semantics below are inferred from the names; confirm against the
// traversal code in quant_dfq/quant_eq.
struct node_graph
{
    int pass;                               // traversal flag — presumably marks a node as visited/processed; TODO confirm
    std::vector<uint16_t> input_node_list;  // indices of producer nodes feeding this node
    std::vector<uint16_t> output_node_list; // indices of consumer nodes reading this node's outputs
};
class QuantTool class QuantTool
{ {
...@@ -46,7 +80,41 @@ public: ...@@ -46,7 +80,41 @@ public:
QuantTool(); QuantTool();
~QuantTool(); ~QuantTool();
int init();
int activation_quant_tool(); int activation_quant_tool();
int assess_quant_loss(int gen);
int quant_search();
int data_free_quant();
private:
void recursion_pass_through(struct graph* graphn, const char* layer_name, struct tensor* t,
dict_str2int& layer_used, dict_str2float& layer_scale,
dict_str2float& layer_zeropoint, dict_str2int& layer_pass);
struct exec_graph* get_exec_graph(struct graph* graphn);
void load_activation_scale(struct graph* graphn, const char* scale_file, int mode_sc);
int prerun_for_get_ir_tensor(void* graph, struct options opt);
void check_for_free();
void check_for_interlearve();
void weight_bias_requant(int search);
void conv_hcl_interleave_pack4_fp32(int M, int K, float* pA, float* pA_t);
void activation_requant(float* data, int elem_num, int bitcount, int symmetry, float scale, int zero_point = 0);
void weight_requant(struct tensor* weight_tensor, float* data, int elem_num, int bitcount, int symmetry, int elem_channel);
void weight_requant_search(struct tensor* weight_tensor, float* data, int elem_num, int bitcount, int symmetry, int elem_channel, float zoom);
void weight_requant_search(struct tensor* weight_tensor, float* data, int elem_num, int bitcount, int symmetry, int elem_channel, float* zoom);
void bias_requant(struct tensor* input_tensor, struct tensor* weight_tensor, struct tensor* bias_tensor,
float* data, int elem_num, int elem_channel);
void set_node_input_output_tensor(int idx, int imgi, int snum);
double cosin_similarity(std::vector<float>* in_a, std::vector<float>* in_b, uint32_t imgs_num, uint32_t output_num);
double cosin_similarity(std::vector<std::vector<float> >& in_a, std::vector<std::vector<float> >& in_b, uint32_t imgs_num, uint32_t output_num);
void cosin_similarity(std::vector<double>& cosin, std::vector<std::vector<float> >& in_a, std::vector<std::vector<float> >& in_b, uint32_t imgs_num, uint32_t output_num, uint32_t output_channel); // cosin dis perchannel
void weight_bias_reset();
void free_used_layers(int idx);
void gen_weight_scale(struct tensor* weight_tensor, float* data, int elem_num, int bitcount, int symmetry, int elem_channel);
int get_exec_node_message(int exec_node_idx);
void print_cosin(double* cosin, int idx, int output_channel);
public: public:
struct options opt; struct options opt;
...@@ -70,4 +138,72 @@ public: ...@@ -70,4 +138,72 @@ public:
int focus; // flag which indicates that focus process image is necessary(maybe using for YOLOv5, 0:OFF, 1:ON, default is 0) int focus; // flag which indicates that focus process image is necessary(maybe using for YOLOv5, 0:OFF, 1:ON, default is 0)
int inplace; // process the inplace quant scale of activation in some types of op, such as max pooling, ReLU, Flatten, Reshape, Clip int inplace; // process the inplace quant scale of activation in some types of op, such as max pooling, ReLU, Flatten, Reshape, Clip
int algorithm_type; // the type of quant algorithm(0:min-max, 1:kl, default is 0) int algorithm_type; // the type of quant algorithm(0:min-max, 1:kl, default is 0)
bool evaluate; // evaluate quantitative losses
private: // system variable
dict_uint2uint ir_exec;
dict_uint2uint exec_ir;
dict_uint2vecuint dict_free;
dict_uint2uint execidx_elemnum;
dict_uint2uint execidx_elemsize;
dict_uint2str execidx_nodename;
dict_uint2doublex execidx_loss;
int max_search_img_num;
std::vector<double> cosin;
private: // basic message
int img_size;
double cosin_max;
float scale_acc;
private: // ir graph variable
std::vector<std::vector<std::vector<float> > > fp32_out;
std::vector<std::vector<std::vector<float> > > fake_quant_out;
std::vector<std::vector<float> > input_datas_fp32;
std::vector<std::vector<float> > input_datas_fake_quant;
std::vector<std::vector<float> > out_imgs_fp32;
std::vector<std::vector<float> > out_imgs_fake_quant;
struct graph* graphn_fp32;
struct graph* graphn_fake_quant;
struct tensor* graph_input_tensor_fp32;
struct tensor* graph_input_tensor_fake_quant;
struct exec_graph* exec_graph_fp32;
struct exec_graph* exec_graph_fake_quant;
int exec_node_num;
private: // temp variable
uint16_t op_name;
struct exec_node* node_fp32;
struct exec_node* node_fake_quant;
struct node_ops* node_ops_fp32;
struct node_ops* node_ops_fake_quant;
struct tensor* input_tensor_fp32;
struct tensor* input_tensor_fake_quant;
struct tensor* weight_tensor_fp32;
struct tensor* weight_tensor_fake_quant;
struct tensor* bias_tensor_fp32;
struct tensor* bias_tensor_fake_quant;
struct tensor* output_tensor_fp32;
struct tensor* output_tensor_fake_quant;
float* weight_data_fp32;
float* weight_data_fake_quant;
uint32_t weight_size;
float* interleave_buffer_fp32;
float* interleave_buffer_fake_quant;
uint32_t interleave_size_fake;
float* bias_data_fp32;
float* bias_data_fake_quant;
uint32_t bias_size;
uint32_t output_channel;
struct conv_priv_info* conv_priv_info_fp32;
struct conv_priv_info* conv_priv_info_fake_quant;
struct conv_param* conv_param_fp32;
struct conv_param* conv_param_fake_quant;
}; };
...@@ -66,6 +66,7 @@ QuantTool::QuantTool() ...@@ -66,6 +66,7 @@ QuantTool::QuantTool()
this->focus = 0; this->focus = 0;
this->inplace = true; this->inplace = true;
this->algorithm_type = ALGORITHM_MIN_MAX; this->algorithm_type = ALGORITHM_MIN_MAX;
this->evaluate = false;
} }
QuantTool::~QuantTool() QuantTool::~QuantTool()
...@@ -163,6 +164,7 @@ int QuantTool::activation_quant_tool() ...@@ -163,6 +164,7 @@ int QuantTool::activation_quant_tool()
/* init minmax */ /* init minmax */
std::unordered_map<int, float> max_activation; std::unordered_map<int, float> max_activation;
std::unordered_map<int, float> min_activation; std::unordered_map<int, float> min_activation;
std::unordered_map<int, int> act_map;
uint32_t act_tensor_num = 0; uint32_t act_tensor_num = 0;
for (int i = 0; i < ir_graph->tensor_num; i++) for (int i = 0; i < ir_graph->tensor_num; i++)
{ {
...@@ -172,6 +174,7 @@ int QuantTool::activation_quant_tool() ...@@ -172,6 +174,7 @@ int QuantTool::activation_quant_tool()
act_tensor_num++; act_tensor_num++;
max_activation[i] = -FLT_MAX; max_activation[i] = -FLT_MAX;
min_activation[i] = FLT_MAX; min_activation[i] = FLT_MAX;
act_map[act_tensor_num - 1] = i;
} }
} }
...@@ -213,10 +216,134 @@ int QuantTool::activation_quant_tool() ...@@ -213,10 +216,134 @@ int QuantTool::activation_quant_tool()
} }
} }
} }
fprintf(stderr, "\n");
if (this->algorithm_type == ALGORITHM_KL) if (this->algorithm_type == ALGORITHM_KL)
{ {
/* todo support */ /* kl process divergence */
fprintf(stderr, "\r\n[****WARNING****]:Step 2 find original calibration kl threshold table NOT support temporarily!\n"); fprintf(stderr, "[Quant Tools Info]: Step 2, find calibration table.\n");
std::unordered_map<uint32_t, uint32_t> tensor_hist;
std::unordered_map<uint32_t, uint32_t> hist_tensor;
std::vector<std::vector<float> > hist_edge;
std::vector<std::vector<uint32_t> > hist_gram;
/* second loop, create histgram */
for (int nums = imgs_list.size() - 1; nums >= 0; nums--)
{
fprintf(stderr, "\r[Quant Tools Info]: Step 2, images %.5d / %.5d", nums + 1, img_num);
get_input_data_cv(imgs_list[nums].c_str(), input_data.data(), img_c, img_h, img_w, mean, scale, sw_RGB, center_crop, letterbox_rows, letterbox_cols, focus);
/* run graph */
if (run_graph(ir_graph, 1) < 0)
{
fprintf(stderr, "Run graph failed\n");
return -1;
}
/* calculate hist */
uint32_t inum = 0;
for (int i = 0; i < ir_graph->tensor_num; i++)
{
struct tensor* ir_tensor = ir_graph->tensor_list[i];
if (ir_tensor->tensor_type == TENSOR_TYPE_VAR || ir_tensor->tensor_type == TENSOR_TYPE_INPUT)
{
float step_max = std::abs(max_activation[i]);
if (std::abs(min_activation[i]) > step_max)
step_max = std::abs(min_activation[i]);
float step_bin = step_max / 2048.0f;
std::vector<float> every_edge;
if (nums == imgs_list.size() - 1)
{
for (int j = 0; j < 2048; j++)
{
float edge_float = (step_bin * (j + 0.5f));
every_edge.push_back(edge_float);
}
hist_edge.push_back(every_edge);
hist_gram.push_back(histCount((float*)ir_tensor->data, ir_tensor->elem_num, step_max));
}
else
{
std::vector<uint32_t> hist_tmp;
hist_tmp = histCount((float*)ir_tensor->data, ir_tensor->elem_num, step_max);
for (int j = 0; j < 2048; j++)
{
hist_gram[inum][j] += hist_tmp[j];
}
}
tensor_hist[i] = inum;
hist_tensor[inum] = i;
inum++;
}
}
}
fprintf(stderr, "\n");
/* save the calibration file with min-max algorithm with kl divergence */
int fake_quant_set = 127;
FILE* fp_kl = fopen("table_kl.scale", "wb");
for (int i = 0; i < act_tensor_num; i++)
{
struct tensor* t = ir_graph->tensor_list[act_map[i]];
int threshold_bin = threshold_distribution(hist_gram[i], fake_quant_set + 1);
fprintf(stderr, " threshold_bin %d \n", threshold_bin);
float act_scale = hist_edge[i][threshold_bin] / fake_quant_set;
int act_zero_point = 0;
/* the scale of softmax always is scale = 1 / 127.f */
for (int j = 0; j < ir_graph->node_num; j++)
{
struct node* noden = ir_graph->node_list[j];
struct tensor* tensor_tmp = get_ir_graph_tensor(ir_graph, noden->output_tensors[0]);
if (!(tensor_tmp->tensor_type == TENSOR_TYPE_INPUT || tensor_tmp->tensor_type == TENSOR_TYPE_VAR))
continue;
std::string tmp_op_name = get_op_name_from_type(noden->op.type);
std::string cur_name = t->name;
std::string tmp_name = tensor_tmp->name;
if ((cur_name == tmp_name) && tmp_op_name == "Softmax")
{
act_scale = 1 / 127.f;
act_zero_point = 0;
break;
}
}
/* the scale of eltwise */
for (int j = 0; j < ir_graph->node_num; j++)
{
struct node* noden = ir_graph->node_list[j];
std::string tmp_op_name = get_op_name_from_type(noden->op.type);
if (tmp_op_name == "Eltwise")
{
struct tensor* tensor_in0 = get_ir_graph_tensor(ir_graph, noden->input_tensors[0]);
struct tensor* tensor_in1 = get_ir_graph_tensor(ir_graph, noden->input_tensors[1]);
struct tensor* tensor_out = get_ir_graph_tensor(ir_graph, noden->output_tensors[0]);
std::string cur_name = t->name;
std::string tmp_name0 = tensor_in0->name;
std::string tmp_name1 = tensor_in1->name;
if ((cur_name == tmp_name0 || cur_name == tmp_name1))
{
act_scale = tensor_out->scale;
break;
}
}
}
t->scale = act_scale;
t->zero_point = 0;
fprintf(fp_kl, "%s %f %d\n", t->name, act_scale, act_zero_point);
}
fclose(fp_kl);
fprintf(stderr, "[Quant Tools Info]: Step 2, find calibration table done, output ./table_kl.scale\n");
} }
else if (this->algorithm_type == ALGORITHM_ACIQ) else if (this->algorithm_type == ALGORITHM_ACIQ)
{ {
...@@ -304,7 +431,7 @@ int QuantTool::activation_quant_tool() ...@@ -304,7 +431,7 @@ int QuantTool::activation_quant_tool()
fprintf(stderr, "\r\n[Quant Tools Info]: Step 2, find original calibration minmax threshold table done, output ./table_minmax.scale\n"); fprintf(stderr, "\r\n[Quant Tools Info]: Step 2, find original calibration minmax threshold table done, output ./table_minmax.scale\n");
} }
fprintf(stderr, "[Quant Tools Info]: Thread %d, image nums %d, total time %.2f ms, avg time %.2f ms\n", num_thread, img_num, total_time, total_time / img_num); // fprintf(stderr, "[Quant Tools Info]: Thread %d, image nums %d, total time %.2f ms, avg time %.2f ms\n", num_thread, img_num, total_time, total_time / img_num);
/* release tengine */ /* release tengine */
postrun_graph(ir_graph); postrun_graph(ir_graph);
...@@ -343,7 +470,7 @@ int main(int argc, char* argv[]) ...@@ -343,7 +470,7 @@ int main(int argc, char* argv[])
QuantTool quant_tool; QuantTool quant_tool;
int res; int res;
while ((res = getopt(argc, argv, "m:a:f:o:i:g:s:w:b:c:y:k:t:h")) != -1) while ((res = getopt(argc, argv, "m:a:f:o:i:g:s:w:b:c:y:k:z:t:h")) != -1)
{ {
switch (res) switch (res)
{ {
...@@ -390,6 +517,9 @@ int main(int argc, char* argv[]) ...@@ -390,6 +517,9 @@ int main(int argc, char* argv[])
case 'k': case 'k':
quant_tool.focus = atoi(optarg); quant_tool.focus = atoi(optarg);
break; break;
case 'z':
quant_tool.evaluate = atoi(optarg);
break;
case 't': case 't':
quant_tool.num_thread = atoi(optarg); quant_tool.num_thread = atoi(optarg);
quant_tool.opt.num_thread = atoi(optarg); quant_tool.opt.num_thread = atoi(optarg);
...@@ -444,35 +574,100 @@ int main(int argc, char* argv[]) ...@@ -444,35 +574,100 @@ int main(int argc, char* argv[])
fprintf(stderr, "YOLOv5 focus: %s\n", quant_tool.focus ? "ON" : "OFF"); fprintf(stderr, "YOLOv5 focus: %s\n", quant_tool.focus ? "ON" : "OFF");
fprintf(stderr, "Thread num : %d\n\n", quant_tool.num_thread); fprintf(stderr, "Thread num : %d\n\n", quant_tool.num_thread);
/* using 3rd calibration table file */ switch (quant_tool.algorithm_type)
if (quant_tool.scale_file.empty()) {
case ALGORITHM_MIN_MAX:
{ {
/* select algorithm */ if (quant_tool.scale_file.empty())
if (quant_tool.algorithm_type == ALGORITHM_MIN_MAX)
{ {
quant_tool.scale_file = "table_minmax.scale"; quant_tool.scale_file = "table_minmax.scale";
quant_tool.activation_quant_tool();
} }
else if (quant_tool.algorithm_type == ALGORITHM_KL) save_graph_i8_perchannel(quant_tool.model_file.c_str(), quant_tool.scale_file.c_str(), quant_tool.output_file, quant_tool.inplace, false);
/* Evaluate quantitative losses */
if (quant_tool.evaluate)
{
fprintf(stderr, "[Quant Tools Info]: Step Evaluate, evaluate quantitative losses\n");
quant_tool.assess_quant_loss(0);
}
break;
}
case ALGORITHM_KL:
{
if (quant_tool.scale_file.empty())
{ {
quant_tool.scale_file = "table_kl.scale"; quant_tool.scale_file = "table_kl.scale";
quant_tool.activation_quant_tool();
} }
else if (quant_tool.algorithm_type == ALGORITHM_ACIQ) save_graph_i8_perchannel(quant_tool.model_file.c_str(), quant_tool.scale_file.c_str(), quant_tool.output_file, quant_tool.inplace, false);
/* Evaluate quantitative losses */
if (quant_tool.evaluate)
{
fprintf(stderr, "[Quant Tools Info]: Step Evaluate, evaluate quantitative losses\n");
quant_tool.assess_quant_loss(0);
}
break;
}
case ALGORITHM_ACIQ:
{
if (quant_tool.scale_file.empty())
{ {
quant_tool.scale_file = "table_aciq.scale"; quant_tool.scale_file = "table_aciq.scale";
quant_tool.activation_quant_tool();
} }
else save_graph_i8_perchannel(quant_tool.model_file.c_str(), quant_tool.scale_file.c_str(), quant_tool.output_file, quant_tool.inplace, false);
/* Evaluate quantitative losses */
if (quant_tool.evaluate)
{
fprintf(stderr, "[Quant Tools Info]: Step Evaluate, evaluate quantitative losses\n");
quant_tool.assess_quant_loss(0);
}
break;
}
case ALGORITHM_DFQ:
{
quant_tool.data_free_quant();
quant_tool.model_file = "test_dfq_fp32.tmfile";
if (quant_tool.scale_file.empty())
{ {
fprintf(stderr, "[Quant Tools Info]: algorithm not specified, using default type MIN MAX\n");
quant_tool.scale_file = "table_minmax.scale"; quant_tool.scale_file = "table_minmax.scale";
quant_tool.activation_quant_tool();
} }
save_graph_i8_perchannel(quant_tool.model_file.c_str(), quant_tool.scale_file.c_str(), quant_tool.output_file, quant_tool.inplace, false);
/* quantize activation */ /* Evaluate quantitative losses */
quant_tool.activation_quant_tool(); if (quant_tool.evaluate)
{
fprintf(stderr, "[Quant Tools Info]: Step Evaluate, evaluate quantitative losses\n");
quant_tool.assess_quant_loss(0);
}
break;
}
case ALGORITHM_MM_EQ:
{
if (quant_tool.scale_file.empty())
{
quant_tool.scale_file = "table_minmax.scale";
quant_tool.activation_quant_tool();
}
/* Evaluate quantitative losses */
if (quant_tool.evaluate)
{
fprintf(stderr, "[Quant Tools Info]: Step Evaluate, evaluate quantitative losses\n");
quant_tool.assess_quant_loss(0);
}
/* Enable EQ search */
fprintf(stderr, "[Quant Tools Info]: Step Search, enable EQ search\n");
quant_tool.quant_search();
quant_tool.model_file = "save_i8_eq.tmfile";
save_graph_i8_perchannel(quant_tool.model_file.c_str(), quant_tool.scale_file.c_str(), quant_tool.output_file, quant_tool.inplace, true);
break;
}
default:
{
fprintf(stderr, "Unsupported quantization type ... \n");
break;
}
} }
/* quantize weight/bias and save into int8 tmfile */
fprintf(stderr, "[Quant Tools Info]: Calibration file is using %s\n", quant_tool.scale_file.c_str());
save_graph_i8_perchannel(quant_tool.model_file.c_str(), quant_tool.scale_file.c_str(), quant_tool.output_file, quant_tool.inplace, false);
fprintf(stderr, "\n---- Tengine Int8 tmfile create success, best wish for your INT8 inference has a low accuracy loss...\\(^0^)/ ----\n"); fprintf(stderr, "\n---- Tengine Int8 tmfile create success, best wish for your INT8 inference has a low accuracy loss...\\(^0^)/ ----\n");
......
...@@ -77,7 +77,7 @@ void split(float* array, char* str, const char* del) ...@@ -77,7 +77,7 @@ void split(float* array, char* str, const char* del)
} }
void get_input_data_cv(const char* image_file, float* input_data, int img_c, int img_h, int img_w, const float* mean, void get_input_data_cv(const char* image_file, float* input_data, int img_c, int img_h, int img_w, const float* mean,
const float* scale, int sw_RGB = 0, int center_crop = 0, int letterbox_rows = 0, int letterbox_cols = 0, int focus = 0) const float* scale, int sw_RGB = 1, int center_crop = 0, int letterbox_rows = 0, int letterbox_cols = 0, int focus = 0)
{ {
/* only for yolov5s */ /* only for yolov5s */
if (focus == 1 && letterbox_rows > 0 && letterbox_cols > 0) if (focus == 1 && letterbox_rows > 0 && letterbox_cols > 0)
...@@ -411,6 +411,22 @@ std::vector<uint32_t> histCount(float* data, uint32_t elem_num, float max_val, f ...@@ -411,6 +411,22 @@ std::vector<uint32_t> histCount(float* data, uint32_t elem_num, float max_val, f
return hist; return hist;
} }
/* Build a 2048-bin histogram of |data[i]| over [0, abs_max] for KL-divergence
 * threshold search. Bin width is abs_max / 2047 so the largest magnitude lands
 * in the last bin; exact zeros are skipped (they carry no range information).
 *
 * data     : tensor values (fp32), elem_num entries
 * abs_max  : maximum absolute value of the tensor (histogram range)
 * returns  : 2048 zero-initialized-then-filled bin counts
 *
 * Fixes vs. previous version: removed unused local `bin_zp`; loop index is
 * unsigned to match elem_num (no signed/unsigned mismatch); guards the
 * all-zero tensor (abs_max == 0 would divide by zero); clamps the bin index
 * so a value fractionally above abs_max cannot index out of bounds. */
std::vector<uint32_t> histCount(float* data, uint32_t elem_num, float abs_max)
{
    std::vector<uint32_t> hist(2048, 0);

    const float bin_scale = abs_max / 2047.f;
    if (bin_scale == 0.f) // all-zero (or empty-range) tensor: nothing to count
        return hist;

    for (uint32_t i = 0; i < elem_num; i++)
    {
        if (data[i] != 0)
        {
            uint32_t hist_idx = (uint32_t)std::lround(std::abs(data[i]) / bin_scale);
            if (hist_idx > 2047) // defensive clamp against fp rounding past the top bin
                hist_idx = 2047;
            hist[hist_idx]++;
        }
    }
    return hist;
}
float compute_kl_divergence(std::vector<float>& dist_a, std::vector<float>& dist_b) float compute_kl_divergence(std::vector<float>& dist_a, std::vector<float>& dist_b)
{ {
const size_t length = dist_a.size(); const size_t length = dist_a.size();
......
...@@ -40,6 +40,7 @@ void get_input_data_cv(const char* image_file, float* input_data, int img_c, int ...@@ -40,6 +40,7 @@ void get_input_data_cv(const char* image_file, float* input_data, int img_c, int
void readFileList(std::string basePath, std::vector<std::string>& imgs); void readFileList(std::string basePath, std::vector<std::string>& imgs);
std::vector<uint32_t> histCount(float* data, uint32_t elem_num, float max_val, float min_val); std::vector<uint32_t> histCount(float* data, uint32_t elem_num, float max_val, float min_val);
std::vector<uint32_t> histCount(float* data, uint32_t elem_num, float abs_max);
float compute_kl_divergence(std::vector<float>& dist_a, std::vector<float>& dist_b); float compute_kl_divergence(std::vector<float>& dist_a, std::vector<float>& dist_b);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册