未验证 提交 3e71f042 编写于 作者: B BowShotDS 提交者: GitHub

update DFQ/EQ/Evaluate int8 perchannel quant tool (#1112)

* update DFQ/EQ/Evaluate int8 perchannel quant tool

* apply code-format changes
Co-authored-by: Your Name <you@example.com>
Co-authored-by: BowShotDS <BowShotDS@users.noreply.github.com>
上级 f6761750
...@@ -30,6 +30,8 @@ IF (${TENGINE_TARGET_PROCESSOR} MATCHES "X86") ...@@ -30,6 +30,8 @@ IF (${TENGINE_TARGET_PROCESSOR} MATCHES "X86")
ADD_EXECUTABLE( ADD_EXECUTABLE(
${name} ${name}
./quant_save_graph.cpp ./quant_save_graph.cpp
./algorithm/quant_dfq.cpp
./algorithm/quant_eq.cpp
./quant_utils.cpp ./quant_utils.cpp
../save_graph/save_graph.cpp ../save_graph/save_graph.cpp
../save_graph/tm2_op_save.cpp ../save_graph/tm2_op_save.cpp
......
...@@ -76,7 +76,7 @@ Status : int8, per-channel, symmetric ...@@ -76,7 +76,7 @@ Status : int8, per-channel, symmetric
Before use the quant tool, **you need Float32 tmfile and Calibration Dataset**, the image num of calibration dataset we suggest to use 500-1000. Before use the quant tool, **you need Float32 tmfile and Calibration Dataset**, the image num of calibration dataset we suggest to use 500-1000.
``` ```
$ .quant_tool_int8 -m ./mobilenet_fp32.tmfile -i ./dataset -o ./mobilenet_int8.tmfile -g 3,224,224 -w 104.007,116.669,122.679 -s 0.017,0.017,0.017 $ .quant_tool_int8 -m ./mobilenet_fp32.tmfile -i ./dataset -o ./mobilenet_int8.tmfile -g 3,224,224 -w 104.007,116.669,122.679 -s 0.017,0.017,0.017 -z 1
---- Tengine Post Training Quantization Tool ---- ---- Tengine Post Training Quantization Tool ----
...@@ -111,6 +111,38 @@ Thread num : 1 ...@@ -111,6 +111,38 @@ Thread num : 1
[Quant Tools Info]: Step 4, quantize activation tensor done. [Quant Tools Info]: Step 4, quantize activation tensor done.
[Quant Tools Info]: Step 5, quantize weight tensor done. [Quant Tools Info]: Step 5, quantize weight tensor done.
[Quant Tools Info]: Step 6, save Int8 tmfile done, ./mobilenet_int8.tmfile [Quant Tools Info]: Step 6, save Int8 tmfile done, ./mobilenet_int8.tmfile
[Quant Tools Info]: Step Evaluate, evaluate quantitative losses
cosin 0 32 avg 0.995317 ### 0.000000 0.953895 0.998249 0.969256 ...
cosin 1 32 avg 0.982403 ### 0.000000 0.902383 0.964436 0.873998 ...
cosin 2 64 avg 0.976753 ### 0.952854 0.932301 0.982766 0.958503 ...
cosin 3 64 avg 0.981889 ### 0.976637 0.981754 0.987276 0.970671 ...
cosin 4 128 avg 0.979728 ### 0.993999 0.991858 0.990438 0.992766 ...
cosin 5 128 avg 0.970351 ### 0.772556 0.989541 0.986996 0.989563 ...
cosin 6 128 avg 0.954545 ### 0.950125 0.922964 0.946804 0.972852 ...
cosin 7 128 avg 0.977192 ### 0.994728 0.972071 0.995353 0.992700 ...
cosin 8 256 avg 0.977426 ### 0.968429 0.991248 0.991274 0.994450 ...
cosin 9 256 avg 0.962224 ### 0.985255 0.969171 0.958762 0.967461 ...
cosin 10 256 avg 0.954253 ### 0.984353 0.935643 0.656188 0.929778 ...
cosin 11 256 avg 0.971987 ### 0.997596 0.967681 0.476525 0.999115 ...
cosin 12 512 avg 0.972861 ### 0.968920 0.905907 0.993918 0.622953 ...
cosin 13 512 avg 0.959161 ### 0.935686 0.000000 0.642560 0.994388 ...
cosin 14 512 avg 0.963903 ### 0.979613 0.957169 0.976440 0.902512 ...
cosin 15 512 avg 0.963226 ### 0.977065 0.965819 0.998149 0.905297 ...
cosin 16 512 avg 0.960935 ### 0.861674 0.972926 0.950579 0.987609 ...
cosin 17 512 avg 0.961057 ### 0.738472 0.987884 0.999124 0.995397 ...
cosin 18 512 avg 0.960127 ### 0.935455 0.968909 0.970831 0.981240 ...
cosin 19 512 avg 0.963755 ### 0.972628 0.992305 0.999518 0.799737 ...
cosin 20 512 avg 0.949364 ### 0.922776 0.896038 0.945079 0.971338 ...
cosin 21 512 avg 0.961256 ### 0.902256 0.896438 0.923361 0.973974 ...
cosin 22 512 avg 0.946552 ### 0.963806 0.982075 0.878965 0.929992 ...
cosin 23 512 avg 0.953677 ### 0.953880 0.996364 0.936540 0.930796 ...
cosin 24 1024 avg 0.941197 ### 0.000000 0.992507 1.000000 0.994460 ...
cosin 25 1024 avg 0.973546 ### 1.000000 0.889181 0.000000 0.998084 ...
cosin 26 1024 avg 0.869351 ### 0.522966 0.000000 0.987009 0.000000 ...
cosin 27 1 avg 0.974982 ### 0.974982
cosin 28 1 avg 0.974982 ### 0.974982
cosin 29 1 avg 0.974982 ### 0.974982
cosin 30 1 avg 0.978486 ### 0.978486
---- Tengine Int8 tmfile create success, best wish for your INT8 inference has a low accuracy loss...\(^0^)/ ---- ---- Tengine Int8 tmfile create success, best wish for your INT8 inference has a low accuracy loss...\(^0^)/ ----
``` ```
......
此差异已折叠。
此差异已折叠。
...@@ -505,6 +505,11 @@ int save_graph_i8_perchannel(const char* model_file, const char* scale_file, con ...@@ -505,6 +505,11 @@ int save_graph_i8_perchannel(const char* model_file, const char* scale_file, con
if (internal) if (internal)
{ {
// TODO // TODO
for (int ch = 0; ch < channel_num; ch++)
{
weight_scale_list[ch] = weight_tensor->scale_list[ch];
weight_zp_list[ch] = 0;
}
} }
else else
{ {
......
...@@ -22,9 +22,14 @@ ...@@ -22,9 +22,14 @@
* Author: hhchen@openailab.com * Author: hhchen@openailab.com
*/ */
#pragma once
#include <string> #include <string>
#include <vector> #include <vector>
#include <unordered_map> #include <unordered_map>
#include <fstream>
#include <cstring>
#include <algorithm>
extern "C" { extern "C" {
#include "api/c_api.h" #include "api/c_api.h"
...@@ -34,11 +39,40 @@ extern "C" { ...@@ -34,11 +39,40 @@ extern "C" {
#include "graph/tensor.h" #include "graph/tensor.h"
#include "utility/sys_port.h" #include "utility/sys_port.h"
#include "utility/utils.h" #include "utility/utils.h"
#include "utility/log.h"
#include "utility/vector.h"
#include "../source/device/cpu/cpu_node.h"
#include "../source/device/cpu/cpu_graph.h"
#include "convolution_param.h"
#include "fc_param.h"
#include "pooling_param.h"
#include "relu_param.h"
} }
#include "quant_utils.hpp"
#include "quant_save_graph.hpp"
// Shorthand aliases for the lookup tables used throughout the quant tool.
// Modern `using` alias-declarations instead of C-style `typedef` (same types,
// clearer read direction: alias = type).
using dict_str2int = std::unordered_map<std::string, int>;                     // layer name -> flag/count
using dict_str2float = std::unordered_map<std::string, float>;                 // layer name -> scale / zero-point
using dict_uint2uint = std::unordered_map<uint32_t, uint32_t>;                 // index -> index mapping
using dict_uint2vecuint = std::unordered_map<uint32_t, std::vector<uint32_t> >; // index -> list of indices
using dict_uint2str = std::unordered_map<uint32_t, std::string>;               // index -> name
using dict_uint2doublex = std::unordered_map<uint32_t, std::vector<double> >;  // index -> per-channel values
#define ALGORITHM_MIN_MAX 0 #define ALGORITHM_MIN_MAX 0
#define ALGORITHM_KL 1 #define ALGORITHM_KL 1
#define ALGORITHM_ACIQ 2 #define ALGORITHM_ACIQ 2
#define ALGORITHM_DFQ 3
#define ALGORITHM_MM_EQ 4
// Per-node adjacency record for the IR graph, used while walking the graph
// during quantization. NOTE(review): usage is not visible in this chunk —
// field semantics below are inferred from the names; confirm against the
// traversal code in quant_dfq/quant_eq.
struct node_graph
{
    int pass;                               // traversal flag — presumably marks a node as visited/processed; TODO confirm
    std::vector<uint16_t> input_node_list;  // indices of producer nodes feeding this node
    std::vector<uint16_t> output_node_list; // indices of consumer nodes reading this node's outputs
};
class QuantTool class QuantTool
{ {
...@@ -46,7 +80,41 @@ public: ...@@ -46,7 +80,41 @@ public:
QuantTool(); QuantTool();
~QuantTool(); ~QuantTool();
int init();
int activation_quant_tool(); int activation_quant_tool();
int assess_quant_loss(int gen);
int quant_search();
int data_free_quant();
private:
void recursion_pass_through(struct graph* graphn, const char* layer_name, struct tensor* t,
dict_str2int& layer_used, dict_str2float& layer_scale,
dict_str2float& layer_zeropoint, dict_str2int& layer_pass);
struct exec_graph* get_exec_graph(struct graph* graphn);
void load_activation_scale(struct graph* graphn, const char* scale_file, int mode_sc);
int prerun_for_get_ir_tensor(void* graph, struct options opt);
void check_for_free();
void check_for_interlearve();
void weight_bias_requant(int search);
void conv_hcl_interleave_pack4_fp32(int M, int K, float* pA, float* pA_t);
void activation_requant(float* data, int elem_num, int bitcount, int symmetry, float scale, int zero_point = 0);
void weight_requant(struct tensor* weight_tensor, float* data, int elem_num, int bitcount, int symmetry, int elem_channel);
void weight_requant_search(struct tensor* weight_tensor, float* data, int elem_num, int bitcount, int symmetry, int elem_channel, float zoom);
void weight_requant_search(struct tensor* weight_tensor, float* data, int elem_num, int bitcount, int symmetry, int elem_channel, float* zoom);
void bias_requant(struct tensor* input_tensor, struct tensor* weight_tensor, struct tensor* bias_tensor,
float* data, int elem_num, int elem_channel);
void set_node_input_output_tensor(int idx, int imgi, int snum);
double cosin_similarity(std::vector<float>* in_a, std::vector<float>* in_b, uint32_t imgs_num, uint32_t output_num);
double cosin_similarity(std::vector<std::vector<float> >& in_a, std::vector<std::vector<float> >& in_b, uint32_t imgs_num, uint32_t output_num);
void cosin_similarity(std::vector<double>& cosin, std::vector<std::vector<float> >& in_a, std::vector<std::vector<float> >& in_b, uint32_t imgs_num, uint32_t output_num, uint32_t output_channel); // cosin dis perchannel
void weight_bias_reset();
void free_used_layers(int idx);
void gen_weight_scale(struct tensor* weight_tensor, float* data, int elem_num, int bitcount, int symmetry, int elem_channel);
int get_exec_node_message(int exec_node_idx);
void print_cosin(double* cosin, int idx, int output_channel);
public: public:
struct options opt; struct options opt;
...@@ -70,4 +138,72 @@ public: ...@@ -70,4 +138,72 @@ public:
int focus; // flag which indicates that focus process image is necessary(maybe using for YOLOv5, 0:OFF, 1:ON, default is 0) int focus; // flag which indicates that focus process image is necessary(maybe using for YOLOv5, 0:OFF, 1:ON, default is 0)
int inplace; // process the inplace quant scale of activation in some types of op, such as max pooling, ReLU, Flatten, Reshape, Clip int inplace; // process the inplace quant scale of activation in some types of op, such as max pooling, ReLU, Flatten, Reshape, Clip
int algorithm_type; // the type of quant algorithm(0:min-max, 1:kl, default is 0) int algorithm_type; // the type of quant algorithm(0:min-max, 1:kl, default is 0)
bool evaluate; // evaluate quantitative losses
private: // system variable
dict_uint2uint ir_exec;
dict_uint2uint exec_ir;
dict_uint2vecuint dict_free;
dict_uint2uint execidx_elemnum;
dict_uint2uint execidx_elemsize;
dict_uint2str execidx_nodename;
dict_uint2doublex execidx_loss;
int max_search_img_num;
std::vector<double> cosin;
private: // basic message
int img_size;
double cosin_max;
float scale_acc;
private: // ir graph variable
std::vector<std::vector<std::vector<float> > > fp32_out;
std::vector<std::vector<std::vector<float> > > fake_quant_out;
std::vector<std::vector<float> > input_datas_fp32;
std::vector<std::vector<float> > input_datas_fake_quant;
std::vector<std::vector<float> > out_imgs_fp32;
std::vector<std::vector<float> > out_imgs_fake_quant;
struct graph* graphn_fp32;
struct graph* graphn_fake_quant;
struct tensor* graph_input_tensor_fp32;
struct tensor* graph_input_tensor_fake_quant;
struct exec_graph* exec_graph_fp32;
struct exec_graph* exec_graph_fake_quant;
int exec_node_num;
private: // temp variable
uint16_t op_name;
struct exec_node* node_fp32;
struct exec_node* node_fake_quant;
struct node_ops* node_ops_fp32;
struct node_ops* node_ops_fake_quant;
struct tensor* input_tensor_fp32;
struct tensor* input_tensor_fake_quant;
struct tensor* weight_tensor_fp32;
struct tensor* weight_tensor_fake_quant;
struct tensor* bias_tensor_fp32;
struct tensor* bias_tensor_fake_quant;
struct tensor* output_tensor_fp32;
struct tensor* output_tensor_fake_quant;
float* weight_data_fp32;
float* weight_data_fake_quant;
uint32_t weight_size;
float* interleave_buffer_fp32;
float* interleave_buffer_fake_quant;
uint32_t interleave_size_fake;
float* bias_data_fp32;
float* bias_data_fake_quant;
uint32_t bias_size;
uint32_t output_channel;
struct conv_priv_info* conv_priv_info_fp32;
struct conv_priv_info* conv_priv_info_fake_quant;
struct conv_param* conv_param_fp32;
struct conv_param* conv_param_fake_quant;
}; };
...@@ -66,6 +66,7 @@ QuantTool::QuantTool() ...@@ -66,6 +66,7 @@ QuantTool::QuantTool()
this->focus = 0; this->focus = 0;
this->inplace = true; this->inplace = true;
this->algorithm_type = ALGORITHM_MIN_MAX; this->algorithm_type = ALGORITHM_MIN_MAX;
this->evaluate = false;
} }
QuantTool::~QuantTool() QuantTool::~QuantTool()
...@@ -163,6 +164,7 @@ int QuantTool::activation_quant_tool() ...@@ -163,6 +164,7 @@ int QuantTool::activation_quant_tool()
/* init minmax */ /* init minmax */
std::unordered_map<int, float> max_activation; std::unordered_map<int, float> max_activation;
std::unordered_map<int, float> min_activation; std::unordered_map<int, float> min_activation;
std::unordered_map<int, int> act_map;
uint32_t act_tensor_num = 0; uint32_t act_tensor_num = 0;
for (int i = 0; i < ir_graph->tensor_num; i++) for (int i = 0; i < ir_graph->tensor_num; i++)
{ {
...@@ -172,6 +174,7 @@ int QuantTool::activation_quant_tool() ...@@ -172,6 +174,7 @@ int QuantTool::activation_quant_tool()
act_tensor_num++; act_tensor_num++;
max_activation[i] = -FLT_MAX; max_activation[i] = -FLT_MAX;
min_activation[i] = FLT_MAX; min_activation[i] = FLT_MAX;
act_map[act_tensor_num - 1] = i;
} }
} }
...@@ -213,10 +216,134 @@ int QuantTool::activation_quant_tool() ...@@ -213,10 +216,134 @@ int QuantTool::activation_quant_tool()
} }
} }
} }
fprintf(stderr, "\n");
if (this->algorithm_type == ALGORITHM_KL) if (this->algorithm_type == ALGORITHM_KL)
{ {
/* todo support */ /* kl process divergence */
fprintf(stderr, "\r\n[****WARNING****]:Step 2 find original calibration kl threshold table NOT support temporarily!\n"); fprintf(stderr, "[Quant Tools Info]: Step 2, find calibration table.\n");
std::unordered_map<uint32_t, uint32_t> tensor_hist;
std::unordered_map<uint32_t, uint32_t> hist_tensor;
std::vector<std::vector<float> > hist_edge;
std::vector<std::vector<uint32_t> > hist_gram;
/* second loop, create histgram */
for (int nums = imgs_list.size() - 1; nums >= 0; nums--)
{
fprintf(stderr, "\r[Quant Tools Info]: Step 2, images %.5d / %.5d", nums + 1, img_num);
get_input_data_cv(imgs_list[nums].c_str(), input_data.data(), img_c, img_h, img_w, mean, scale, sw_RGB, center_crop, letterbox_rows, letterbox_cols, focus);
/* run graph */
if (run_graph(ir_graph, 1) < 0)
{
fprintf(stderr, "Run graph failed\n");
return -1;
}
/* calculate hist */
uint32_t inum = 0;
for (int i = 0; i < ir_graph->tensor_num; i++)
{
struct tensor* ir_tensor = ir_graph->tensor_list[i];
if (ir_tensor->tensor_type == TENSOR_TYPE_VAR || ir_tensor->tensor_type == TENSOR_TYPE_INPUT)
{
float step_max = std::abs(max_activation[i]);
if (std::abs(min_activation[i]) > step_max)
step_max = std::abs(min_activation[i]);
float step_bin = step_max / 2048.0f;
std::vector<float> every_edge;
if (nums == imgs_list.size() - 1)
{
for (int j = 0; j < 2048; j++)
{
float edge_float = (step_bin * (j + 0.5f));
every_edge.push_back(edge_float);
}
hist_edge.push_back(every_edge);
hist_gram.push_back(histCount((float*)ir_tensor->data, ir_tensor->elem_num, step_max));
}
else
{
std::vector<uint32_t> hist_tmp;
hist_tmp = histCount((float*)ir_tensor->data, ir_tensor->elem_num, step_max);
for (int j = 0; j < 2048; j++)
{
hist_gram[inum][j] += hist_tmp[j];
}
}
tensor_hist[i] = inum;
hist_tensor[inum] = i;
inum++;
}
}
}
fprintf(stderr, "\n");
/* save the calibration file with min-max algorithm with kl divergence */
int fake_quant_set = 127;
FILE* fp_kl = fopen("table_kl.scale", "wb");
for (int i = 0; i < act_tensor_num; i++)
{
struct tensor* t = ir_graph->tensor_list[act_map[i]];
int threshold_bin = threshold_distribution(hist_gram[i], fake_quant_set + 1);
fprintf(stderr, " threshold_bin %d \n", threshold_bin);
float act_scale = hist_edge[i][threshold_bin] / fake_quant_set;
int act_zero_point = 0;
/* the scale of softmax always is scale = 1 / 127.f */
for (int j = 0; j < ir_graph->node_num; j++)
{
struct node* noden = ir_graph->node_list[j];
struct tensor* tensor_tmp = get_ir_graph_tensor(ir_graph, noden->output_tensors[0]);
if (!(tensor_tmp->tensor_type == TENSOR_TYPE_INPUT || tensor_tmp->tensor_type == TENSOR_TYPE_VAR))
continue;
std::string tmp_op_name = get_op_name_from_type(noden->op.type);
std::string cur_name = t->name;
std::string tmp_name = tensor_tmp->name;
if ((cur_name == tmp_name) && tmp_op_name == "Softmax")
{
act_scale = 1 / 127.f;
act_zero_point = 0;
break;
}
}
/* the scale of eltwise */
for (int j = 0; j < ir_graph->node_num; j++)
{
struct node* noden = ir_graph->node_list[j];
std::string tmp_op_name = get_op_name_from_type(noden->op.type);
if (tmp_op_name == "Eltwise")
{
struct tensor* tensor_in0 = get_ir_graph_tensor(ir_graph, noden->input_tensors[0]);
struct tensor* tensor_in1 = get_ir_graph_tensor(ir_graph, noden->input_tensors[1]);
struct tensor* tensor_out = get_ir_graph_tensor(ir_graph, noden->output_tensors[0]);
std::string cur_name = t->name;
std::string tmp_name0 = tensor_in0->name;
std::string tmp_name1 = tensor_in1->name;
if ((cur_name == tmp_name0 || cur_name == tmp_name1))
{
act_scale = tensor_out->scale;
break;
}
}
}
t->scale = act_scale;
t->zero_point = 0;
fprintf(fp_kl, "%s %f %d\n", t->name, act_scale, act_zero_point);
}
fclose(fp_kl);
fprintf(stderr, "[Quant Tools Info]: Step 2, find calibration table done, output ./table_kl.scale\n");
} }
else if (this->algorithm_type == ALGORITHM_ACIQ) else if (this->algorithm_type == ALGORITHM_ACIQ)
{ {
...@@ -304,7 +431,7 @@ int QuantTool::activation_quant_tool() ...@@ -304,7 +431,7 @@ int QuantTool::activation_quant_tool()
fprintf(stderr, "\r\n[Quant Tools Info]: Step 2, find original calibration minmax threshold table done, output ./table_minmax.scale\n"); fprintf(stderr, "\r\n[Quant Tools Info]: Step 2, find original calibration minmax threshold table done, output ./table_minmax.scale\n");
} }
fprintf(stderr, "[Quant Tools Info]: Thread %d, image nums %d, total time %.2f ms, avg time %.2f ms\n", num_thread, img_num, total_time, total_time / img_num); // fprintf(stderr, "[Quant Tools Info]: Thread %d, image nums %d, total time %.2f ms, avg time %.2f ms\n", num_thread, img_num, total_time, total_time / img_num);
/* release tengine */ /* release tengine */
postrun_graph(ir_graph); postrun_graph(ir_graph);
...@@ -343,7 +470,7 @@ int main(int argc, char* argv[]) ...@@ -343,7 +470,7 @@ int main(int argc, char* argv[])
QuantTool quant_tool; QuantTool quant_tool;
int res; int res;
while ((res = getopt(argc, argv, "m:a:f:o:i:g:s:w:b:c:y:k:t:h")) != -1) while ((res = getopt(argc, argv, "m:a:f:o:i:g:s:w:b:c:y:k:z:t:h")) != -1)
{ {
switch (res) switch (res)
{ {
...@@ -390,6 +517,9 @@ int main(int argc, char* argv[]) ...@@ -390,6 +517,9 @@ int main(int argc, char* argv[])
case 'k': case 'k':
quant_tool.focus = atoi(optarg); quant_tool.focus = atoi(optarg);
break; break;
case 'z':
quant_tool.evaluate = atoi(optarg);
break;
case 't': case 't':
quant_tool.num_thread = atoi(optarg); quant_tool.num_thread = atoi(optarg);
quant_tool.opt.num_thread = atoi(optarg); quant_tool.opt.num_thread = atoi(optarg);
...@@ -444,35 +574,100 @@ int main(int argc, char* argv[]) ...@@ -444,35 +574,100 @@ int main(int argc, char* argv[])
fprintf(stderr, "YOLOv5 focus: %s\n", quant_tool.focus ? "ON" : "OFF"); fprintf(stderr, "YOLOv5 focus: %s\n", quant_tool.focus ? "ON" : "OFF");
fprintf(stderr, "Thread num : %d\n\n", quant_tool.num_thread); fprintf(stderr, "Thread num : %d\n\n", quant_tool.num_thread);
/* using 3rd calibration table file */ switch (quant_tool.algorithm_type)
if (quant_tool.scale_file.empty()) {
case ALGORITHM_MIN_MAX:
{ {
/* select algorithm */ if (quant_tool.scale_file.empty())
if (quant_tool.algorithm_type == ALGORITHM_MIN_MAX)
{ {
quant_tool.scale_file = "table_minmax.scale"; quant_tool.scale_file = "table_minmax.scale";
quant_tool.activation_quant_tool();
} }
else if (quant_tool.algorithm_type == ALGORITHM_KL) save_graph_i8_perchannel(quant_tool.model_file.c_str(), quant_tool.scale_file.c_str(), quant_tool.output_file, quant_tool.inplace, false);
/* Evaluate quantitative losses */
if (quant_tool.evaluate)
{
fprintf(stderr, "[Quant Tools Info]: Step Evaluate, evaluate quantitative losses\n");
quant_tool.assess_quant_loss(0);
}
break;
}
case ALGORITHM_KL:
{
if (quant_tool.scale_file.empty())
{ {
quant_tool.scale_file = "table_kl.scale"; quant_tool.scale_file = "table_kl.scale";
quant_tool.activation_quant_tool();
} }
else if (quant_tool.algorithm_type == ALGORITHM_ACIQ) save_graph_i8_perchannel(quant_tool.model_file.c_str(), quant_tool.scale_file.c_str(), quant_tool.output_file, quant_tool.inplace, false);
/* Evaluate quantitative losses */
if (quant_tool.evaluate)
{
fprintf(stderr, "[Quant Tools Info]: Step Evaluate, evaluate quantitative losses\n");
quant_tool.assess_quant_loss(0);
}
break;
}
case ALGORITHM_ACIQ:
{
if (quant_tool.scale_file.empty())
{ {
quant_tool.scale_file = "table_aciq.scale"; quant_tool.scale_file = "table_aciq.scale";
quant_tool.activation_quant_tool();
} }
else save_graph_i8_perchannel(quant_tool.model_file.c_str(), quant_tool.scale_file.c_str(), quant_tool.output_file, quant_tool.inplace, false);
/* Evaluate quantitative losses */
if (quant_tool.evaluate)
{
fprintf(stderr, "[Quant Tools Info]: Step Evaluate, evaluate quantitative losses\n");
quant_tool.assess_quant_loss(0);
}
break;
}
case ALGORITHM_DFQ:
{
quant_tool.data_free_quant();
quant_tool.model_file = "test_dfq_fp32.tmfile";
if (quant_tool.scale_file.empty())
{ {
fprintf(stderr, "[Quant Tools Info]: algorithm not specified, using default type MIN MAX\n");
quant_tool.scale_file = "table_minmax.scale"; quant_tool.scale_file = "table_minmax.scale";
quant_tool.activation_quant_tool();
} }
save_graph_i8_perchannel(quant_tool.model_file.c_str(), quant_tool.scale_file.c_str(), quant_tool.output_file, quant_tool.inplace, false);
/* quantize activation */ /* Evaluate quantitative losses */
quant_tool.activation_quant_tool(); if (quant_tool.evaluate)
{
fprintf(stderr, "[Quant Tools Info]: Step Evaluate, evaluate quantitative losses\n");
quant_tool.assess_quant_loss(0);
}
break;
}
case ALGORITHM_MM_EQ:
{
if (quant_tool.scale_file.empty())
{
quant_tool.scale_file = "table_minmax.scale";
quant_tool.activation_quant_tool();
}
/* Evaluate quantitative losses */
if (quant_tool.evaluate)
{
fprintf(stderr, "[Quant Tools Info]: Step Evaluate, evaluate quantitative losses\n");
quant_tool.assess_quant_loss(0);
}
/* Enable EQ search */
fprintf(stderr, "[Quant Tools Info]: Step Search, enable EQ search\n");
quant_tool.quant_search();
quant_tool.model_file = "save_i8_eq.tmfile";
save_graph_i8_perchannel(quant_tool.model_file.c_str(), quant_tool.scale_file.c_str(), quant_tool.output_file, quant_tool.inplace, true);
break;
}
default:
{
fprintf(stderr, "Unsupported quantization type ... \n");
break;
}
} }
/* quantize weight/bias and save into int8 tmfile */
fprintf(stderr, "[Quant Tools Info]: Calibration file is using %s\n", quant_tool.scale_file.c_str());
save_graph_i8_perchannel(quant_tool.model_file.c_str(), quant_tool.scale_file.c_str(), quant_tool.output_file, quant_tool.inplace, false);
fprintf(stderr, "\n---- Tengine Int8 tmfile create success, best wish for your INT8 inference has a low accuracy loss...\\(^0^)/ ----\n"); fprintf(stderr, "\n---- Tengine Int8 tmfile create success, best wish for your INT8 inference has a low accuracy loss...\\(^0^)/ ----\n");
......
...@@ -77,7 +77,7 @@ void split(float* array, char* str, const char* del) ...@@ -77,7 +77,7 @@ void split(float* array, char* str, const char* del)
} }
void get_input_data_cv(const char* image_file, float* input_data, int img_c, int img_h, int img_w, const float* mean, void get_input_data_cv(const char* image_file, float* input_data, int img_c, int img_h, int img_w, const float* mean,
const float* scale, int sw_RGB = 0, int center_crop = 0, int letterbox_rows = 0, int letterbox_cols = 0, int focus = 0) const float* scale, int sw_RGB = 1, int center_crop = 0, int letterbox_rows = 0, int letterbox_cols = 0, int focus = 0)
{ {
/* only for yolov5s */ /* only for yolov5s */
if (focus == 1 && letterbox_rows > 0 && letterbox_cols > 0) if (focus == 1 && letterbox_rows > 0 && letterbox_cols > 0)
...@@ -411,6 +411,22 @@ std::vector<uint32_t> histCount(float* data, uint32_t elem_num, float max_val, f ...@@ -411,6 +411,22 @@ std::vector<uint32_t> histCount(float* data, uint32_t elem_num, float max_val, f
return hist; return hist;
} }
/* Build a 2048-bin histogram of |data[i]| over [0, abs_max] for KL-divergence
 * threshold search. Bin width is abs_max / 2047 so the largest magnitude lands
 * in the last bin; exact zeros are skipped (they carry no range information).
 *
 * data     : tensor values (fp32), elem_num entries
 * abs_max  : maximum absolute value of the tensor (histogram range)
 * returns  : 2048 zero-initialized-then-filled bin counts
 *
 * Fixes vs. previous version: removed unused local `bin_zp`; loop index is
 * unsigned to match elem_num (no signed/unsigned mismatch); guards the
 * all-zero tensor (abs_max == 0 would divide by zero); clamps the bin index
 * so a value fractionally above abs_max cannot index out of bounds. */
std::vector<uint32_t> histCount(float* data, uint32_t elem_num, float abs_max)
{
    std::vector<uint32_t> hist(2048, 0);

    const float bin_scale = abs_max / 2047.f;
    if (bin_scale == 0.f) // all-zero (or empty-range) tensor: nothing to count
        return hist;

    for (uint32_t i = 0; i < elem_num; i++)
    {
        if (data[i] != 0)
        {
            uint32_t hist_idx = (uint32_t)std::lround(std::abs(data[i]) / bin_scale);
            if (hist_idx > 2047) // defensive clamp against fp rounding past the top bin
                hist_idx = 2047;
            hist[hist_idx]++;
        }
    }
    return hist;
}
float compute_kl_divergence(std::vector<float>& dist_a, std::vector<float>& dist_b) float compute_kl_divergence(std::vector<float>& dist_a, std::vector<float>& dist_b)
{ {
const size_t length = dist_a.size(); const size_t length = dist_a.size();
......
...@@ -40,6 +40,7 @@ void get_input_data_cv(const char* image_file, float* input_data, int img_c, int ...@@ -40,6 +40,7 @@ void get_input_data_cv(const char* image_file, float* input_data, int img_c, int
void readFileList(std::string basePath, std::vector<std::string>& imgs); void readFileList(std::string basePath, std::vector<std::string>& imgs);
std::vector<uint32_t> histCount(float* data, uint32_t elem_num, float max_val, float min_val); std::vector<uint32_t> histCount(float* data, uint32_t elem_num, float max_val, float min_val);
std::vector<uint32_t> histCount(float* data, uint32_t elem_num, float abs_max);
float compute_kl_divergence(std::vector<float>& dist_a, std::vector<float>& dist_b); float compute_kl_divergence(std::vector<float>& dist_a, std::vector<float>& dist_b);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册