From 9a3d859390c233afa5ce0baf8cfceb182d89025e Mon Sep 17 00:00:00 2001 From: Wilber Date: Fri, 18 Jun 2021 15:23:18 +0800 Subject: [PATCH] cherry-pick .Align the code of trt under the develop and release/2.1 branch (#33631) --- .../tensorrt/convert/elementwise_op.cc | 31 ++- .../fluid/inference/tensorrt/convert/fc_op.cc | 247 ++++++------------ .../inference/tensorrt/convert/flatten_op.cc | 55 +++- .../inference/tensorrt/convert/reshape_op.cc | 2 +- paddle/fluid/inference/tensorrt/op_teller.cc | 53 ++-- 5 files changed, 187 insertions(+), 201 deletions(-) diff --git a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc index 5419933e407..df240085441 100644 --- a/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/elementwise_op.cc @@ -62,6 +62,25 @@ class ElementwiseWeightOpConverter : public OpConverter { 0}; TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr, 0}; + + nvinfer1::IShuffleLayer* expand_layer = nullptr; + nvinfer1::IShuffleLayer* squeeze_layer = nullptr; + int dynamic_shape_offset = engine_->with_dynamic_shape() ? 1 : 0; + auto input_dim = X->getDimensions(); + if (input_dim.nbDims < 3 + dynamic_shape_offset) { + nvinfer1::Dims expand_shape; + expand_shape.nbDims = 3 + dynamic_shape_offset; + for (int i = 0; i < expand_shape.nbDims; i++) { + if (i < input_dim.nbDims) { + expand_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i]; + } else { + expand_shape.d[i] = 1; + } + } + expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X); + expand_layer->setReshapeDimensions(expand_shape); + X = expand_layer->getOutput(0); + } if (op_type_ == "add") { nvinfer1::IScaleLayer* scale_layer = TRT_ENGINE_ADD_LAYER( engine_, Scale, *X, scale_mode, shift_weights.get(), @@ -73,7 +92,17 @@ class ElementwiseWeightOpConverter : public OpConverter { shift_weights.get(), power_weights.get()); layer = scale_layer; } - + if (input_dim.nbDims < 3 + dynamic_shape_offset) { + nvinfer1::Dims squeeze_shape; + squeeze_shape.nbDims = input_dim.nbDims; + for (int i = 0; i < squeeze_shape.nbDims; i++) { + squeeze_shape.d[i] = input_dim.d[i] < 0 ? 
0 : input_dim.d[i];
+      }
+      squeeze_layer =
+          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
+      squeeze_layer->setReshapeDimensions(squeeze_shape);
+      layer = static_cast<nvinfer1::ILayer*>(squeeze_layer);
+    }
     auto output_name = op_desc.Output("Out")[0];
     RreplenishLayerAndOutput(layer, "elementwise_" + op_type_, {output_name},
                              test_mode);
diff --git a/paddle/fluid/inference/tensorrt/convert/fc_op.cc b/paddle/fluid/inference/tensorrt/convert/fc_op.cc
index 6167e68df2b..74bb854e55f 100644
--- a/paddle/fluid/inference/tensorrt/convert/fc_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/fc_op.cc
@@ -37,7 +37,7 @@ class FcOpConverter : public OpConverter {
                   const framework::Scope& scope, bool test_mode) override {
     VLOG(3) << "convert a fluid fc op to tensorrt fc layer without bias";
     framework::OpDesc op_desc(op, nullptr);
-
+    auto output_name = op_desc.Output("Out").front();
     auto input_names = op_desc.InputNames();
     bool with_bias = input_names.size() >= 3;
     std::string w_name = "Y";
@@ -48,13 +48,14 @@ class FcOpConverter : public OpConverter {
     }
     // Declare inputs
     auto* X = engine_->GetITensor(op_desc.Input(i_name).front());
+    auto x_dim = X->getDimensions();
     // Declare weights
     auto* Y_v = scope.FindVar(op_desc.Input(w_name).front());
     PADDLE_ENFORCE_NOT_NULL(
         Y_v, platform::errors::NotFound(
                  "Can not find %s presistale var of fc in scope.", w_name));
     auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
-    const int x_num_col_dims =
+    int x_num_col_dims =
         op_desc.HasAttr("x_num_col_dims")
             ? BOOST_GET_CONST(int, op_desc.GetAttr("x_num_col_dims"))
             : (op_desc.HasAttr("in_num_col_dims")
@@ -106,8 +107,8 @@ class FcOpConverter : public OpConverter {
   auto regist_fc = [&](nvinfer1::ITensor* inputs, int n_output,
                        TensorRTEngine::Weight& weight,
                        TensorRTEngine::Weight& bias) {
-    nvinfer1::ILayer* fc_layer = nullptr;
     if (enable_int8) {
+      // add conv layer
       PADDLE_ENFORCE_EQ(
           op_desc.HasAttr("out_threshold"), true,
           platform::errors::InvalidArgument(
               "must have out threshold in fc layers in int8 mode"));
       float out_scale =
           BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
       nvinfer1::DimsHW nv_ksize(1, 1);
-      fc_layer = TRT_ENGINE_ADD_LAYER(engine_, Convolution, *inputs, n_output,
-                                      nv_ksize, weight.get(), bias.get());
-      engine_->SetTensorDynamicRange(fc_layer->getOutput(0), out_scale);
-    } else {
-      fc_layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *inputs,
-                                      n_output, weight.get(), bias.get());
-    }
-
-    auto output_name = op_desc.Output("Out").front();
-    if (activation_type == "relu") {
-      nvinfer1::IActivationLayer* relu_layer =
-          TRT_ENGINE_ADD_LAYER(engine_, Activation, *(fc_layer->getOutput(0)),
-                               nvinfer1::ActivationType::kRELU);
-      RreplenishLayerAndOutput(relu_layer, "fc", {output_name}, test_mode);
+      auto* fc_layer_int8 =
+          TRT_ENGINE_ADD_LAYER(engine_, Convolution, *inputs, n_output,
+                               nv_ksize, weight.get(), bias.get());
+      engine_->SetTensorDynamicRange(fc_layer_int8->getOutput(0), out_scale);
+      if (activation_type == "relu") {
+        nvinfer1::IActivationLayer* relu_layer_int8 = TRT_ENGINE_ADD_LAYER(
+            engine_, Activation, *(fc_layer_int8->getOutput(0)),
+            nvinfer1::ActivationType::kRELU);
+        RreplenishLayerAndOutput(relu_layer_int8, "relu_after_fc_shuffle",
+                                 {output_name}, test_mode);
+      } else {
+        RreplenishLayerAndOutput(fc_layer_int8, "shuffle_after_fc",
+                                 {output_name}, test_mode);
+      }
     } else {
-      RreplenishLayerAndOutput(fc_layer, "fc", {output_name}, test_mode);
+      // add fc layer
+      auto* fc_layer_before =
+          TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *inputs, n_output,
+                               weight.get(), bias.get());
+      fc_layer_before->setName(
+          ("fc_layer_before(Output: " + output_name + ")").c_str());
+      // add shuffle after fc
+      nvinfer1::Dims reshape_after_fc_dim;
+      if (engine_->use_oss() && engine_->with_ernie() && x_dim.nbDims == 4 &&
+          x_dim.d[2] == 1 && x_dim.d[3] == 1 && x_num_col_dims == 1) {
+        // If use tensorrt'oss, the x_dim and x_num_col_dims need change
+        reshape_after_fc_dim.nbDims = 4;
+      } else {
+        reshape_after_fc_dim.nbDims = x_num_col_dims + 1;
+      }
+      for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
+        reshape_after_fc_dim.d[i] = 0;
+      }
+      auto* fc_layer_float = TRT_ENGINE_ADD_LAYER(
+          engine_, Shuffle, *fc_layer_before->getOutput(0));
+      fc_layer_float->setReshapeDimensions(reshape_after_fc_dim);
+      if (activation_type == "relu") {
+        nvinfer1::IActivationLayer* relu_layer_float = TRT_ENGINE_ADD_LAYER(
+            engine_, Activation, *(fc_layer_float->getOutput(0)),
+            nvinfer1::ActivationType::kRELU);
+        RreplenishLayerAndOutput(relu_layer_float, "relu_after_fc_shuffle",
+                                 {output_name}, test_mode);
+      } else {
+        RreplenishLayerAndOutput(fc_layer_float, "shuffle_after_fc",
+                                 {output_name}, test_mode);
+      }
     }
   };
@@ -157,153 +188,47 @@ class FcOpConverter : public OpConverter {
       static_cast<void*>(bias_data), static_cast<size_t>(bias_num)};
-    if (engine_->with_dynamic_shape()) {
-      // not NCHW layout, but NLP layout with added 'x 1 x 1'
-      auto x_dim = X->getDimensions();
-      if (engine_->use_oss() && engine_->with_ernie() && x_dim.nbDims == 4 &&
-          x_dim.d[2] == 1 && x_dim.d[3] == 1 && x_num_col_dims == 2) {
-        // fc which is just after self attention
-        regist_fc(X, n_output, weight, bias);
-        return;
-      }
-      PADDLE_ENFORCE_LE(
-          x_dim.nbDims - x_num_col_dims, 3,
-          platform::errors::InvalidArgument(
-              "Params and input dims mismatch. Paddle-TRT FC "
-              "converter expects x_dim.nbDims - x_num_col_dims <= 3, but "
-              "x_dim.nbDims = %d, x_num_col_dims = %d.",
-              x_dim.nbDims, x_num_col_dims));
-      auto output_name = op_desc.Output("Out").front();
-      // add shuffle before fc
-      nvinfer1::Dims reshape_before_fc_dim;
-      // padding shape "x 1 x 1"
-      int padding_length = 3 - (x_dim.nbDims - x_num_col_dims);
-      reshape_before_fc_dim.nbDims = x_dim.nbDims + padding_length;
-      int cur_dim_index = reshape_before_fc_dim.nbDims - 1;
-      while (padding_length-- > 0) {
-        reshape_before_fc_dim.d[cur_dim_index--] = 1;
-      }
-      while (cur_dim_index >= 0) {
-        reshape_before_fc_dim.d[cur_dim_index--] = 0;
-      }
-
-      auto* reshape_before_fc_layer =
-          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
-      reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim);
-      reshape_before_fc_layer->setName(
-          ("shuffle_before_fc(Output: " + output_name + ")").c_str());
-
-      // add fc layer
-      auto* fc_layer = TRT_ENGINE_ADD_LAYER(
-          engine_, FullyConnected, *reshape_before_fc_layer->getOutput(0),
-          n_output, weight.get(), bias.get());
-      fc_layer->setName(("fc_layer(Output: " + output_name + ")").c_str());
-
-      // add shuffle after fc
-      nvinfer1::Dims reshape_after_fc_dim;
-      reshape_after_fc_dim.nbDims = x_num_col_dims + 1;
-      for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
-        reshape_after_fc_dim.d[i] = 0;
-      }
-
-      auto* reshape_after_fc_layer =
-          TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *fc_layer->getOutput(0));
-      reshape_after_fc_layer->setReshapeDimensions(reshape_after_fc_dim);
-
-      if (activation_type == "relu") {
-        reshape_after_fc_layer->setName(
-            ("shuffle_after_fc(Output: " + output_name + ")").c_str());
-        nvinfer1::IActivationLayer* relu_layer = TRT_ENGINE_ADD_LAYER(
-            engine_, Activation, *(reshape_after_fc_layer->getOutput(0)),
-
nvinfer1::ActivationType::kRELU); - RreplenishLayerAndOutput(relu_layer, "relu_after_fc_shuffle", - {output_name}, test_mode); - } else { - RreplenishLayerAndOutput(reshape_after_fc_layer, "shuffle_after_fc", - {output_name}, test_mode); - } - return; + // Running the TRT Static Shape mode: x_num_col_dims-1 + if (!engine_->with_dynamic_shape()) { + x_num_col_dims--; } - // in order to handle situations in NLP models(input dims < 3, - // x_num_col_dims != 1, etc.), reshape input to perform FC correctly. - auto* reshape_itensor = X; - int input_dims = X->getDimensions().nbDims; - auto input_d = X->getDimensions().d; - int reshape_dim3[3] = {0}; - int reshape_dim4[4] = {0}; - PADDLE_ENFORCE_LE(x_num_col_dims, input_dims, - platform::errors::InvalidArgument( - "Params and input dims mismatch. Paddle-TRT FC " - "converter expects x_num_col_dims <= input dims")); - if (x_num_col_dims == 1) { - if (input_dims == 4) { - PADDLE_ENFORCE_EQ( - input_d[3], 1, - platform::errors::InvalidArgument( - "Invalid dimensions. When x_num_col_dims equals to 1 and input " - "dims equals to 4, the last dim of input must be 1, but got %d", - input_d[3])); - } - if (enable_int8) { - reshape_dim3[0] = 1; - for (int i = 0; i < 3; i++) { - reshape_dim3[0] *= input_d[i]; - if (i > 0) { - reshape_dim3[i] = 1; - } - } - } else { - for (int i = 0; i < 3; i++) { - if (i < input_dims) { - reshape_dim3[i] = input_d[i]; - } else { - reshape_dim3[i] = 1; - } - } - } - - nvinfer1::Dims3 reshape_dim(reshape_dim3[0], reshape_dim3[1], - reshape_dim3[2]); - auto* reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X); - reshape_layer->setReshapeDimensions(reshape_dim); - reshape_itensor = reshape_layer->getOutput(0); - if (enable_int8) { - engine_->SetTensorDynamicRange(reshape_itensor, in_scale); - } - } else { - PADDLE_ENFORCE_NE(input_dims, 1, - platform::errors::InvalidArgument( - "Invalid dimensions. When x_num_col_dims equals to " - "2, input_dims should not be 1")); - - if (enable_int8) { - for (int i = 0; i < 4; i++) { - if (i == 0) { - reshape_dim4[i] = input_d[i]; - } else { - reshape_dim4[i] = 1; - if (i < input_dims) { - reshape_dim4[1] *= input_d[i]; - } - } - } + // If use tensorrt'oss, the x_dim and x_num_col_dims need change + if (engine_->use_oss() && engine_->with_ernie() && x_dim.nbDims == 4 && + x_dim.d[2] == 1 && x_dim.d[3] == 1 && x_num_col_dims == 2) { + x_num_col_dims = 1; + } + PADDLE_ENFORCE_GT( + x_dim.nbDims, x_num_col_dims, + platform::errors::InvalidArgument( + "Params and input dims mismatch. 
Paddle-TRT FC " + "converter expects x_dim.nbDims > x_num_col_dims, but " + "x_dim.nbDims : %d, x_num_col_dims : %d.", + x_dim.nbDims, x_num_col_dims)); + // add shuffle before fc + nvinfer1::Dims reshape_before_fc_dim; + reshape_before_fc_dim.nbDims = x_num_col_dims + 3; + // padding shape "* x q x 1 x 1" + for (int i = 0; i < reshape_before_fc_dim.nbDims; i++) { + reshape_before_fc_dim.d[i] = 1; + } + for (int i = 0; i < x_dim.nbDims; i++) { + if (i < x_num_col_dims) { + reshape_before_fc_dim.d[i] = 0; } else { - for (int i = 0; i < 4; i++) { - if (i < input_dims) { - reshape_dim4[i] = input_d[i]; - } else { - reshape_dim4[i] = 1; - } + if (x_dim.d[i] < 0) { + reshape_before_fc_dim.d[x_num_col_dims] = -1; + break; } + reshape_before_fc_dim.d[x_num_col_dims] *= x_dim.d[i]; } - nvinfer1::Dims4 reshape_dim(reshape_dim4[0], reshape_dim4[1], - reshape_dim4[2], reshape_dim4[3]); - auto* reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X); - reshape_layer->setReshapeDimensions(reshape_dim); - reshape_itensor = reshape_layer->getOutput(0); - if (enable_int8) { - engine_->SetTensorDynamicRange(reshape_itensor, in_scale); - } + } + auto* reshape_before_fc_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X); + reshape_before_fc_layer->setReshapeDimensions(reshape_before_fc_dim); + reshape_before_fc_layer->setName( + ("shuffle_before_fc(Output: " + output_name + ")").c_str()); + auto* reshape_itensor = reshape_before_fc_layer->getOutput(0); + if (enable_int8) { + engine_->SetTensorDynamicRange(reshape_itensor, in_scale); } regist_fc(reshape_itensor, n_output, weight, bias); } diff --git a/paddle/fluid/inference/tensorrt/convert/flatten_op.cc b/paddle/fluid/inference/tensorrt/convert/flatten_op.cc index 03a1c167246..322b42667fa 100644 --- a/paddle/fluid/inference/tensorrt/convert/flatten_op.cc +++ b/paddle/fluid/inference/tensorrt/convert/flatten_op.cc @@ -25,7 +25,7 @@ namespace inference { namespace tensorrt { /* - * FlattenOp, only support static shape mode currently. 
+ * FlattenOp trt converter
 */
 class FlattenOpConverter : public OpConverter {
  public:
@@ -35,21 +35,48 @@ class FlattenOpConverter : public OpConverter {
     // Declare inputs
     auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
     int dims = input->getDimensions().nbDims;
+    nvinfer1::IShuffleLayer* layer = nullptr;
+    if (!engine_->with_dynamic_shape()) {
+      int dim_prod = 1;
+      for (int i = 0; i < dims; i++) {
+        int dim_i = input->getDimensions().d[i];
+        PADDLE_ENFORCE_GT(
+            dim_i, 0,
+            platform::errors::InvalidArgument(
+                "flatten input dim should be > 0, but got %d.", dim_i));
+        dim_prod *= dim_i;
+      }
+      nvinfer1::Dims flatten_dim;
+      flatten_dim.nbDims = 1;
+      flatten_dim.d[0] = dim_prod;
+      layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
+      layer->setReshapeDimensions(flatten_dim);
+    } else {
+      auto* shape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shape, *input);
+      uint32_t reduce_dim = 1;
-    int dim_prod = 1;
-    for (int i = 0; i < dims; i++) {
-      int dim_i = input->getDimensions().d[i];
-      PADDLE_ENFORCE_GT(
-          dim_i, 0, platform::errors::InvalidArgument(
-                        "flatten input dim should be > 0, but got %d.", dim_i));
-      dim_prod *= dim_i;
+      auto* reduce_prod_layer = TRT_ENGINE_ADD_LAYER(
+          engine_, Reduce, *(shape_layer->getOutput(0)),
+          nvinfer1::ReduceOperation::kPROD, reduce_dim, true);
+      int32_t* constant_weight_data = new int32_t[1];
+      constant_weight_data[0] = -1;
+      TensorRTEngine::Weight constant_weight{
+          nvinfer1::DataType::kINT32, static_cast<void*>(constant_weight_data),
+          1};
+      nvinfer1::Dims constant_dims;
+      constant_dims.nbDims = 1;
+      constant_dims.d[0] = 1;
+      auto* constant_layer = TRT_ENGINE_ADD_LAYER(
+          engine_, Constant, constant_dims, constant_weight.get());
+      std::vector<nvinfer1::ITensor*> itensors;
+      itensors.push_back(constant_layer->getOutput(0));
+      itensors.push_back(reduce_prod_layer->getOutput(0));
+      auto* concat_layer =
+          TRT_ENGINE_ADD_LAYER(engine_, Concatenation, itensors.data(), 2);
+      concat_layer->setAxis(0);
+      layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
+      layer->setInput(1, *(concat_layer->getOutput(0)));
     }
-    nvinfer1::Dims flatten_dim;
-    flatten_dim.nbDims = 1;
-    flatten_dim.d[0] = dim_prod;
-    auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
-    layer->setReshapeDimensions(flatten_dim);
-
     auto output_name = op_desc.Output("Out")[0];
     RreplenishLayerAndOutput(layer, "flatten", {output_name}, test_mode);
   }
diff --git a/paddle/fluid/inference/tensorrt/convert/reshape_op.cc b/paddle/fluid/inference/tensorrt/convert/reshape_op.cc
index 3d8c72728c6..489603e20cd 100644
--- a/paddle/fluid/inference/tensorrt/convert/reshape_op.cc
+++ b/paddle/fluid/inference/tensorrt/convert/reshape_op.cc
@@ -34,7 +34,7 @@ class ReshapeOpConverter : public OpConverter {
     framework::OpDesc op_desc(op, nullptr);
     // Declare inputs
     auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
-    const std::vector<int>& shape =
+    std::vector<int> shape =
         BOOST_GET_CONST(std::vector<int>, op_desc.GetAttr("shape"));
     int nbDims_num = shape.size();
     nvinfer1::Dims reshape_dim;
diff --git a/paddle/fluid/inference/tensorrt/op_teller.cc b/paddle/fluid/inference/tensorrt/op_teller.cc
index 44611d1d595..59b196e3d92 100644
--- a/paddle/fluid/inference/tensorrt/op_teller.cc
+++ b/paddle/fluid/inference/tensorrt/op_teller.cc
@@ -300,23 +300,14 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
         if (axis.size() >= nvinfer1::Dims::MAX_DIMS) return false;
       }
     }
-    if (op_type == "flatten2") {
-      // flatten doesn't support dynamic shape currently
-      if (!desc.HasAttr("axis")) {
-        return false;
-      } else {
-        if
(with_dynamic_shape) return false; - int axis = BOOST_GET_CONST(int, desc.GetAttr("axis")); - if (axis != 1) return false; - } - } - - if (op_type == "flatten") { - // flatten doesn't support dynamic shape currently + if (op_type == "flatten2" || op_type == "flatten") { if (!desc.HasAttr("axis")) { return false; } else { +#if IS_TRT_VERSION_GE(7130) +#else if (with_dynamic_shape) return false; +#endif int axis = BOOST_GET_CONST(int, desc.GetAttr("axis")); if (axis != 1) return false; } @@ -685,20 +676,19 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8, } } - if (op_type == "reduce_sum") { - if (!with_dynamic_shape) { - VLOG(3) << "the reduce_sum does not support static shape yet"; - return false; - } - - if (!(desc.HasAttr("keep_dim") && desc.HasAttr("dim") && - desc.HasAttr("reduce_all"))) { - VLOG(3) << "the reduce_sum does not have attr (keep_dim or dim or " - "reduce_all)"; + if (op_type == "fc") { + int x_num_col_dims = + desc.HasAttr("x_num_col_dims") + ? BOOST_GET_CONST(int, desc.GetAttr("x_num_col_dims")) + : (desc.HasAttr("in_num_col_dims") + ? BOOST_GET_CONST(int, desc.GetAttr("in_num_col_dims")) + : 1); + if (x_num_col_dims < 1) { + VLOG(3) << "converter expects x_num_col_dims >= 1, " + "but x_num_col_dims = %d."; return false; } } - if (op_type == "reshape" || op_type == "reshape2") { if (!desc.HasAttr("shape")) { return false; @@ -712,6 +702,21 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8, if (shape.size() >= nvinfer1::Dims::MAX_DIMS) return false; } } + + if (op_type == "reduce_sum") { + if (!with_dynamic_shape) { + VLOG(3) << "the reduce_sum does not support static shape yet"; + return false; + } + + if (!(desc.HasAttr("keep_dim") && desc.HasAttr("dim") && + desc.HasAttr("reduce_all"))) { + VLOG(3) << "the reduce_sum does not have attr (keep_dim or dim or " + "reduce_all)"; + return false; + } + } + if ((*teller)(op_type, desc, use_no_calib_int8)) return true; } return false; -- GitLab
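Reviewer note: the heart of the fc_op.cc change above is the "reshape before fc" dimension arithmetic that pads the FC input to x_num_col_dims + 3 dims ("* x q x 1 x 1") and folds every dim past x_num_col_dims into one. Below is a minimal standalone sketch of just that arithmetic, assuming TensorRT reshape semantics (0 = copy the corresponding input dim, -1 = infer at most one dim). Dims here is a stand-in for nvinfer1::Dims, and ReshapeBeforeFc is a hypothetical helper name used only for illustration; it is not Paddle's or TensorRT's actual API.

// Minimal sketch of the reshape-before-fc shape computation from the
// fc_op.cc hunk above. Dims mimics nvinfer1::Dims; ReshapeBeforeFc is a
// hypothetical name for illustration only.
#include <cassert>
#include <cstdio>

struct Dims {
  int nbDims = 0;  // number of dimensions
  int d[8] = {0};  // dimension values
};

Dims ReshapeBeforeFc(const Dims& x_dim, int x_num_col_dims) {
  // The converter enforces x_dim.nbDims > x_num_col_dims (PADDLE_ENFORCE_GT).
  assert(x_dim.nbDims > x_num_col_dims);
  Dims out;
  out.nbDims = x_num_col_dims + 3;  // pad the shape to "* x q x 1 x 1"
  for (int i = 0; i < out.nbDims; ++i) out.d[i] = 1;
  for (int i = 0; i < x_dim.nbDims; ++i) {
    if (i < x_num_col_dims) {
      out.d[i] = 0;  // 0 tells TensorRT to copy this dim from the input
    } else if (x_dim.d[i] < 0) {
      out.d[x_num_col_dims] = -1;  // dynamic dim: let TensorRT infer it
      break;
    } else {
      out.d[x_num_col_dims] *= x_dim.d[i];  // fold trailing dims into one
    }
  }
  return out;
}

int main() {
  // [1, 128, 768] with x_num_col_dims == 2 -> prints "0 0 768 1 1"
  Dims x{3, {1, 128, 768}};
  Dims r = ReshapeBeforeFc(x, 2);
  for (int i = 0; i < r.nbDims; ++i) std::printf("%d ", r.d[i]);
  std::printf("\n");
  // [16, -1, 768] with x_num_col_dims == 1 -> prints "0 -1 1 1"
  Dims y{3, {16, -1, 768}};
  r = ReshapeBeforeFc(y, 1);
  for (int i = 0; i < r.nbDims; ++i) std::printf("%d ", r.d[i]);
  std::printf("\n");
  return 0;
}

This also explains the shuffle that regist_fc adds after the FC layer: FullyConnected runs on the padded "* x q x 1 x 1" layout, so its output is reshaped back to x_num_col_dims + 1 dims (all zeros, i.e. copied through), dropping the two appended unit dims.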