未验证 提交 9a3d8593 编写于 作者: W Wilber 提交者: GitHub

Cherry-pick: align the TRT code between the develop and release/2.1 branches (#33631)

上级 bd3aa038
......@@ -62,6 +62,25 @@ class ElementwiseWeightOpConverter : public OpConverter {
TensorRTEngine::Weight power_weights{nvinfer1::DataType::kFLOAT, nullptr,
nvinfer1::IShuffleLayer* expand_layer = nullptr;
nvinfer1::IShuffleLayer* squeeze_layer = nullptr;
int dynamic_shape_offset = engine_->with_dynamic_shape() ? 1 : 0;
auto input_dim = X->getDimensions();
if (input_dim.nbDims < 3 + dynamic_shape_offset) {
nvinfer1::Dims expand_shape;
expand_shape.nbDims = 3 + dynamic_shape_offset;
for (int i = 0; i < expand_shape.nbDims; i++) {
if (i < input_dim.nbDims) {
expand_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i];
} else {
expand_shape.d[i] = 1;
expand_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
X = expand_layer->getOutput(0);
if (op_type_ == "add") {
nvinfer1::IScaleLayer* scale_layer = TRT_ENGINE_ADD_LAYER(
engine_, Scale, *X, scale_mode, shift_weights.get(),
......@@ -73,7 +92,17 @@ class ElementwiseWeightOpConverter : public OpConverter {
shift_weights.get(), power_weights.get());
layer = scale_layer;
if (input_dim.nbDims < 3 + dynamic_shape_offset) {
nvinfer1::Dims squeeze_shape;
squeeze_shape.nbDims = input_dim.nbDims;
for (int i = 0; i < squeeze_shape.nbDims; i++) {
squeeze_shape.d[i] = input_dim.d[i] < 0 ? 0 : input_dim.d[i];
squeeze_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *(layer->getOutput(0)));
layer = static_cast<nvinfer1::ILayer*>(squeeze_layer);
auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "elementwise_" + op_type_, {output_name},
......@@ -37,7 +37,7 @@ class FcOpConverter : public OpConverter {
const framework::Scope& scope, bool test_mode) override {
VLOG(3) << "convert a fluid fc op to tensorrt fc layer without bias";
framework::OpDesc op_desc(op, nullptr);
auto output_name = op_desc.Output("Out").front();
auto input_names = op_desc.InputNames();
bool with_bias = input_names.size() >= 3;
std::string w_name = "Y";
......@@ -48,13 +48,14 @@ class FcOpConverter : public OpConverter {
// Declare inputs
auto* X = engine_->GetITensor(op_desc.Input(i_name).front());
auto x_dim = X->getDimensions();
// Declare weights
auto* Y_v = scope.FindVar(op_desc.Input(w_name).front());
Y_v, platform::errors::NotFound(
"Can not find %s presistale var of fc in scope.", w_name));
auto* Y_t = Y_v->GetMutable<framework::LoDTensor>();
const int x_num_col_dims =
int x_num_col_dims =
? BOOST_GET_CONST(int, op_desc.GetAttr("x_num_col_dims"))
: (op_desc.HasAttr("in_num_col_dims")
......@@ -106,8 +107,8 @@ class FcOpConverter : public OpConverter {
auto regist_fc = [&](nvinfer1::ITensor* inputs, int n_output,
TensorRTEngine::Weight& weight,
TensorRTEngine::Weight& bias) {
nvinfer1::ILayer* fc_layer = nullptr;
if (enable_int8) {
// add conv layer
op_desc.HasAttr("out_threshold"), true,
......@@ -115,22 +116,52 @@ class FcOpConverter : public OpConverter {
float out_scale =
BOOST_GET_CONST(float, op_desc.GetAttr("out_threshold"));
nvinfer1::DimsHW nv_ksize(1, 1);
fc_layer = TRT_ENGINE_ADD_LAYER(engine_, Convolution, *inputs, n_output,
nv_ksize, weight.get(), bias.get());
engine_->SetTensorDynamicRange(fc_layer->getOutput(0), out_scale);
} else {
fc_layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *inputs,
n_output, weight.get(), bias.get());
auto output_name = op_desc.Output("Out").front();
if (activation_type == "relu") {
nvinfer1::IActivationLayer* relu_layer =
TRT_ENGINE_ADD_LAYER(engine_, Activation, *(fc_layer->getOutput(0)),
RreplenishLayerAndOutput(relu_layer, "fc", {output_name}, test_mode);
auto* fc_layer_int8 =
TRT_ENGINE_ADD_LAYER(engine_, Convolution, *inputs, n_output,
nv_ksize, weight.get(), bias.get());
engine_->SetTensorDynamicRange(fc_layer_int8->getOutput(0), out_scale);
if (activation_type == "relu") {
nvinfer1::IActivationLayer* relu_layer_int8 = TRT_ENGINE_ADD_LAYER(
engine_, Activation, *(fc_layer_int8->getOutput(0)),
RreplenishLayerAndOutput(relu_layer_int8, "relu_after_fc_shuffle",
{output_name}, test_mode);
} else {
RreplenishLayerAndOutput(fc_layer_int8, "shuffle_after_fc",
{output_name}, test_mode);
} else {
RreplenishLayerAndOutput(fc_layer, "fc", {output_name}, test_mode);
// add fc layer
auto* fc_layer_before =
TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *inputs, n_output,
weight.get(), bias.get());
("fc_layer_before(Output: " + output_name + ")").c_str());
// add shuffle after fc
nvinfer1::Dims reshape_after_fc_dim;
if (engine_->use_oss() && engine_->with_ernie() && x_dim.nbDims == 4 &&
x_dim.d[2] == 1 && x_dim.d[3] == 1 && x_num_col_dims == 1) {
// If use tensorrt'oss, the x_dim and x_num_col_dims need change
reshape_after_fc_dim.nbDims = 4;
} else {
reshape_after_fc_dim.nbDims = x_num_col_dims + 1;
for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
reshape_after_fc_dim.d[i] = 0;
auto* fc_layer_float = TRT_ENGINE_ADD_LAYER(
engine_, Shuffle, *fc_layer_before->getOutput(0));
if (activation_type == "relu") {
nvinfer1::IActivationLayer* relu_layer_float = TRT_ENGINE_ADD_LAYER(
engine_, Activation, *(fc_layer_float->getOutput(0)),
RreplenishLayerAndOutput(relu_layer_float, "relu_after_fc_shuffle",
{output_name}, test_mode);
} else {
RreplenishLayerAndOutput(fc_layer_float, "shuffle_after_fc",
{output_name}, test_mode);
......@@ -157,153 +188,47 @@ class FcOpConverter : public OpConverter {
if (engine_->with_dynamic_shape()) {
// not NCHW layout, but NLP layout with added 'x 1 x 1'
auto x_dim = X->getDimensions();
if (engine_->use_oss() && engine_->with_ernie() && x_dim.nbDims == 4 &&
x_dim.d[2] == 1 && x_dim.d[3] == 1 && x_num_col_dims == 2) {
// fc which is just after self attention
regist_fc(X, n_output, weight, bias);
x_dim.nbDims - x_num_col_dims, 3,
"Params and input dims mismatch. Paddle-TRT FC "
"converter expects x_dim.nbDims - x_num_col_dims <= 3, but "
"x_dim.nbDims = %d, x_num_col_dims = %d.",
x_dim.nbDims, x_num_col_dims));
auto output_name = op_desc.Output("Out").front();
// add shuffle before fc
nvinfer1::Dims reshape_before_fc_dim;
// padding shape "x 1 x 1"
int padding_length = 3 - (x_dim.nbDims - x_num_col_dims);
reshape_before_fc_dim.nbDims = x_dim.nbDims + padding_length;
int cur_dim_index = reshape_before_fc_dim.nbDims - 1;
while (padding_length-- > 0) {
reshape_before_fc_dim.d[cur_dim_index--] = 1;
while (cur_dim_index >= 0) {
reshape_before_fc_dim.d[cur_dim_index--] = 0;
auto* reshape_before_fc_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
("shuffle_before_fc(Output: " + output_name + ")").c_str());
// add fc layer
auto* fc_layer = TRT_ENGINE_ADD_LAYER(
engine_, FullyConnected, *reshape_before_fc_layer->getOutput(0),
n_output, weight.get(), bias.get());
fc_layer->setName(("fc_layer(Output: " + output_name + ")").c_str());
// add shuffle after fc
nvinfer1::Dims reshape_after_fc_dim;
reshape_after_fc_dim.nbDims = x_num_col_dims + 1;
for (int i = 0; i < reshape_after_fc_dim.nbDims; i++) {
reshape_after_fc_dim.d[i] = 0;
auto* reshape_after_fc_layer =
TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *fc_layer->getOutput(0));
if (activation_type == "relu") {
("shuffle_after_fc(Output: " + output_name + ")").c_str());
nvinfer1::IActivationLayer* relu_layer = TRT_ENGINE_ADD_LAYER(
engine_, Activation, *(reshape_after_fc_layer->getOutput(0)),
RreplenishLayerAndOutput(relu_layer, "relu_after_fc_shuffle",
{output_name}, test_mode);
} else {
RreplenishLayerAndOutput(reshape_after_fc_layer, "shuffle_after_fc",
{output_name}, test_mode);
// Running the TRT Static Shape mode: x_num_col_dims-1
if (!engine_->with_dynamic_shape()) {
// in order to handle situations in NLP models(input dims < 3,
// x_num_col_dims != 1, etc.), reshape input to perform FC correctly.
auto* reshape_itensor = X;
int input_dims = X->getDimensions().nbDims;
auto input_d = X->getDimensions().d;
int reshape_dim3[3] = {0};
int reshape_dim4[4] = {0};
PADDLE_ENFORCE_LE(x_num_col_dims, input_dims,
"Params and input dims mismatch. Paddle-TRT FC "
"converter expects x_num_col_dims <= input dims"));
if (x_num_col_dims == 1) {
if (input_dims == 4) {
input_d[3], 1,
"Invalid dimensions. When x_num_col_dims equals to 1 and input "
"dims equals to 4, the last dim of input must be 1, but got %d",
if (enable_int8) {
reshape_dim3[0] = 1;
for (int i = 0; i < 3; i++) {
reshape_dim3[0] *= input_d[i];
if (i > 0) {
reshape_dim3[i] = 1;
} else {
for (int i = 0; i < 3; i++) {
if (i < input_dims) {
reshape_dim3[i] = input_d[i];
} else {
reshape_dim3[i] = 1;
nvinfer1::Dims3 reshape_dim(reshape_dim3[0], reshape_dim3[1],
auto* reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
reshape_itensor = reshape_layer->getOutput(0);
if (enable_int8) {
engine_->SetTensorDynamicRange(reshape_itensor, in_scale);
} else {
PADDLE_ENFORCE_NE(input_dims, 1,
"Invalid dimensions. When x_num_col_dims equals to "
"2, input_dims should not be 1"));
if (enable_int8) {
for (int i = 0; i < 4; i++) {
if (i == 0) {
reshape_dim4[i] = input_d[i];
} else {
reshape_dim4[i] = 1;
if (i < input_dims) {
reshape_dim4[1] *= input_d[i];
// If use tensorrt'oss, the x_dim and x_num_col_dims need change
if (engine_->use_oss() && engine_->with_ernie() && x_dim.nbDims == 4 &&
x_dim.d[2] == 1 && x_dim.d[3] == 1 && x_num_col_dims == 2) {
x_num_col_dims = 1;
x_dim.nbDims, x_num_col_dims,
"Params and input dims mismatch. Paddle-TRT FC "
"converter expects x_dim.nbDims > x_num_col_dims, but "
"x_dim.nbDims : %d, x_num_col_dims : %d.",
x_dim.nbDims, x_num_col_dims));
// add shuffle before fc
nvinfer1::Dims reshape_before_fc_dim;
reshape_before_fc_dim.nbDims = x_num_col_dims + 3;
// padding shape "* x q x 1 x 1"
for (int i = 0; i < reshape_before_fc_dim.nbDims; i++) {
reshape_before_fc_dim.d[i] = 1;
for (int i = 0; i < x_dim.nbDims; i++) {
if (i < x_num_col_dims) {
reshape_before_fc_dim.d[i] = 0;
} else {
for (int i = 0; i < 4; i++) {
if (i < input_dims) {
reshape_dim4[i] = input_d[i];
} else {
reshape_dim4[i] = 1;
if (x_dim.d[i] < 0) {
reshape_before_fc_dim.d[x_num_col_dims] = -1;
reshape_before_fc_dim.d[x_num_col_dims] *= x_dim.d[i];
nvinfer1::Dims4 reshape_dim(reshape_dim4[0], reshape_dim4[1],
reshape_dim4[2], reshape_dim4[3]);
auto* reshape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
reshape_itensor = reshape_layer->getOutput(0);
if (enable_int8) {
engine_->SetTensorDynamicRange(reshape_itensor, in_scale);
auto* reshape_before_fc_layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *X);
("shuffle_before_fc(Output: " + output_name + ")").c_str());
auto* reshape_itensor = reshape_before_fc_layer->getOutput(0);
if (enable_int8) {
engine_->SetTensorDynamicRange(reshape_itensor, in_scale);
regist_fc(reshape_itensor, n_output, weight, bias);
......@@ -25,7 +25,7 @@ namespace inference {
namespace tensorrt {
* FlattenOp, only support static shape mode currently.
* FlattenOp trt converter
class FlattenOpConverter : public OpConverter {
......@@ -35,21 +35,48 @@ class FlattenOpConverter : public OpConverter {
// Declare inputs
auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
int dims = input->getDimensions().nbDims;
nvinfer1::IShuffleLayer* layer = nullptr;
if (!engine_->with_dynamic_shape()) {
int dim_prod = 1;
for (int i = 0; i < dims; i++) {
int dim_i = input->getDimensions().d[i];
dim_i, 0,
"flatten input dim should be > 0, but got %d.", dim_i));
dim_prod *= dim_i;
nvinfer1::Dims flatten_dim;
flatten_dim.nbDims = 1;
flatten_dim.d[0] = dim_prod;
layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
} else {
auto* shape_layer = TRT_ENGINE_ADD_LAYER(engine_, Shape, *input);
uint32_t reduce_dim = 1;
int dim_prod = 1;
for (int i = 0; i < dims; i++) {
int dim_i = input->getDimensions().d[i];
dim_i, 0, platform::errors::InvalidArgument(
"flatten input dim should be > 0, but got %d.", dim_i));
dim_prod *= dim_i;
auto* reduce_prod_layer = TRT_ENGINE_ADD_LAYER(
engine_, Reduce, *(shape_layer->getOutput(0)),
nvinfer1::ReduceOperation::kPROD, reduce_dim, true);
int32_t* constant_weight_data = new int32_t[1];
constant_weight_data[0] = -1;
TensorRTEngine::Weight constant_weight{
nvinfer1::DataType::kINT32, static_cast<void*>(constant_weight_data),
nvinfer1::Dims constant_dims;
constant_dims.nbDims = 1;
constant_dims.d[0] = 1;
auto* constant_layer = TRT_ENGINE_ADD_LAYER(
engine_, Constant, constant_dims, constant_weight.get());
std::vector<nvinfer1::ITensor*> itensors;
auto* concat_layer =
TRT_ENGINE_ADD_LAYER(engine_, Concatenation, itensors.data(), 2);
layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
layer->setInput(1, *(concat_layer->getOutput(0)));
nvinfer1::Dims flatten_dim;
flatten_dim.nbDims = 1;
flatten_dim.d[0] = dim_prod;
auto* layer = TRT_ENGINE_ADD_LAYER(engine_, Shuffle, *input);
auto output_name = op_desc.Output("Out")[0];
RreplenishLayerAndOutput(layer, "flatten", {output_name}, test_mode);
......@@ -34,7 +34,7 @@ class ReshapeOpConverter : public OpConverter {
framework::OpDesc op_desc(op, nullptr);
// Declare inputs
auto* input = engine_->GetITensor(op_desc.Input("X")[0]);
const std::vector<int>& shape =
std::vector<int> shape =
BOOST_GET_CONST(std::vector<int>, op_desc.GetAttr("shape"));
int nbDims_num = shape.size();
nvinfer1::Dims reshape_dim;
......@@ -300,23 +300,14 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
if (axis.size() >= nvinfer1::Dims::MAX_DIMS) return false;
if (op_type == "flatten2") {
// flatten doesn't support dynamic shape currently
if (!desc.HasAttr("axis")) {
return false;
} else {
if (with_dynamic_shape) return false;
int axis = BOOST_GET_CONST(int, desc.GetAttr("axis"));
if (axis != 1) return false;
if (op_type == "flatten") {
// flatten doesn't support dynamic shape currently
if (op_type == "flatten2" || op_type == "flatten") {
if (!desc.HasAttr("axis")) {
return false;
} else {
if (with_dynamic_shape) return false;
int axis = BOOST_GET_CONST(int, desc.GetAttr("axis"));
if (axis != 1) return false;
......@@ -685,20 +676,19 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
if (op_type == "reduce_sum") {
if (!with_dynamic_shape) {
VLOG(3) << "the reduce_sum does not support static shape yet";
return false;
if (!(desc.HasAttr("keep_dim") && desc.HasAttr("dim") &&
desc.HasAttr("reduce_all"))) {
VLOG(3) << "the reduce_sum does not have attr (keep_dim or dim or "
if (op_type == "fc") {
int x_num_col_dims =
? BOOST_GET_CONST(int, desc.GetAttr("x_num_col_dims"))
: (desc.HasAttr("in_num_col_dims")
? BOOST_GET_CONST(int, desc.GetAttr("in_num_col_dims"))
: 1);
if (x_num_col_dims < 1) {
VLOG(3) << "converter expects x_num_col_dims >= 1, "
"but x_num_col_dims = %d.";
return false;
if (op_type == "reshape" || op_type == "reshape2") {
if (!desc.HasAttr("shape")) {
return false;
......@@ -712,6 +702,21 @@ bool OpTeller::Tell(const framework::ir::Node* node, bool use_no_calib_int8,
if (shape.size() >= nvinfer1::Dims::MAX_DIMS) return false;
if (op_type == "reduce_sum") {
if (!with_dynamic_shape) {
VLOG(3) << "the reduce_sum does not support static shape yet";
return false;
if (!(desc.HasAttr("keep_dim") && desc.HasAttr("dim") &&
desc.HasAttr("reduce_all"))) {
VLOG(3) << "the reduce_sum does not have attr (keep_dim or dim or "
return false;
if ((*teller)(op_type, desc, use_no_calib_int8)) return true;
return false;
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
想要评论请 注册