From b37b0d00f1f5e1ebb09058dc7cfa87b67c772436 Mon Sep 17 00:00:00 2001 From: Raman Sarokin Date: Fri, 30 Oct 2020 17:00:03 -0700 Subject: [PATCH] DepthwiseConvolution generation changed to use storage type properties instead of specific storage types. PiperOrigin-RevId: 339964553 Change-Id: I2cded9c306a40b136002c08e610daca2d75e1758 --- .../gpu/cl/kernels/depthwise_conv.cc | 143 +++++++++--------- 1 file changed, 72 insertions(+), 71 deletions(-) diff --git a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc index 05d5d086bc7..fca32db392c 100644 --- a/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc +++ b/tensorflow/lite/delegates/gpu/cl/kernels/depthwise_conv.cc @@ -86,13 +86,8 @@ std::string GenerateDepthwiseConvolutionCode( } op->AddDstTensor("dst_tensor", dst_desc); - const auto src_tensor_type = op_def.src_tensors[0].storage_type; - std::string c = GetCommonDefines(op_def.precision); - const bool manual_clamp = src_tensor_type == TensorStorageType::BUFFER || - src_tensor_type == TensorStorageType::IMAGE_BUFFER; - c += "__kernel void main_function(\n"; c += "$0) {\n"; c += " int X = get_global_id(0);\n"; @@ -142,84 +137,91 @@ std::string GenerateDepthwiseConvolutionCode( std::string kernel_size_z = dynamic_weights ? "args.weights.Depth()" : "args.kernel_size_z"; - std::string flat_coords = "x_c, y_c"; - if (manual_clamp) { - std::string check = "!outside_x && !outside_y"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) { - check += " && !outside_z"; - flat_coords += ", z_c"; - c += " for (int kz = 0; kz < " + kernel_size_z + "; ++kz) {\n"; - c += " int z_c = z_offseted + kz * args.dilation_z;\n"; - c += " bool outside_z = z_c < 0 || z_c >= args.src_tensor.Depth();\n"; - } - c += " for (int ky = 0; ky < " + kernel_size_y + "; ++ky) {\n"; - c += " int y_c = y_offseted + ky * args.dilation_y;\n"; - c += " bool outside_y = y_c < 0 || y_c >= args.src_tensor.Height();\n"; - c += " for (int kx = 0; kx < " + kernel_size_x + "; ++kx) {\n"; - const std::string dilation_x = - op_def.IsBatchSupported() ? "args.dilation_x * args.src_tensor.Batch()" - : "args.dilation_x"; - c += " int x_c = x_offseted + kx * " + dilation_x + ";\n"; - c += " bool outside_x = x_c < 0 || x_c >= args.src_tensor.Width();\n"; - c += " if (" + check + ") {\n"; - if (dynamic_weights) { - c += " FLT4 f = args.weights.Read(kx, ky, S);\n"; - } else { - if (weights_are_buffer) { - c += " FLT4 f = args.weights.Read(fx_c);\n"; - } else { - c += " FLT4 f = args.weights.Read(fx_c, S);\n"; + auto generate_check = [&]() { + std::string check; + const std::vector axes{Axis::WIDTH, Axis::HEIGHT, Axis::DEPTH}; + const std::vector names{"outside_x", "outside_y", "outside_z"}; + for (int i = 0; i < axes.size(); ++i) { + const auto& axis = axes[i]; + if (src_desc.HasAxis(axis) && !src_desc.SupportsZeroClamp(axis)) { + if (!check.empty()) { + check += " && "; + } + check += "!" + names[i]; } } - c += GetSrcValue(channel_multiplier, flat_coords); - c += " r += TO_ACCUM_TYPE(src_final * f);\n"; - c += " };\n"; - if (!dynamic_weights) { - c += " fx_c++;\n"; - } - c += " }\n"; - c += " }\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) { - c += " }\n"; - } - } else { // Texture types with ZERO clamping - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) { - flat_coords += ", z_c"; - c += " for (int kz = 0; kz < " + kernel_size_z + "; ++kz) {\n"; - c += " int z_c = z_offseted + kz * args.dilation_z;\n"; - if (src_tensor_type != - TensorStorageType::TEXTURE_3D) { // Only TEXTURE_3D supports clamping - // in DEPTH dimension - c += " if (z_c < 0 || z_c >= args.src_tensor.Depth()) {\n"; - c += " fx_c += args.kernel_size_y * args.kernel_size_x;\n"; - c += " continue;\n"; - c += " }\n"; + return check; + }; + auto generate_coords = [&]() { + std::string check; + const std::vector axes{Axis::WIDTH, Axis::HEIGHT, Axis::DEPTH}; + const std::vector names{"x_c", "y_c", "z_c"}; + for (int i = 0; i < axes.size(); ++i) { + const auto& axis = axes[i]; + if (src_desc.HasAxis(axis)) { + if (!check.empty()) { + check += ", "; + } + check += names[i]; } } + return check; + }; + const std::string check = generate_check(); + const std::string coords = generate_coords(); + + if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) { + c += " for (int kz = 0; kz < " + kernel_size_z + "; ++kz) {\n"; + c += " int z_c = z_offseted + kz * args.dilation_z;\n"; + if (!src_desc.SupportsZeroClamp(Axis::DEPTH)) { + c += " bool outside_z = z_c < 0 || z_c >= args.src_tensor.Depth();\n"; + } + } + if (op_def.dst_tensors[0].HasAxis(Axis::HEIGHT)) { c += " for (int ky = 0; ky < " + kernel_size_y + "; ++ky) {\n"; c += " int y_c = y_offseted + ky * args.dilation_y;\n"; - c += " for (int kx = 0; kx < " + kernel_size_x + "; ++kx) {\n"; + if (!src_desc.SupportsZeroClamp(Axis::HEIGHT)) { + c += " bool outside_y = y_c < 0 || y_c >= args.src_tensor.Height();\n"; + } + } + if (op_def.dst_tensors[0].HasAxis(Axis::WIDTH)) { + c += " for (int kx = 0; kx < " + kernel_size_x + "; ++kx) {\n"; const std::string dilation_x = op_def.IsBatchSupported() ? "args.dilation_x * args.src_tensor.Batch()" : "args.dilation_x"; - c += " int x_c = x_offseted + kx * " + dilation_x + ";\n"; - c += GetSrcValue(channel_multiplier, flat_coords); - if (dynamic_weights) { - c += " FLT4 f = args.weights.Read(kx, ky, S);\n"; + c += " int x_c = x_offseted + kx * " + dilation_x + ";\n"; + if (!src_desc.SupportsZeroClamp(Axis::WIDTH)) { + c += " bool outside_x = x_c < 0 || x_c >= args.src_tensor.Width();\n"; + } + } + if (!check.empty()) { + c += " if (" + check + ") {\n"; + } + if (dynamic_weights) { + c += " FLT4 f = args.weights.Read(kx, ky, S);\n"; + } else { + if (weights_are_buffer) { + c += " FLT4 f = args.weights.Read(fx_c);\n"; } else { - if (weights_are_buffer) { - c += " FLT4 f = args.weights.Read(fx_c);\n"; - } else { - c += " FLT4 f = args.weights.Read(fx_c, S);\n"; - } - c += " fx_c++;\n"; + c += " FLT4 f = args.weights.Read(fx_c, S);\n"; } - c += " r += TO_ACCUM_TYPE(src_final * f);\n"; + } + c += GetSrcValue(channel_multiplier, coords); + c += " r += TO_ACCUM_TYPE(src_final * f);\n"; + if (!check.empty()) { c += " }\n"; + } + if (!dynamic_weights) { + c += " fx_c++;\n"; + } + if (op_def.dst_tensors[0].HasAxis(Axis::WIDTH)) { + c += " }\n"; + } + if (op_def.dst_tensors[0].HasAxis(Axis::HEIGHT)) { + c += " }\n"; + } + if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) { c += " }\n"; - if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) { - c += " }\n"; - } } c += " FLT4 res0 = TO_FLT4(r) + args.biases.Read(S);\n"; if (op_def.dst_tensors[0].HasAxis(Axis::DEPTH)) { @@ -228,7 +230,6 @@ std::string GenerateDepthwiseConvolutionCode( c += " args.dst_tensor.Write(res0, X, Y, S);\n"; } c += "}\n"; - return c; } } // namespace -- GitLab