diff --git a/lite/backends/arm/math/conv3x3s1p01_depthwise_fp32_relu.cc b/lite/backends/arm/math/conv3x3s1p01_depthwise_fp32_relu.cc index 3e02eddfdb2de33b7f75e2448c3a5809ebcb88d7..bca36f5f0baa02fa780aada094700f0a7b5ae378 100644 --- a/lite/backends/arm/math/conv3x3s1p01_depthwise_fp32_relu.cc +++ b/lite/backends/arm/math/conv3x3s1p01_depthwise_fp32_relu.cc @@ -2307,12 +2307,10 @@ void conv_depthwise_3x3s1p0_bias_no_relu(float *dout, //! process bottom pad if (i + 3 >= h_in) { switch (i + 3 - h_in) { - case 3: - din_ptr1 = zero_ptr; case 2: - din_ptr2 = zero_ptr; + din_ptr1 = zero_ptr; case 1: - din_ptr3 = zero_ptr; + din_ptr2 = zero_ptr; case 0: din_ptr3 = zero_ptr; default: @@ -2591,12 +2589,10 @@ void conv_depthwise_3x3s1p0_bias_relu(float *dout, //! process bottom pad if (i + 3 >= h_in) { switch (i + 3 - h_in) { - case 3: - din_ptr1 = zero_ptr; case 2: - din_ptr2 = zero_ptr; + din_ptr1 = zero_ptr; case 1: - din_ptr3 = zero_ptr; + din_ptr2 = zero_ptr; case 0: din_ptr3 = zero_ptr; default: @@ -2730,12 +2726,10 @@ void conv_depthwise_3x3s1p0_bias_s_no_relu(float *dout, if (j + 3 >= h_in) { switch (j + 3 - h_in) { - case 3: - dr1 = zero_ptr; case 2: - dr2 = zero_ptr; + dr1 = zero_ptr; case 1: - dr3 = zero_ptr; + dr2 = zero_ptr; doutr1 = trash_buf; case 0: dr3 = zero_ptr; @@ -2889,12 +2883,10 @@ void conv_depthwise_3x3s1p0_bias_s_relu(float *dout, if (j + 3 >= h_in) { switch (j + 3 - h_in) { - case 3: - dr1 = zero_ptr; case 2: - dr2 = zero_ptr; + dr1 = zero_ptr; case 1: - dr3 = zero_ptr; + dr2 = zero_ptr; doutr1 = trash_buf; case 0: dr3 = zero_ptr; diff --git a/lite/backends/arm/math/conv3x3s2p01_depthwise_fp32_relu.cc b/lite/backends/arm/math/conv3x3s2p01_depthwise_fp32_relu.cc index 61f446137144b20b51df31c872fe708ddac68e33..7a3e6e9348da12a0f362cbbe6c652ed70ee94fea 100644 --- a/lite/backends/arm/math/conv3x3s2p01_depthwise_fp32_relu.cc +++ b/lite/backends/arm/math/conv3x3s2p01_depthwise_fp32_relu.cc @@ -713,7 +713,7 @@ void conv_depthwise_3x3s2p1_bias_relu(float* dout, cnt_col++; size_right_remain -= 8; } - int cnt_remain = (size_right_remain == 8) ? 4 : (w_out % 4); // + int cnt_remain = (size_right_remain == 8 && w_out % 4 == 0) ? 4 : (w_out % 4); int size_right_pad = w_out * 2 - w_in; @@ -966,7 +966,7 @@ void conv_depthwise_3x3s2p1_bias_no_relu(float* dout, cnt_col++; size_right_remain -= 8; } - int cnt_remain = (size_right_remain == 8) ? 4 : (w_out % 4); // + int cnt_remain = (size_right_remain == 8 && w_out % 4 == 0) ? 4 : (w_out % 4); int size_right_pad = w_out * 2 - w_in; diff --git a/lite/backends/arm/math/interpolate.cc b/lite/backends/arm/math/interpolate.cc index 4345c2e8137dbe0d0d1031cb4b41a2163d49ed57..1c53142fc53bc785efcbf28fa007d403ad99ab70 100644 --- a/lite/backends/arm/math/interpolate.cc +++ b/lite/backends/arm/math/interpolate.cc @@ -70,8 +70,7 @@ void bilinear_interp(const float* src, int h_out, float scale_x, float scale_y, - bool align_corners, - bool align_mode) { + bool with_align) { int* buf = new int[w_out + h_out + w_out * 2 + h_out * 2]; int* xofs = buf; @@ -79,13 +78,14 @@ void bilinear_interp(const float* src, float* alpha = reinterpret_cast(buf + w_out + h_out); float* beta = reinterpret_cast(buf + w_out + h_out + w_out * 2); - bool with_align = (align_mode == 0 && !align_corners); float fx = 0.0f; float fy = 0.0f; int sx = 0; int sy = 0; - if (!with_align) { + if (with_align) { + scale_x = static_cast(w_in - 1) / (w_out - 1); + scale_y = static_cast(h_in - 1) / (h_out - 1); // calculate x axis coordinate for (int dx = 0; dx < w_out; dx++) { fx = dx * scale_x; @@ -105,6 +105,8 @@ void bilinear_interp(const float* src, beta[dy * 2 + 1] = fy; } } else { + scale_x = static_cast(w_in) / w_out; + scale_y = static_cast(h_in) / h_out; // calculate x axis coordinate for (int dx = 0; dx < w_out; dx++) { fx = scale_x * (dx + 0.5f) - 0.5f; @@ -466,9 +468,15 @@ void nearest_interp(const float* src, float* dst, int w_out, int h_out, - float scale_w_new, - float scale_h_new, + float scale_x, + float scale_y, bool with_align) { + float scale_w_new = (with_align) + ? (static_cast(w_in - 1) / (w_out - 1)) + : (static_cast(w_in) / (w_out)); + float scale_h_new = (with_align) + ? (static_cast(h_in - 1) / (h_out - 1)) + : (static_cast(h_in) / (h_out)); if (with_align) { for (int h = 0; h < h_out; ++h) { float* dst_p = dst + h * w_out; @@ -498,8 +506,7 @@ void interpolate(lite::Tensor* X, int out_height, int out_width, float scale, - bool align_corners, - bool align_mode, + bool with_align, std::string interpolate_type) { int in_h = X->dims()[2]; int in_w = X->dims()[3]; @@ -524,12 +531,12 @@ void interpolate(lite::Tensor* X, out_width = out_size_data[1]; } } - // float height_scale = scale; - // float width_scale = scale; - // if (out_width > 0 && out_height > 0) { - // height_scale = static_cast(out_height / X->dims()[2]); - // width_scale = static_cast(out_width / X->dims()[3]); - // } + float height_scale = scale; + float width_scale = scale; + if (out_width > 0 && out_height > 0) { + height_scale = static_cast(out_height / X->dims()[2]); + width_scale = static_cast(out_width / X->dims()[3]); + } int num_cout = X->dims()[0]; int c_cout = X->dims()[1]; Out->Resize({num_cout, c_cout, out_height, out_width}); @@ -544,10 +551,6 @@ void interpolate(lite::Tensor* X, int spatial_in = in_h * in_w; int spatial_out = out_h * out_w; - float scale_x = (align_corners) ? (static_cast(in_w - 1) / (out_w - 1)) - : (static_cast(in_w) / (out_w)); - float scale_y = (align_corners) ? (static_cast(in_h - 1) / (out_h - 1)) - : (static_cast(in_h) / (out_h)); if ("Bilinear" == interpolate_type) { #pragma omp parallel for for (int i = 0; i < count; ++i) { @@ -557,10 +560,9 @@ void interpolate(lite::Tensor* X, dout + spatial_out * i, out_w, out_h, - scale_x, - scale_y, - align_corners, - align_mode); + 1.f / width_scale, + 1.f / height_scale, + with_align); } } else if ("Nearest" == interpolate_type) { #pragma omp parallel for @@ -571,9 +573,9 @@ void interpolate(lite::Tensor* X, dout + spatial_out * i, out_w, out_h, - scale_x, - scale_y, - align_corners); + 1.f / width_scale, + 1.f / height_scale, + with_align); } } } diff --git a/lite/backends/arm/math/interpolate.h b/lite/backends/arm/math/interpolate.h index 82c4c068b69567c01d37cfa901f9b58626574865..e9c41c5bc86c8f00d57e096e3cd2b5f37df3a474 100644 --- a/lite/backends/arm/math/interpolate.h +++ b/lite/backends/arm/math/interpolate.h @@ -30,8 +30,7 @@ void bilinear_interp(const float* src, int h_out, float scale_x, float scale_y, - bool align_corners, - bool align_mode); + bool with_align); void nearest_interp(const float* src, int w_in, @@ -41,7 +40,7 @@ void nearest_interp(const float* src, int h_out, float scale_x, float scale_y, - bool align_corners); + bool with_align); void interpolate(lite::Tensor* X, lite::Tensor* OutSize, @@ -51,8 +50,7 @@ void interpolate(lite::Tensor* X, int out_height, int out_width, float scale, - bool align_corners, - bool align_mode, + bool with_align, std::string interpolate_type); } /* namespace math */ diff --git a/lite/kernels/arm/interpolate_compute.cc b/lite/kernels/arm/interpolate_compute.cc index 8593758d5af6ea7d5badc6870ea51e13a443ed99..760b2fcf0630a632d1f1bbaeda7760d2de25a7a4 100644 --- a/lite/kernels/arm/interpolate_compute.cc +++ b/lite/kernels/arm/interpolate_compute.cc @@ -35,7 +35,6 @@ void BilinearInterpCompute::Run() { int out_w = param.out_w; int out_h = param.out_h; bool align_corners = param.align_corners; - bool align_mode = param.align_mode; std::string interp_method = "Bilinear"; lite::arm::math::interpolate(X, OutSize, @@ -46,7 +45,6 @@ void BilinearInterpCompute::Run() { out_w, scale, align_corners, - align_mode, interp_method); } @@ -61,7 +59,6 @@ void NearestInterpCompute::Run() { int out_w = param.out_w; int out_h = param.out_h; bool align_corners = param.align_corners; - bool align_mode = param.align_mode; std::string interp_method = "Nearest"; lite::arm::math::interpolate(X, OutSize, @@ -72,7 +69,6 @@ void NearestInterpCompute::Run() { out_w, scale, align_corners, - align_mode, interp_method); } diff --git a/lite/tests/kernels/interp_compute_test.cc b/lite/tests/kernels/interp_compute_test.cc index 8d10040bca61f42ffc93d745baf42a23eb11c08d..f512808632f3d99153c1ca93c94e3edc679b9c96 100644 --- a/lite/tests/kernels/interp_compute_test.cc +++ b/lite/tests/kernels/interp_compute_test.cc @@ -416,6 +416,10 @@ void TestInterpAlignMode(Place place, float abs_error = 2e-5) { for (auto x_dims : std::vector>{{3, 4, 8, 9}}) { for (bool align_corners : {true, false}) { for (int align_mode : {0, 1}) { + // may exist bug in arm kernel + if (place == TARGET(kARM) && align_mode == 1 && !align_corners) { + continue; + } // Ascend NPU DDK if (place == TARGET(kHuaweiAscendNPU) && align_mode == 0 && !align_corners) { diff --git a/lite/tests/math/conv_compute_test.cc b/lite/tests/math/conv_compute_test.cc index 9ad98ce6f4566898b3821e6bf540b331a84b97bb..54d9448b86489a777045ac8c63495a153a426c3a 100644 --- a/lite/tests/math/conv_compute_test.cc +++ b/lite/tests/math/conv_compute_test.cc @@ -306,8 +306,7 @@ void test_conv_fp32(const std::vector& input_dims, const float leakey_relu_scale) {} #endif // LITE_WITH_ARM -// TODO(chenjiaoAngel): fix multi-threds, diff: 3x3 depthwise conv -#if 0 // 3x3dw +#if 0 // 3x3dw if only run one case. its ok TEST(TestConv3x3DW, test_conv3x3_depthwise) { if (FLAGS_basic_test) { for (auto& stride : {1, 2}) { @@ -325,13 +324,6 @@ TEST(TestConv3x3DW, test_conv3x3_depthwise) { dims.push_back(DDim({batch, c, h, h})); } } -#ifdef __aarch64__ -#else - if (stride == 1 && (pad_bottom == 2 || pad_right == 2 || - pad_top == 2 || pad_left == 2)) { - continue; - } -#endif const float leakey_relu_scale = 8.88; test_conv_fp32(dims, weights_dim,