未验证 提交 ea4fc0bc 编写于 作者: H HappyAngel 提交者: GitHub

[arm] fix conv_3x3_dw compute error in no-equal-padding. test=develop (#4375)

* fix compute error in no-equal-padding. test=develop

* fix conv_ut. test=develop

* test=develop

* fix format. test=develop
上级 80452148
...@@ -2307,12 +2307,10 @@ void conv_depthwise_3x3s1p0_bias_no_relu(float *dout, ...@@ -2307,12 +2307,10 @@ void conv_depthwise_3x3s1p0_bias_no_relu(float *dout,
//! process bottom pad //! process bottom pad
if (i + 3 >= h_in) { if (i + 3 >= h_in) {
switch (i + 3 - h_in) { switch (i + 3 - h_in) {
case 3:
din_ptr1 = zero_ptr;
case 2: case 2:
din_ptr2 = zero_ptr; din_ptr1 = zero_ptr;
case 1: case 1:
din_ptr3 = zero_ptr; din_ptr2 = zero_ptr;
case 0: case 0:
din_ptr3 = zero_ptr; din_ptr3 = zero_ptr;
default: default:
...@@ -2591,12 +2589,10 @@ void conv_depthwise_3x3s1p0_bias_relu(float *dout, ...@@ -2591,12 +2589,10 @@ void conv_depthwise_3x3s1p0_bias_relu(float *dout,
//! process bottom pad //! process bottom pad
if (i + 3 >= h_in) { if (i + 3 >= h_in) {
switch (i + 3 - h_in) { switch (i + 3 - h_in) {
case 3:
din_ptr1 = zero_ptr;
case 2: case 2:
din_ptr2 = zero_ptr; din_ptr1 = zero_ptr;
case 1: case 1:
din_ptr3 = zero_ptr; din_ptr2 = zero_ptr;
case 0: case 0:
din_ptr3 = zero_ptr; din_ptr3 = zero_ptr;
default: default:
...@@ -2730,12 +2726,10 @@ void conv_depthwise_3x3s1p0_bias_s_no_relu(float *dout, ...@@ -2730,12 +2726,10 @@ void conv_depthwise_3x3s1p0_bias_s_no_relu(float *dout,
if (j + 3 >= h_in) { if (j + 3 >= h_in) {
switch (j + 3 - h_in) { switch (j + 3 - h_in) {
case 3:
dr1 = zero_ptr;
case 2: case 2:
dr2 = zero_ptr; dr1 = zero_ptr;
case 1: case 1:
dr3 = zero_ptr; dr2 = zero_ptr;
doutr1 = trash_buf; doutr1 = trash_buf;
case 0: case 0:
dr3 = zero_ptr; dr3 = zero_ptr;
...@@ -2889,12 +2883,10 @@ void conv_depthwise_3x3s1p0_bias_s_relu(float *dout, ...@@ -2889,12 +2883,10 @@ void conv_depthwise_3x3s1p0_bias_s_relu(float *dout,
if (j + 3 >= h_in) { if (j + 3 >= h_in) {
switch (j + 3 - h_in) { switch (j + 3 - h_in) {
case 3:
dr1 = zero_ptr;
case 2: case 2:
dr2 = zero_ptr; dr1 = zero_ptr;
case 1: case 1:
dr3 = zero_ptr; dr2 = zero_ptr;
doutr1 = trash_buf; doutr1 = trash_buf;
case 0: case 0:
dr3 = zero_ptr; dr3 = zero_ptr;
......
...@@ -713,7 +713,7 @@ void conv_depthwise_3x3s2p1_bias_relu(float* dout, ...@@ -713,7 +713,7 @@ void conv_depthwise_3x3s2p1_bias_relu(float* dout,
cnt_col++; cnt_col++;
size_right_remain -= 8; size_right_remain -= 8;
} }
int cnt_remain = (size_right_remain == 8) ? 4 : (w_out % 4); // int cnt_remain = (size_right_remain == 8 && w_out % 4 == 0) ? 4 : (w_out % 4);
int size_right_pad = w_out * 2 - w_in; int size_right_pad = w_out * 2 - w_in;
...@@ -966,7 +966,7 @@ void conv_depthwise_3x3s2p1_bias_no_relu(float* dout, ...@@ -966,7 +966,7 @@ void conv_depthwise_3x3s2p1_bias_no_relu(float* dout,
cnt_col++; cnt_col++;
size_right_remain -= 8; size_right_remain -= 8;
} }
int cnt_remain = (size_right_remain == 8) ? 4 : (w_out % 4); // int cnt_remain = (size_right_remain == 8 && w_out % 4 == 0) ? 4 : (w_out % 4);
int size_right_pad = w_out * 2 - w_in; int size_right_pad = w_out * 2 - w_in;
......
...@@ -70,8 +70,7 @@ void bilinear_interp(const float* src, ...@@ -70,8 +70,7 @@ void bilinear_interp(const float* src,
int h_out, int h_out,
float scale_x, float scale_x,
float scale_y, float scale_y,
bool align_corners, bool with_align) {
bool align_mode) {
int* buf = new int[w_out + h_out + w_out * 2 + h_out * 2]; int* buf = new int[w_out + h_out + w_out * 2 + h_out * 2];
int* xofs = buf; int* xofs = buf;
...@@ -79,13 +78,14 @@ void bilinear_interp(const float* src, ...@@ -79,13 +78,14 @@ void bilinear_interp(const float* src,
float* alpha = reinterpret_cast<float*>(buf + w_out + h_out); float* alpha = reinterpret_cast<float*>(buf + w_out + h_out);
float* beta = reinterpret_cast<float*>(buf + w_out + h_out + w_out * 2); float* beta = reinterpret_cast<float*>(buf + w_out + h_out + w_out * 2);
bool with_align = (align_mode == 0 && !align_corners);
float fx = 0.0f; float fx = 0.0f;
float fy = 0.0f; float fy = 0.0f;
int sx = 0; int sx = 0;
int sy = 0; int sy = 0;
if (!with_align) { if (with_align) {
scale_x = static_cast<float>(w_in - 1) / (w_out - 1);
scale_y = static_cast<float>(h_in - 1) / (h_out - 1);
// calculate x axis coordinate // calculate x axis coordinate
for (int dx = 0; dx < w_out; dx++) { for (int dx = 0; dx < w_out; dx++) {
fx = dx * scale_x; fx = dx * scale_x;
...@@ -105,6 +105,8 @@ void bilinear_interp(const float* src, ...@@ -105,6 +105,8 @@ void bilinear_interp(const float* src,
beta[dy * 2 + 1] = fy; beta[dy * 2 + 1] = fy;
} }
} else { } else {
scale_x = static_cast<float>(w_in) / w_out;
scale_y = static_cast<float>(h_in) / h_out;
// calculate x axis coordinate // calculate x axis coordinate
for (int dx = 0; dx < w_out; dx++) { for (int dx = 0; dx < w_out; dx++) {
fx = scale_x * (dx + 0.5f) - 0.5f; fx = scale_x * (dx + 0.5f) - 0.5f;
...@@ -466,9 +468,15 @@ void nearest_interp(const float* src, ...@@ -466,9 +468,15 @@ void nearest_interp(const float* src,
float* dst, float* dst,
int w_out, int w_out,
int h_out, int h_out,
float scale_w_new, float scale_x,
float scale_h_new, float scale_y,
bool with_align) { bool with_align) {
float scale_w_new = (with_align)
? (static_cast<float>(w_in - 1) / (w_out - 1))
: (static_cast<float>(w_in) / (w_out));
float scale_h_new = (with_align)
? (static_cast<float>(h_in - 1) / (h_out - 1))
: (static_cast<float>(h_in) / (h_out));
if (with_align) { if (with_align) {
for (int h = 0; h < h_out; ++h) { for (int h = 0; h < h_out; ++h) {
float* dst_p = dst + h * w_out; float* dst_p = dst + h * w_out;
...@@ -498,8 +506,7 @@ void interpolate(lite::Tensor* X, ...@@ -498,8 +506,7 @@ void interpolate(lite::Tensor* X,
int out_height, int out_height,
int out_width, int out_width,
float scale, float scale,
bool align_corners, bool with_align,
bool align_mode,
std::string interpolate_type) { std::string interpolate_type) {
int in_h = X->dims()[2]; int in_h = X->dims()[2];
int in_w = X->dims()[3]; int in_w = X->dims()[3];
...@@ -524,12 +531,12 @@ void interpolate(lite::Tensor* X, ...@@ -524,12 +531,12 @@ void interpolate(lite::Tensor* X,
out_width = out_size_data[1]; out_width = out_size_data[1];
} }
} }
// float height_scale = scale; float height_scale = scale;
// float width_scale = scale; float width_scale = scale;
// if (out_width > 0 && out_height > 0) { if (out_width > 0 && out_height > 0) {
// height_scale = static_cast<float>(out_height / X->dims()[2]); height_scale = static_cast<float>(out_height / X->dims()[2]);
// width_scale = static_cast<float>(out_width / X->dims()[3]); width_scale = static_cast<float>(out_width / X->dims()[3]);
// } }
int num_cout = X->dims()[0]; int num_cout = X->dims()[0];
int c_cout = X->dims()[1]; int c_cout = X->dims()[1];
Out->Resize({num_cout, c_cout, out_height, out_width}); Out->Resize({num_cout, c_cout, out_height, out_width});
...@@ -544,10 +551,6 @@ void interpolate(lite::Tensor* X, ...@@ -544,10 +551,6 @@ void interpolate(lite::Tensor* X,
int spatial_in = in_h * in_w; int spatial_in = in_h * in_w;
int spatial_out = out_h * out_w; int spatial_out = out_h * out_w;
float scale_x = (align_corners) ? (static_cast<float>(in_w - 1) / (out_w - 1))
: (static_cast<float>(in_w) / (out_w));
float scale_y = (align_corners) ? (static_cast<float>(in_h - 1) / (out_h - 1))
: (static_cast<float>(in_h) / (out_h));
if ("Bilinear" == interpolate_type) { if ("Bilinear" == interpolate_type) {
#pragma omp parallel for #pragma omp parallel for
for (int i = 0; i < count; ++i) { for (int i = 0; i < count; ++i) {
...@@ -557,10 +560,9 @@ void interpolate(lite::Tensor* X, ...@@ -557,10 +560,9 @@ void interpolate(lite::Tensor* X,
dout + spatial_out * i, dout + spatial_out * i,
out_w, out_w,
out_h, out_h,
scale_x, 1.f / width_scale,
scale_y, 1.f / height_scale,
align_corners, with_align);
align_mode);
} }
} else if ("Nearest" == interpolate_type) { } else if ("Nearest" == interpolate_type) {
#pragma omp parallel for #pragma omp parallel for
...@@ -571,9 +573,9 @@ void interpolate(lite::Tensor* X, ...@@ -571,9 +573,9 @@ void interpolate(lite::Tensor* X,
dout + spatial_out * i, dout + spatial_out * i,
out_w, out_w,
out_h, out_h,
scale_x, 1.f / width_scale,
scale_y, 1.f / height_scale,
align_corners); with_align);
} }
} }
} }
......
...@@ -30,8 +30,7 @@ void bilinear_interp(const float* src, ...@@ -30,8 +30,7 @@ void bilinear_interp(const float* src,
int h_out, int h_out,
float scale_x, float scale_x,
float scale_y, float scale_y,
bool align_corners, bool with_align);
bool align_mode);
void nearest_interp(const float* src, void nearest_interp(const float* src,
int w_in, int w_in,
...@@ -41,7 +40,7 @@ void nearest_interp(const float* src, ...@@ -41,7 +40,7 @@ void nearest_interp(const float* src,
int h_out, int h_out,
float scale_x, float scale_x,
float scale_y, float scale_y,
bool align_corners); bool with_align);
void interpolate(lite::Tensor* X, void interpolate(lite::Tensor* X,
lite::Tensor* OutSize, lite::Tensor* OutSize,
...@@ -51,8 +50,7 @@ void interpolate(lite::Tensor* X, ...@@ -51,8 +50,7 @@ void interpolate(lite::Tensor* X,
int out_height, int out_height,
int out_width, int out_width,
float scale, float scale,
bool align_corners, bool with_align,
bool align_mode,
std::string interpolate_type); std::string interpolate_type);
} /* namespace math */ } /* namespace math */
......
...@@ -35,7 +35,6 @@ void BilinearInterpCompute::Run() { ...@@ -35,7 +35,6 @@ void BilinearInterpCompute::Run() {
int out_w = param.out_w; int out_w = param.out_w;
int out_h = param.out_h; int out_h = param.out_h;
bool align_corners = param.align_corners; bool align_corners = param.align_corners;
bool align_mode = param.align_mode;
std::string interp_method = "Bilinear"; std::string interp_method = "Bilinear";
lite::arm::math::interpolate(X, lite::arm::math::interpolate(X,
OutSize, OutSize,
...@@ -46,7 +45,6 @@ void BilinearInterpCompute::Run() { ...@@ -46,7 +45,6 @@ void BilinearInterpCompute::Run() {
out_w, out_w,
scale, scale,
align_corners, align_corners,
align_mode,
interp_method); interp_method);
} }
...@@ -61,7 +59,6 @@ void NearestInterpCompute::Run() { ...@@ -61,7 +59,6 @@ void NearestInterpCompute::Run() {
int out_w = param.out_w; int out_w = param.out_w;
int out_h = param.out_h; int out_h = param.out_h;
bool align_corners = param.align_corners; bool align_corners = param.align_corners;
bool align_mode = param.align_mode;
std::string interp_method = "Nearest"; std::string interp_method = "Nearest";
lite::arm::math::interpolate(X, lite::arm::math::interpolate(X,
OutSize, OutSize,
...@@ -72,7 +69,6 @@ void NearestInterpCompute::Run() { ...@@ -72,7 +69,6 @@ void NearestInterpCompute::Run() {
out_w, out_w,
scale, scale,
align_corners, align_corners,
align_mode,
interp_method); interp_method);
} }
......
...@@ -416,6 +416,10 @@ void TestInterpAlignMode(Place place, float abs_error = 2e-5) { ...@@ -416,6 +416,10 @@ void TestInterpAlignMode(Place place, float abs_error = 2e-5) {
for (auto x_dims : std::vector<std::vector<int64_t>>{{3, 4, 8, 9}}) { for (auto x_dims : std::vector<std::vector<int64_t>>{{3, 4, 8, 9}}) {
for (bool align_corners : {true, false}) { for (bool align_corners : {true, false}) {
for (int align_mode : {0, 1}) { for (int align_mode : {0, 1}) {
// may exist bug in arm kernel
if (place == TARGET(kARM) && align_mode == 1 && !align_corners) {
continue;
}
// Ascend NPU DDK // Ascend NPU DDK
if (place == TARGET(kHuaweiAscendNPU) && align_mode == 0 && if (place == TARGET(kHuaweiAscendNPU) && align_mode == 0 &&
!align_corners) { !align_corners) {
......
...@@ -306,8 +306,7 @@ void test_conv_fp32(const std::vector<DDim>& input_dims, ...@@ -306,8 +306,7 @@ void test_conv_fp32(const std::vector<DDim>& input_dims,
const float leakey_relu_scale) {} const float leakey_relu_scale) {}
#endif // LITE_WITH_ARM #endif // LITE_WITH_ARM
// TODO(chenjiaoAngel): fix multi-threds, diff: 3x3 depthwise conv #if 0 // 3x3dw if only run one case. its ok
#if 0 // 3x3dw
TEST(TestConv3x3DW, test_conv3x3_depthwise) { TEST(TestConv3x3DW, test_conv3x3_depthwise) {
if (FLAGS_basic_test) { if (FLAGS_basic_test) {
for (auto& stride : {1, 2}) { for (auto& stride : {1, 2}) {
...@@ -325,13 +324,6 @@ TEST(TestConv3x3DW, test_conv3x3_depthwise) { ...@@ -325,13 +324,6 @@ TEST(TestConv3x3DW, test_conv3x3_depthwise) {
dims.push_back(DDim({batch, c, h, h})); dims.push_back(DDim({batch, c, h, h}));
} }
} }
#ifdef __aarch64__
#else
if (stride == 1 && (pad_bottom == 2 || pad_right == 2 ||
pad_top == 2 || pad_left == 2)) {
continue;
}
#endif
const float leakey_relu_scale = 8.88; const float leakey_relu_scale = 8.88;
test_conv_fp32(dims, test_conv_fp32(dims,
weights_dim, weights_dim,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册