Unverified commit 8041ab8a, authored by Alexander Alekhin and committed by GitHub

Merge pull request #21025 from alalek:issue_21004

* dnn(ocl4dnn): fix LRN layer accuracy problems

- FP16 intermediate computation is not accurate and may provide NaN values

* dnn(test): update tolerance for FP16
Parent: cb286a66
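The commit message only says that FP16 intermediate computation "may provide NaN values". A minimal C++ sketch of one plausible failure mode follows; it is illustrative only (the input value 300.0f and the fp16_square() helper are assumptions, not taken from OpenCV). Squaring a half-precision value above roughly 256 overflows FP16's largest finite value (65504) to infinity, and a sliding-window update that later subtracts that same square then computes inf - inf, which is NaN.

// Illustrative sketch, not part of the patch: how FP16 intermediates can turn
// into NaN in a sliding-window sum of squares. fp16_square() is a hypothetical
// stand-in that only models FP16 overflow (results above 65504 become +inf).
#include <cmath>
#include <cstdio>
#include <limits>

static float fp16_square(float v)
{
    float sq = v * v;                    // half * half in the real FP16 kernel
    const float FP16_MAX = 65504.0f;     // largest finite FP16 value
    return std::fabs(sq) > FP16_MAX ? std::numeric_limits<float>::infinity() : sq;
}

int main()
{
    const float v = 300.0f;              // |v| > ~256 overflows when squared in FP16

    // FP16-style window: add the square, later subtract the same square.
    float accum16 = 0.0f;
    accum16 += fp16_square(v);           // +inf
    accum16 -= fp16_square(v);           // inf - inf -> NaN, poisons later outputs

    // FP32 window, as the patched kernel now accumulates.
    float accum32 = 0.0f;
    accum32 += v * v;
    accum32 -= v * v;                    // exactly 0

    std::printf("fp16-style accumulator: %f\n", accum16);  // nan
    std::printf("fp32 accumulator:       %f\n", accum32);  // 0.000000
    return 0;
}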
@@ -64,36 +64,37 @@ __kernel void TEMPLATE(lrn_full_no_scale,Dtype)(const int nthreads, __global con
     const int step = height * width;
     __global const Dtype* in_off = in + offset;
     __global Dtype* out_off = out + offset;
-    KERNEL_ARG_DTYPE scale_val;
     int head = 0;
     const int pre_pad = (size - 1) / 2;
     const int post_pad = size - pre_pad - 1;
-    KERNEL_ARG_DTYPE accum_scale = 0;
+    float accum_scale = 0;
     // fill the scale at [n, :, h, w]
     // accumulate values
     while (head < post_pad && head < channels) {
-      accum_scale += in_off[head * step] * in_off[head * step];
+      float v = in_off[head * step];
+      accum_scale += v * v;
       ++head;
     }
     // both add and subtract
     while (head < channels) {
-      accum_scale += in_off[head * step] * in_off[head * step];
+      float v = in_off[head * step];
+      accum_scale += v * v;
       if (head - size >= 0) {
-        accum_scale -= in_off[(head - size) * step]
-                       * in_off[(head - size) * step];
+        v = in_off[(head - size) * step];
+        accum_scale -= v * v;
       }
-      scale_val = k + accum_scale * alpha_over_size;
-      out_off[(head - post_pad) * step] = in_off[(head - post_pad) * step] * (Dtype)native_powr(scale_val, negative_beta);
+      float scale_val = k + accum_scale * alpha_over_size;
+      out_off[(head - post_pad) * step] = (Dtype)((float)in_off[(head - post_pad) * step] * native_powr(scale_val, negative_beta));
       ++head;
     }
     // subtract only
     while (head < channels + post_pad) {
       if (head - size >= 0) {
-        accum_scale -= in_off[(head - size) * step]
-                       * in_off[(head - size) * step];
+        float v = in_off[(head - size) * step];
+        accum_scale -= v * v;
       }
-      scale_val = k + accum_scale * alpha_over_size;
-      out_off[(head - post_pad) * step] = in_off[(head - post_pad) * step] * (Dtype)native_powr(scale_val, negative_beta);
+      float scale_val = k + accum_scale * alpha_over_size;
+      out_off[(head - post_pad) * step] = (Dtype)((float)in_off[(head - post_pad) * step] * native_powr(scale_val, negative_beta));
       ++head;
     }
   }
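For reference, this kernel implements cross-channel LRN: each output equals the input scaled by (k + alpha_over_size * sum of squared inputs over a channel window) raised to -beta, where the window spans pre_pad channels before to post_pad channels after the current one. The patch loads each Dtype value into a float before squaring, keeps the accumulator, scale, and power in FP32, and casts only the final product back to Dtype. A plain FP32 sketch of the same computation, with illustrative names and no OpenCV dependencies, might look like this:

// Hedged FP32 reference of the cross-channel LRN the kernel computes per
// spatial position; function and variable names are illustrative assumptions.
#include <algorithm>
#include <cmath>
#include <vector>

std::vector<float> lrnAcrossChannels(const std::vector<float>& in, int size,
                                     float alpha_over_size, float k, float beta)
{
    const int channels = (int)in.size();
    const int pre_pad = (size - 1) / 2;      // same window split as the kernel
    const int post_pad = size - pre_pad - 1;
    std::vector<float> out(channels);
    for (int c = 0; c < channels; ++c)
    {
        // Sum of squares over channels [c - pre_pad, c + post_pad],
        // clamped to valid channel indices.
        float accum = 0.f;
        for (int j = std::max(0, c - pre_pad); j <= std::min(channels - 1, c + post_pad); ++j)
            accum += in[j] * in[j];
        out[c] = in[c] * std::pow(k + alpha_over_size * accum, -beta);
    }
    return out;
}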
@@ -198,7 +198,7 @@ TEST_P(Reproducibility_AlexNet, Accuracy)
     ASSERT_EQ(inLayerShapes[0][3], 227);
     const float l1 = 1e-5;
-    const float lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 3e-3 : 1e-4;
+    const float lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 4e-3 : 1e-4;
     net.setPreferableBackend(DNN_BACKEND_OPENCV);
     net.setPreferableTarget(targetId);
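The only test change is the relaxed lInf threshold for the OpenCL FP16 target (3e-3 to 4e-3); l1 is unchanged. As a rough, self-contained illustration of what such thresholds typically gate (this is not OpenCV's actual test helper), a check could bound the mean absolute and maximum absolute differences between reference and produced outputs:

// Hypothetical tolerance check: l1 bounds the mean absolute difference,
// lInf bounds the largest absolute difference.
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

void checkTolerance(const std::vector<float>& ref, const std::vector<float>& out,
                    double l1, double lInf)
{
    assert(ref.size() == out.size() && !ref.empty());
    double sumAbs = 0.0, maxAbs = 0.0;
    for (size_t i = 0; i < ref.size(); ++i)
    {
        double d = std::fabs((double)ref[i] - (double)out[i]);
        sumAbs += d;
        maxAbs = std::max(maxAbs, d);
    }
    assert(sumAbs / ref.size() <= l1);   // mean absolute error within l1
    assert(maxAbs <= lInf);              // worst-case error within lInf
}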