Unverified commit 8041ab8a, authored by Alexander Alekhin and committed by GitHub

Merge pull request #21025 from alalek:issue_21004

* dnn(ocl4dnn): fix LRN layer accuracy problems

- FP16 intermediate computation is not accurate and may provide NaN values

* dnn(test): update tolerance for FP16
Parent: cb286a66
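The commit message only says that FP16 intermediate computation "may provide NaN values". A minimal C++ sketch of one plausible failure mode follows; it is illustrative only (the input value 300.0f and the fp16_square() helper are assumptions, not taken from OpenCV). Squaring a half-precision value above roughly 256 overflows FP16's largest finite value (65504) to infinity, and a sliding-window update that later subtracts that same square then computes inf - inf, which is NaN.

// Illustrative sketch, not part of the patch: how FP16 intermediates can turn
// into NaN in a sliding-window sum of squares. fp16_square() is a hypothetical
// stand-in that only models FP16 overflow (results above 65504 become +inf).
#include <cmath>
#include <cstdio>
#include <limits>

static float fp16_square(float v)
{
    float sq = v * v;                    // half * half in the real FP16 kernel
    const float FP16_MAX = 65504.0f;     // largest finite FP16 value
    return std::fabs(sq) > FP16_MAX ? std::numeric_limits<float>::infinity() : sq;
}

int main()
{
    const float v = 300.0f;              // |v| > ~256 overflows when squared in FP16

    // FP16-style window: add the square, later subtract the same square.
    float accum16 = 0.0f;
    accum16 += fp16_square(v);           // +inf
    accum16 -= fp16_square(v);           // inf - inf -> NaN, poisons later outputs

    // FP32 window, as the patched kernel now accumulates.
    float accum32 = 0.0f;
    accum32 += v * v;
    accum32 -= v * v;                    // exactly 0

    std::printf("fp16-style accumulator: %f\n", accum16);  // nan
    std::printf("fp32 accumulator:       %f\n", accum32);  // 0.000000
    return 0;
}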
@@ -64,36 +64,37 @@ __kernel void TEMPLATE(lrn_full_no_scale,Dtype)(const int nthreads, __global con
     const int step = height * width;
     __global const Dtype* in_off = in + offset;
     __global Dtype* out_off = out + offset;
-    KERNEL_ARG_DTYPE scale_val;
     int head = 0;
     const int pre_pad = (size - 1) / 2;
     const int post_pad = size - pre_pad - 1;
-    KERNEL_ARG_DTYPE accum_scale = 0;
+    float accum_scale = 0;
     // fill the scale at [n, :, h, w]
     // accumulate values
     while (head < post_pad && head < channels) {
-      accum_scale += in_off[head * step] * in_off[head * step];
+      float v = in_off[head * step];
+      accum_scale += v * v;
       ++head;
     }
     // both add and subtract
     while (head < channels) {
-      accum_scale += in_off[head * step] * in_off[head * step];
+      float v = in_off[head * step];
+      accum_scale += v * v;
       if (head - size >= 0) {
-        accum_scale -= in_off[(head - size) * step]
-                       * in_off[(head - size) * step];
+        v = in_off[(head - size) * step];
+        accum_scale -= v * v;
       }
-      scale_val = k + accum_scale * alpha_over_size;
-      out_off[(head - post_pad) * step] = in_off[(head - post_pad) * step] * (Dtype)native_powr(scale_val, negative_beta);
+      float scale_val = k + accum_scale * alpha_over_size;
+      out_off[(head - post_pad) * step] = (Dtype)((float)in_off[(head - post_pad) * step] * native_powr(scale_val, negative_beta));
       ++head;
     }
     // subtract only
     while (head < channels + post_pad) {
       if (head - size >= 0) {
-        accum_scale -= in_off[(head - size) * step]
-                       * in_off[(head - size) * step];
+        float v = in_off[(head - size) * step];
+        accum_scale -= v * v;
       }
-      scale_val = k + accum_scale * alpha_over_size;
-      out_off[(head - post_pad) * step] = in_off[(head - post_pad) * step] * (Dtype)native_powr(scale_val, negative_beta);
+      float scale_val = k + accum_scale * alpha_over_size;
+      out_off[(head - post_pad) * step] = (Dtype)((float)in_off[(head - post_pad) * step] * native_powr(scale_val, negative_beta));
       ++head;
     }
   }
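For reference, this kernel implements cross-channel LRN: each output equals the input scaled by (k + alpha_over_size * sum of squared inputs over a channel window) raised to -beta, where the window spans pre_pad channels before to post_pad channels after the current one. The patch loads each Dtype value into a float before squaring, keeps the accumulator, scale, and power in FP32, and casts only the final product back to Dtype. A plain FP32 sketch of the same computation, with illustrative names and no OpenCV dependencies, might look like this:

// Hedged FP32 reference of the cross-channel LRN the kernel computes per
// spatial position; function and variable names are illustrative assumptions.
#include <algorithm>
#include <cmath>
#include <vector>

std::vector<float> lrnAcrossChannels(const std::vector<float>& in, int size,
                                     float alpha_over_size, float k, float beta)
{
    const int channels = (int)in.size();
    const int pre_pad = (size - 1) / 2;      // same window split as the kernel
    const int post_pad = size - pre_pad - 1;
    std::vector<float> out(channels);
    for (int c = 0; c < channels; ++c)
    {
        // Sum of squares over channels [c - pre_pad, c + post_pad],
        // clamped to valid channel indices.
        float accum = 0.f;
        for (int j = std::max(0, c - pre_pad); j <= std::min(channels - 1, c + post_pad); ++j)
            accum += in[j] * in[j];
        out[c] = in[c] * std::pow(k + alpha_over_size * accum, -beta);
    }
    return out;
}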
@@ -198,7 +198,7 @@ TEST_P(Reproducibility_AlexNet, Accuracy)
     ASSERT_EQ(inLayerShapes[0][3], 227);
     const float l1 = 1e-5;
-    const float lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 3e-3 : 1e-4;
+    const float lInf = (targetId == DNN_TARGET_OPENCL_FP16) ? 4e-3 : 1e-4;
     net.setPreferableBackend(DNN_BACKEND_OPENCV);
     net.setPreferableTarget(targetId);
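The only test change is the relaxed lInf threshold for the OpenCL FP16 target (3e-3 to 4e-3); l1 is unchanged. As a rough, self-contained illustration of what such thresholds typically gate (this is not OpenCV's actual test helper), a check could bound the mean absolute and maximum absolute differences between reference and produced outputs:

// Hypothetical tolerance check: l1 bounds the mean absolute difference,
// lInf bounds the largest absolute difference.
#include <algorithm>
#include <cassert>
#include <cmath>
#include <cstddef>
#include <vector>

void checkTolerance(const std::vector<float>& ref, const std::vector<float>& out,
                    double l1, double lInf)
{
    assert(ref.size() == out.size() && !ref.empty());
    double sumAbs = 0.0, maxAbs = 0.0;
    for (size_t i = 0; i < ref.size(); ++i)
    {
        double d = std::fabs((double)ref[i] - (double)out[i]);
        sumAbs += d;
        maxAbs = std::max(maxAbs, d);
    }
    assert(sumAbs / ref.size() <= l1);   // mean absolute error within l1
    assert(maxAbs <= lInf);              // worst-case error within lInf
}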