From dc6f89f2d37254175fa09db4a5bd1b409ddba638 Mon Sep 17 00:00:00 2001 From: Megvii Engine Team Date: Tue, 31 Mar 2020 16:23:51 +0800 Subject: [PATCH] refactor(dnn): refactor winograd strategy helper GitOrigin-RevId: ecc2b15df995a526688d3a5593d8db6767c0c717 --- dnn/src/common/winograd/winograd_helper.cpp | 801 ++++++------------ dnn/src/common/winograd/winograd_helper.h | 48 +- .../fallback/conv_bias/winograd/strategy.cpp | 79 +- .../fallback/conv_bias/winograd/strategy.h | 1 + .../winograd_filter_preprocess/opr_impl.cpp | 53 +- 5 files changed, 339 insertions(+), 643 deletions(-) diff --git a/dnn/src/common/winograd/winograd_helper.cpp b/dnn/src/common/winograd/winograd_helper.cpp index 6f1dcdd56..767bdda4c 100644 --- a/dnn/src/common/winograd/winograd_helper.cpp +++ b/dnn/src/common/winograd/winograd_helper.cpp @@ -58,368 +58,300 @@ struct OutputGetter< return dtype.param().quantize(item).as_uint8(); } }; - } // namespace namespace megdnn { namespace winograd { -template -class StrategyHelper { -public: - static void filter(const ctype* filter, - input_filter_compute_type* filter_transform_buf, - input_filter_compute_type* transform_mid_buf, size_t OC, - size_t IC, size_t oc_start, size_t oc_end, size_t m, - size_t r, const std::vector& interp_points, - DType dtype, float rescale) { - size_t alpha = m + r - 1; - WinogradCoeff winograd_coeff(m, r, - interp_points); - - input_filter_compute_type* mid_buf1 = transform_mid_buf; - input_filter_compute_type* mid_buf2 = transform_mid_buf + alpha * alpha; - - Getter getter(dtype); - for (size_t oc = oc_start; oc < oc_end; oc++) { - rep(ic, IC) { - const ctype* filter_ptr = filter + (oc * IC + ic) * r * r; - rep(i, r) rep(j, r) { - mid_buf1[i * r + j] = getter(filter_ptr[i * r + j]); - } +constexpr size_t layout_pack_size(param::ConvBias::Format layout) { + switch (layout) { + case param::ConvBias::Format::NHWCD4: + return 4; + case param::ConvBias::Format::NCHW4: + return 4; + case param::ConvBias::Format::NCHW32: + return 32; + case param::ConvBias::Format::NCHW88: + case param::ConvBias::Format::NCHW8: + return 8; + default: + return 1; + } +} + +template +struct FilterVisitor { + size_t IC, OC; + FilterVisitor(size_t OC, size_t IC) : IC(IC), OC(OC) {} + size_t get(size_t r, size_t oc, size_t ic, size_t h, size_t w) { + constexpr size_t input_pack_size = layout_pack_size(layout); + size_t ocb_layout = oc / input_pack_size; + size_t oc_layout = oc % input_pack_size; + size_t icb_layout = ic / input_pack_size; + size_t ic_layout = ic % input_pack_size; + + return (ocb_layout * (IC / input_pack_size) + icb_layout) * r * r * + input_pack_size * input_pack_size + + ic_layout * input_pack_size + oc_layout + + (h * r + w) * input_pack_size * input_pack_size; + } - /* tmp = Matmul(G, src) */ - megdnn::naive::run_matrix_mul_tpl( - winograd_coeff.G(rescale).data(), mid_buf1, mid_buf2, - alpha, r, r, r, r, r, dtype, dtype); - /* dst = Matmul(tmp, G^T) */ - megdnn::naive::run_matrix_mul_tpl( - mid_buf2, winograd_coeff.G(rescale).data(), mid_buf1, - alpha, alpha, r, r, r, alpha, dtype, dtype); - - rep(i, alpha) rep(j, alpha) { - filter_transform_buf[(i * alpha + j) * OC * IC + ic * OC + - oc] = mid_buf1[i * alpha + j]; - } - } + size_t put(size_t alpha, size_t oc, size_t ic, size_t h, size_t w) { + if (format == param::MatrixMul::Format::DEFAULT) { + return (h * alpha + w) * OC * IC + ic * OC + oc; } + size_t matmul_pack_size = MatrixMulForward::pack_size(format); + size_t ocb = oc / matmul_pack_size; + size_t oc_pack = oc % matmul_pack_size; + size_t icb = ic / matmul_pack_size; + size_t ic_pack = ic % matmul_pack_size; + + size_t OCB = OC / matmul_pack_size; + size_t ICB = IC / matmul_pack_size; + + return (h * alpha + w) * OCB * ICB * matmul_pack_size * + matmul_pack_size + + ocb * ICB * matmul_pack_size * matmul_pack_size + + icb * matmul_pack_size * matmul_pack_size + + ic_pack * matmul_pack_size + oc_pack; } +}; - static void input(const ctype* input, - input_filter_compute_type* input_transform_buf, - input_filter_compute_type* transform_mid_buf, - int ih_start, int iw_start, size_t IH, size_t IW, - size_t IC, size_t unit_idx, size_t nr_units_in_tile, - size_t m, size_t r, - const std::vector& interp_points, DType dtype, - float rescale) { - size_t alpha = m + r - 1; - Getter getter(dtype); - WinogradCoeff winograd_coeff(m, r, - interp_points); - rep(ic, IC) { - input_filter_compute_type* mid_buf1 = transform_mid_buf; - input_filter_compute_type* mid_buf2 = - transform_mid_buf + alpha * alpha; +template +struct InputVisitor { + size_t IC; + InputVisitor(size_t IC) : IC(IC) {} - memset(mid_buf1, 0, - alpha * alpha * sizeof(input_filter_compute_type)); - rep(i, alpha) rep(j, alpha) { - int ih = ih_start + i; - int iw = iw_start + j; - if (ih >= 0 && ih < (int)IH && iw >= 0 && iw < (int)IW) { - mid_buf1[i * alpha + j] = - getter(input[ic * IH * IW + ih * IW + iw]); - } - } - megdnn::naive::run_matrix_mul_tpl( - winograd_coeff.B(rescale).data(), mid_buf1, mid_buf2, alpha, - alpha, alpha, alpha, alpha, alpha, dtype, dtype); - megdnn::naive::run_matrix_mul_tpl( - mid_buf2, winograd_coeff.B(rescale).data(), mid_buf1, alpha, - alpha, alpha, alpha, alpha, alpha, dtype, dtype); - rep(i, alpha) rep(j, alpha) { - input_transform_buf[(i * alpha + j) * nr_units_in_tile * IC + - unit_idx * IC + ic] = - mid_buf1[i * alpha + j]; - } - } + size_t get(size_t alpha, size_t ic, size_t IH, size_t IW, size_t ih, + size_t iw) { + constexpr size_t input_pack_size = layout_pack_size(layout); + size_t icb_layout = ic / input_pack_size; + size_t ic_layout = ic % input_pack_size; + + return (icb_layout * IH * IW + ih * IW + iw) * input_pack_size + + ic_layout; } - static void output(const output_compute_type* output_transform_buf, - const output_compute_type* bias, dst_type* output, - output_compute_type* transform_mid_buf, BiasMode bmode, - NonlineMode nonline_mode, size_t oh_start, - size_t ow_start, size_t OH, size_t OW, size_t oc_start, - size_t oc_end, size_t unit_idx, size_t nr_units_in_tile, - size_t m, size_t r, - const std::vector& interp_points, DType dtype, - float input_filter_scale, float input_filter_rescale, - float rescale) { - size_t alpha = m + r - 1; - size_t OC = oc_end - oc_start; - - OutputGetter getter(dtype); - winograd::WinogradCoeff winograd_coeff( - m, r, interp_points); - for (size_t oc = oc_start; oc < oc_end; oc++) { - size_t oc_index = oc - oc_start; - output_compute_type* mid_buf1 = transform_mid_buf; - output_compute_type* mid_buf2 = transform_mid_buf + alpha * alpha; - - // gather - rep(i, alpha) rep(j, alpha) { - mid_buf1[i * alpha + j] = - output_transform_buf[(i * alpha + j) * - nr_units_in_tile * OC + - unit_idx * OC + oc_index]; - } - /* A[alpha*m] M[alpha*alpha] */ - megdnn::naive::run_matrix_mul_tpl( - winograd_coeff.A(rescale).data(), mid_buf1, mid_buf2, m, - alpha, alpha, m, alpha, alpha, dtype, dtype); - megdnn::naive::run_matrix_mul_tpl< - output_compute_type, output_compute_type, false, false>( - mid_buf2, winograd_coeff.A(rescale).data(), mid_buf1, m, m, - alpha, alpha, m, m, dtype, dtype); - rep(i, m) rep(j, m) { - auto oh = oh_start + i; - auto ow = ow_start + j; - if (oh < OH && ow < OW) { - float val = mid_buf1[i * m + j]; - if (bmode == BiasMode::BROADCAST_CHANNEL_BIAS) { - val += bias[oc] * input_filter_rescale * - input_filter_rescale; - } else if (bmode == BiasMode::BIAS) { - val += bias[oc * OH * OW + oh * OW + ow] * - input_filter_rescale * input_filter_rescale; - } - val = val * input_filter_scale / - (input_filter_rescale * input_filter_rescale * - rescale * rescale); - if (nonline_mode == NonlineMode::RELU) { - val = val > 0 ? val : 0; - } else if (nonline_mode == NonlineMode::SIGMOID) { - val = 1.f / (expf(-val) + 1.f); - } else if (nonline_mode == NonlineMode::H_SWISH) { - val = val * std::min(std::max(val + 3, 0.f), 6.f) / 6.f; - } else { - megdnn_assert(nonline_mode == NonlineMode::IDENTITY); - } - - output[oc * OH * OW + oh * OW + ow] = getter(val); - } - } + size_t put(size_t alpha, size_t ic, size_t nr_units_in_tile, + size_t unit_idx, size_t h, size_t w) { + if (format == param::MatrixMul::Format::DEFAULT) { + return (h * alpha + w) * nr_units_in_tile * IC + unit_idx * IC + ic; } + size_t matmul_pack_size = MatrixMulForward::pack_size(format); + size_t icb = ic / matmul_pack_size; + size_t ic_pack = ic % matmul_pack_size; + size_t ICB = IC / matmul_pack_size; + + return (h * alpha + w) * ICB * nr_units_in_tile * matmul_pack_size + + icb * nr_units_in_tile * matmul_pack_size + + unit_idx * matmul_pack_size + ic_pack; } }; -template -class StrategyHelper< - ctype, dst_type, input_filter_compute_type, output_compute_type, format, - std::enable_if_t> { -public: - static void filter(const ctype* filter, - input_filter_compute_type* filter_transform_buf, - input_filter_compute_type* transform_mid_buf, size_t OC, - size_t IC, size_t oc_start, size_t oc_end, size_t m, - size_t r, const std::vector& interp_points, - DType dtype, float rescale) { - size_t alpha = m + r - 1; - WinogradCoeff winograd_coeff(m, r, - interp_points); - - input_filter_compute_type* mid_buf1 = transform_mid_buf; - input_filter_compute_type* mid_buf2 = transform_mid_buf + alpha * alpha; - - Getter getter(dtype); - size_t OCB = OC / pack_size; - size_t ICB = IC / pack_size; - for (size_t oc = oc_start; oc < oc_end; oc++) { - rep(ic, IC) { - const ctype* filter_ptr = filter + (oc * IC + ic) * r * r; - rep(i, r) rep(j, r) { - mid_buf1[i * r + j] = getter(filter_ptr[i * r + j]); - } +template +struct OutputVisitor { + size_t OC; + OutputVisitor(size_t OC) : OC(OC) {} - /* tmp = Matmul(G, src) */ - megdnn::naive::run_matrix_mul_tpl( - winograd_coeff.G(rescale).data(), mid_buf1, mid_buf2, - alpha, r, r, r, r, r, dtype, dtype); - /* dst = Matmul(tmp, G^T) */ - megdnn::naive::run_matrix_mul_tpl( - mid_buf2, winograd_coeff.G(rescale).data(), mid_buf1, - alpha, alpha, r, r, r, alpha, dtype, dtype); - - size_t ocb = oc / pack_size; - size_t oc_pack = oc % pack_size; - size_t icb = ic / pack_size; - size_t ic_pack = ic % pack_size; - rep(i, alpha) rep(j, alpha) { - filter_transform_buf[(i * alpha + j) * OCB * ICB * - pack_size * pack_size + - ocb * ICB * pack_size * pack_size + - icb * pack_size * pack_size + - ic_pack * pack_size + oc_pack] = - mid_buf1[i * alpha + j]; - } - } + size_t get(size_t alpha, size_t oc_index, size_t oc, + size_t nr_units_in_tile, size_t unit_idx, size_t h, size_t w) { + if (format == param::MatrixMul::Format::DEFAULT) { + return (h * alpha + w) * nr_units_in_tile * OC + unit_idx * OC + + oc_index; } + size_t matmul_pack_size = MatrixMulForward::pack_size(format); + size_t ocb = oc_index / matmul_pack_size; + size_t oc_pack = oc % matmul_pack_size; + size_t OCB = OC / matmul_pack_size; + + return (h * alpha + w) * OCB * nr_units_in_tile * matmul_pack_size + + ocb * nr_units_in_tile * matmul_pack_size + + unit_idx * matmul_pack_size + oc_pack; } - static void input(const ctype* input, - input_filter_compute_type* input_transform_buf, - input_filter_compute_type* transform_mid_buf, - int ih_start, int iw_start, size_t IH, size_t IW, - size_t IC, size_t unit_idx, size_t nr_units_in_tile, - size_t m, size_t r, - const std::vector& interp_points, DType dtype, - float rescale) { - size_t alpha = m + r - 1; - Getter getter(dtype); - WinogradCoeff winograd_coeff(m, r, - interp_points); - size_t ICB = IC / pack_size; - rep(ic, IC) { - input_filter_compute_type* mid_buf1 = transform_mid_buf; - input_filter_compute_type* mid_buf2 = - transform_mid_buf + alpha * alpha; + size_t put(size_t oc, size_t OH, size_t OW, size_t oh, size_t ow) { + constexpr size_t input_pack_size = layout_pack_size(layout); + size_t oc_layout = oc % input_pack_size; - memset(mid_buf1, 0, - alpha * alpha * sizeof(input_filter_compute_type)); - rep(i, alpha) rep(j, alpha) { - int ih = ih_start + i; - int iw = iw_start + j; - if (ih >= 0 && ih < (int)IH && iw >= 0 && iw < (int)IW) { - mid_buf1[i * alpha + j] = - getter(input[ic * IH * IW + ih * IW + iw]); - } + return (oc / input_pack_size * OH * OW + oh * OW + ow) * + input_pack_size + + oc_layout; + } +}; + +template +void StrategyHelper< + ctype, dst_type, input_filter_compute_type, output_compute_type, layout, + format>::filter(const ctype* filter, + input_filter_compute_type* filter_transform_buf, + input_filter_compute_type* transform_mid_buf, size_t OC, + size_t IC, size_t oc_start, size_t oc_end, size_t m, + size_t r, const std::vector& interp_points, + DType dtype, float rescale) { + size_t alpha = m + r - 1; + WinogradCoeff winograd_coeff(m, r, + interp_points); + input_filter_compute_type* mid_buf1 = transform_mid_buf; + input_filter_compute_type* mid_buf2 = transform_mid_buf + alpha * alpha; + Getter getter(dtype); + FilterVisitor filter_visitor(OC, IC); + + for (size_t oc = oc_start; oc < oc_end; oc++) { + rep(ic, IC) { + rep(i, r) rep(j, r) { + mid_buf1[i * r + j] = + getter(filter[filter_visitor.get(r, oc, ic, i, j)]); } + + /* tmp = Matmul(G, src) */ megdnn::naive::run_matrix_mul_tpl( - winograd_coeff.B(rescale).data(), mid_buf1, mid_buf2, alpha, - alpha, alpha, alpha, alpha, alpha, dtype, dtype); + winograd_coeff.G(rescale).data(), mid_buf1, mid_buf2, alpha, + r, r, r, r, r, dtype, dtype); + /* dst = Matmul(tmp, G^T) */ megdnn::naive::run_matrix_mul_tpl( - mid_buf2, winograd_coeff.B(rescale).data(), mid_buf1, alpha, - alpha, alpha, alpha, alpha, alpha, dtype, dtype); - size_t icb = ic / pack_size; - size_t ic_pack = ic % pack_size; + true>( + mid_buf2, winograd_coeff.G(rescale).data(), mid_buf1, alpha, + alpha, r, r, r, alpha, dtype, dtype); + rep(i, alpha) rep(j, alpha) { - input_transform_buf[(i * alpha + j) * ICB * nr_units_in_tile * - pack_size + - icb * nr_units_in_tile * pack_size + - unit_idx * pack_size + ic_pack] = + filter_transform_buf[filter_visitor.put(alpha, oc, ic, i, j)] = mid_buf1[i * alpha + j]; } } } +} - static void output(const output_compute_type* output_transform_buf, - const output_compute_type* bias, dst_type* output, - output_compute_type* transform_mid_buf, BiasMode bmode, - NonlineMode nonline_mode, size_t oh_start, - size_t ow_start, size_t OH, size_t OW, size_t oc_start, - size_t oc_end, size_t unit_idx, size_t nr_units_in_tile, +template +void StrategyHelper< + ctype, dst_type, input_filter_compute_type, output_compute_type, layout, + format>::input(const ctype* input, + input_filter_compute_type* input_transform_buf, + input_filter_compute_type* transform_mid_buf, + int ih_start, int iw_start, size_t IH, size_t IW, + size_t IC, size_t unit_idx, size_t nr_units_in_tile, size_t m, size_t r, const std::vector& interp_points, DType dtype, - float input_filter_scale, float input_filter_rescale, float rescale) { - size_t alpha = m + r - 1; - size_t OC = oc_end - oc_start; - - OutputGetter getter(dtype); - winograd::WinogradCoeff winograd_coeff( - m, r, interp_points); - size_t OCB = OC / pack_size; - for (size_t oc = oc_start; oc < oc_end; oc++) { - output_compute_type* mid_buf1 = transform_mid_buf; - output_compute_type* mid_buf2 = transform_mid_buf + alpha * alpha; - - size_t ocb = (oc - oc_start) / pack_size; - size_t oc_pack = oc % pack_size; - // gather - rep(i, alpha) rep(j, alpha) { - mid_buf1[i * alpha + j] = output_transform_buf - [(i * alpha + j) * OCB * nr_units_in_tile * pack_size + - ocb * nr_units_in_tile * pack_size + - unit_idx * pack_size + oc_pack]; + size_t alpha = m + r - 1; + WinogradCoeff winograd_coeff(m, r, + interp_points); + input_filter_compute_type* mid_buf1 = transform_mid_buf; + input_filter_compute_type* mid_buf2 = transform_mid_buf + alpha * alpha; + Getter getter(dtype); + InputVisitor intput_visitor(IC); + + rep(ic, IC) { + memset(mid_buf1, 0, alpha * alpha * sizeof(input_filter_compute_type)); + rep(i, alpha) rep(j, alpha) { + int ih = ih_start + i; + int iw = iw_start + j; + if (ih >= 0 && ih < (int)IH && iw >= 0 && iw < (int)IW) { + mid_buf1[i * alpha + j] = getter( + input[intput_visitor.get(alpha, ic, IH, IW, ih, iw)]); } - /* A[alpha*m] M[alpha*alpha] */ - megdnn::naive::run_matrix_mul_tpl( - winograd_coeff.A(rescale).data(), mid_buf1, mid_buf2, m, - alpha, alpha, m, alpha, alpha, dtype, dtype); - megdnn::naive::run_matrix_mul_tpl< - output_compute_type, output_compute_type, false, false>( - mid_buf2, winograd_coeff.A(rescale).data(), mid_buf1, m, m, - alpha, alpha, m, m, dtype, dtype); - rep(i, m) rep(j, m) { - auto oh = oh_start + i; - auto ow = ow_start + j; - if (oh < OH && ow < OW) { - float val = mid_buf1[i * m + j]; - if (bmode == BiasMode::BROADCAST_CHANNEL_BIAS) { - val += bias[oc] * input_filter_rescale * - input_filter_rescale; - } else if (bmode == BiasMode::BIAS) { - val += bias[oc * OH * OW + oh * OW + ow] * - input_filter_rescale * input_filter_rescale; - } - val = val * input_filter_scale / - (input_filter_rescale * input_filter_rescale * - rescale * rescale); - if (nonline_mode == NonlineMode::RELU) { - val = val > 0 ? val : 0; - } else if (nonline_mode == NonlineMode::SIGMOID) { - val = 1.f / (expf(-val) + 1.f); - } else if (nonline_mode == NonlineMode::H_SWISH) { - val = val * std::min(std::max(val + 3, 0.f), 6.f) / 6.f; - } else { - megdnn_assert(nonline_mode == NonlineMode::IDENTITY); - } - - output[oc * OH * OW + oh * OW + ow] = getter(val); + } + + megdnn::naive::run_matrix_mul_tpl( + winograd_coeff.B(rescale).data(), mid_buf1, mid_buf2, alpha, + alpha, alpha, alpha, alpha, alpha, dtype, dtype); + megdnn::naive::run_matrix_mul_tpl( + mid_buf2, winograd_coeff.B(rescale).data(), mid_buf1, alpha, + alpha, alpha, alpha, alpha, alpha, dtype, dtype); + + rep(i, alpha) rep(j, alpha) { + input_transform_buf[intput_visitor.put(alpha, ic, nr_units_in_tile, + unit_idx, i, j)] = + mid_buf1[i * alpha + j]; + } + } +} + +template +void StrategyHelper< + ctype, dst_type, input_filter_compute_type, output_compute_type, layout, + format>::output(const output_compute_type* output_transform_buf, + const output_compute_type* bias, dst_type* output, + output_compute_type* transform_mid_buf, BiasMode bmode, + NonlineMode nonline_mode, size_t oh_start, + size_t ow_start, size_t OH, size_t OW, size_t oc_start, + size_t oc_end, size_t unit_idx, size_t nr_units_in_tile, + size_t m, size_t r, + const std::vector& interp_points, DType dtype, + float input_filter_scale, float input_filter_rescale, + float rescale) { + size_t alpha = m + r - 1; + winograd::WinogradCoeff winograd_coeff(m, r, + interp_points); + output_compute_type* mid_buf1 = transform_mid_buf; + output_compute_type* mid_buf2 = transform_mid_buf + alpha * alpha; + OutputGetter getter(dtype); + OutputVisitor output_visitor(oc_end - oc_start); + + for (size_t oc = oc_start; oc < oc_end; oc++) { + /* gather */ + rep(i, alpha) rep(j, alpha) { + mid_buf1[i * alpha + j] = output_transform_buf[output_visitor.get( + alpha, oc - oc_start, oc, nr_units_in_tile, unit_idx, i, + j)]; + } + /* A[alpha*m] M[alpha*alpha] */ + megdnn::naive::run_matrix_mul_tpl( + winograd_coeff.A(rescale).data(), mid_buf1, mid_buf2, m, alpha, + alpha, m, alpha, alpha, dtype, dtype); + megdnn::naive::run_matrix_mul_tpl( + mid_buf2, winograd_coeff.A(rescale).data(), mid_buf1, m, m, + alpha, alpha, m, m, dtype, dtype); + + rep(i, m) rep(j, m) { + auto oh = oh_start + i; + auto ow = ow_start + j; + if (oh < OH && ow < OW) { + float val = mid_buf1[i * m + j]; + if (bmode == BiasMode::BROADCAST_CHANNEL_BIAS) { + val += bias[oc] * input_filter_rescale * + input_filter_rescale; + } else if (bmode == BiasMode::BIAS) { + val += bias[output_visitor.put(oc, OH, OW, oh, ow)] * + input_filter_rescale * input_filter_rescale; } + val = val * input_filter_scale / + (input_filter_rescale * input_filter_rescale * rescale * + rescale); + if (nonline_mode == NonlineMode::RELU) { + val = val > 0 ? val : 0; + } else if (nonline_mode == NonlineMode::SIGMOID) { + val = 1.f / (expf(-val) + 1.f); + } else if (nonline_mode == NonlineMode::H_SWISH) { + val = val * std::min(std::max(val + 3, 0.f), 6.f) / 6.f; + } else { + megdnn_assert(nonline_mode == NonlineMode::IDENTITY); + } + output[output_visitor.put(oc, OH, OW, oh, ow)] = getter(val); } } } - - static size_t pack_size; }; -template -size_t StrategyHelper< - ctype, dst_type, input_filter_compute_type, output_compute_type, format, - std::enable_if_t>::pack_size = - MatrixMulForward::pack_size(format); - -#define INST(_ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type) \ - template class StrategyHelper< \ - _ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type, param::MatrixMul::Format::DEFAULT>; +#define INST(_ctype, _dst_type, _input_filter_compute_type, \ + _output_compute_type) \ + template class StrategyHelper<_ctype, _dst_type, \ + _input_filter_compute_type, \ + _output_compute_type>; INST(float, float, float, float) MEGDNN_INC_FLOAT16(INST(dt_float16, dt_float16, dt_float16, dt_float16)) @@ -428,234 +360,23 @@ INST(uint8_t, uint8_t, int16_t, int) #undef INST #define INST(_ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type) \ + _output_compute_type, layout) \ template class StrategyHelper< \ _ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type, param::MatrixMul::Format::MK4>; -INST(float, float, float, float) + _output_compute_type, layout, param::MatrixMul::Format::MK4>; +INST(float, float, float, float, param::ConvBias::Format::NCHW) #undef INST #define INST(_ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type) \ + _output_compute_type, layout) \ template class StrategyHelper< \ _ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type, param::MatrixMul::Format::MK8>; -INST(int8_t, int8_t, int16_t, int) -MEGDNN_INC_FLOAT16(INST(dt_float16, dt_float16, dt_float16, dt_float16)) -#undef INST - -template -class StrategyHelperNchwxx< - ctype, dst_type, input_filter_compute_type, output_compute_type, format, - std::enable_if_t> { -public: - static void filter(const ctype* filter, - input_filter_compute_type* filter_transform_buf, - input_filter_compute_type* transform_mid_buf, size_t OC, - size_t IC, size_t oc_start, size_t oc_end, size_t m, - size_t r, const std::vector& interp_points, - DType dtype, float rescale) { - megdnn_assert( - (oc_end - oc_start) % 8 == 0 && oc_start % 8 == 0 && - oc_end % 8 == 0 && IC % 8 == 0 && OC % 8 == 0, - "Winograd filter transform input param is not times of 8!"); - - size_t alpha = m + r - 1; - WinogradCoeff winograd_coeff(m, r, - interp_points); - - input_filter_compute_type* mid_buf1 = transform_mid_buf; - input_filter_compute_type* mid_buf2 = transform_mid_buf + alpha * alpha; - - Getter getter(dtype); - size_t OCB = OC / pack_size; - size_t ICB = IC / pack_size; - for (size_t oc = oc_start; oc < oc_end; oc++) { - rep(ic, IC) { - size_t ocb = oc / pack_size; - size_t oc_pack = oc % pack_size; - size_t icb = ic / pack_size; - size_t ic_pack = ic % pack_size; - - const ctype* filter_ptr = - filter + (ocb * (IC / 8) + icb) * r * r * 8 * 8 + - ic_pack * 8 + oc_pack; - rep(i, r) rep(j, r) { - mid_buf1[i * r + j] = - getter(filter_ptr[(i * r + j) * 8 * 8]); - } - - /* tmp = Matmul(G, src) */ - megdnn::naive::run_matrix_mul_tpl( - winograd_coeff.G(rescale).data(), mid_buf1, mid_buf2, - alpha, r, r, r, r, r, dtype, dtype); - /* dst = Matmul(tmp, G^T) */ - megdnn::naive::run_matrix_mul_tpl( - mid_buf2, winograd_coeff.G(rescale).data(), mid_buf1, - alpha, alpha, r, r, r, alpha, dtype, dtype); - - rep(i, alpha) rep(j, alpha) { - filter_transform_buf[(i * alpha + j) * OCB * ICB * - pack_size * pack_size + - ocb * ICB * pack_size * pack_size + - icb * pack_size * pack_size + - ic_pack * pack_size + oc_pack] = - mid_buf1[i * alpha + j]; - } - } - } - } - - static void input(const ctype* input, - input_filter_compute_type* input_transform_buf, - input_filter_compute_type* transform_mid_buf, - int ih_start, int iw_start, size_t IH, size_t IW, - size_t IC, size_t unit_idx, size_t nr_units_in_tile, - size_t m, size_t r, - const std::vector& interp_points, DType dtype, - float rescale) { - size_t alpha = m + r - 1; - Getter getter(dtype); - WinogradCoeff winograd_coeff(m, r, - interp_points); - size_t ICB = IC / pack_size; - rep(ic, IC) { - size_t icb = ic / pack_size; - size_t ic_pack = ic % pack_size; - input_filter_compute_type* mid_buf1 = transform_mid_buf; - input_filter_compute_type* mid_buf2 = - transform_mid_buf + alpha * alpha; - - memset(mid_buf1, 0, - alpha * alpha * sizeof(input_filter_compute_type)); - rep(i, alpha) rep(j, alpha) { - int ih = ih_start + i; - int iw = iw_start + j; - if (ih >= 0 && ih < (int)IH && iw >= 0 && iw < (int)IW) { - mid_buf1[i * alpha + j] = getter( - input[(icb * IH * IW + ih * IW + iw) * pack_size + - ic_pack]); - } - } - megdnn::naive::run_matrix_mul_tpl( - winograd_coeff.B(rescale).data(), mid_buf1, mid_buf2, alpha, - alpha, alpha, alpha, alpha, alpha, dtype, dtype); - megdnn::naive::run_matrix_mul_tpl( - mid_buf2, winograd_coeff.B(rescale).data(), mid_buf1, alpha, - alpha, alpha, alpha, alpha, alpha, dtype, dtype); - rep(i, alpha) rep(j, alpha) { - input_transform_buf[(i * alpha + j) * ICB * nr_units_in_tile * - pack_size + - icb * nr_units_in_tile * pack_size + - unit_idx * pack_size + ic_pack] = - mid_buf1[i * alpha + j]; - } - } - } - - static void output(const output_compute_type* output_transform_buf, - const output_compute_type* bias, dst_type* output, - output_compute_type* transform_mid_buf, BiasMode bmode, - NonlineMode nonline_mode, size_t oh_start, - size_t ow_start, size_t OH, size_t OW, size_t oc_start, - size_t oc_end, size_t unit_idx, size_t nr_units_in_tile, - size_t m, size_t r, - const std::vector& interp_points, DType dtype, - float input_filter_scale, float input_filter_rescale, - float rescale) { - size_t alpha = m + r - 1; - size_t OC = oc_end - oc_start; - - OutputGetter getter(dtype); - winograd::WinogradCoeff winograd_coeff( - m, r, interp_points); - size_t OCB = OC / pack_size; - for (size_t oc = oc_start; oc < oc_end; oc++) { - output_compute_type* mid_buf1 = transform_mid_buf; - output_compute_type* mid_buf2 = transform_mid_buf + alpha * alpha; - - size_t ocb = (oc - oc_start) / pack_size; - size_t oc_pack = oc % pack_size; - // gather - rep(i, alpha) rep(j, alpha) { - mid_buf1[i * alpha + j] = output_transform_buf - [(i * alpha + j) * OCB * nr_units_in_tile * pack_size + - ocb * nr_units_in_tile * pack_size + - unit_idx * pack_size + oc_pack]; - } - /* A[alpha*m] M[alpha*alpha] */ - megdnn::naive::run_matrix_mul_tpl( - winograd_coeff.A(rescale).data(), mid_buf1, mid_buf2, m, - alpha, alpha, m, alpha, alpha, dtype, dtype); - megdnn::naive::run_matrix_mul_tpl< - output_compute_type, output_compute_type, false, false>( - mid_buf2, winograd_coeff.A(rescale).data(), mid_buf1, m, m, - alpha, alpha, m, m, dtype, dtype); - rep(i, m) rep(j, m) { - auto oh = oh_start + i; - auto ow = ow_start + j; - if (oh < OH && ow < OW) { - float val = mid_buf1[i * m + j]; - if (bmode == BiasMode::BROADCAST_CHANNEL_BIAS) { - val += bias[oc] * input_filter_rescale * - input_filter_rescale; - } else if (bmode == BiasMode::BIAS) { - val += bias[(oc / pack_size * OH * OW + oh * OW + ow) * - pack_size + - oc_pack] * - input_filter_rescale * input_filter_rescale; - } - val = val * input_filter_scale / - (input_filter_rescale * input_filter_rescale * - rescale * rescale); - if (nonline_mode == NonlineMode::RELU) { - val = val > 0 ? val : 0; - } else if (nonline_mode == NonlineMode::SIGMOID) { - val = 1.f / (expf(-val) + 1.f); - } else if (nonline_mode == NonlineMode::H_SWISH) { - val = val * std::min(std::max(val + 3, 0.f), 6.f) / 6.f; - } else { - megdnn_assert(nonline_mode == NonlineMode::IDENTITY); - } - - output[(oc / pack_size * OH * OW + oh * OW + ow) * - pack_size + - oc_pack] = getter(val); - } - } - } - } - - static size_t pack_size; -}; - -template -size_t StrategyHelperNchwxx< - ctype, dst_type, input_filter_compute_type, output_compute_type, format, - std::enable_if_t>::pack_size = - MatrixMulForward::pack_size(format); - -#define INST(_ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type) \ - template class StrategyHelperNchwxx< \ - _ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type, param::MatrixMul::Format::MK8>; -INST(float, float, float, float) + _output_compute_type, layout, param::MatrixMul::Format::MK8>; +INST(int8_t, int8_t, int16_t, int, param::ConvBias::Format::NCHW) +INST(float, float, float, float, param::ConvBias::Format::NCHW88) +MEGDNN_INC_FLOAT16(INST(dt_float16, dt_float16, dt_float16, dt_float16, + param::ConvBias::Format::NCHW)) #undef INST - - - } // namespace winograd } // namespace megdnn diff --git a/dnn/src/common/winograd/winograd_helper.h b/dnn/src/common/winograd/winograd_helper.h index bdbec6203..c2cd945bc 100644 --- a/dnn/src/common/winograd/winograd_helper.h +++ b/dnn/src/common/winograd/winograd_helper.h @@ -6,7 +6,8 @@ * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. */ #pragma once @@ -28,8 +29,8 @@ using BiasMode = ConvBiasForward::BiasMode; */ template + param::ConvBias::Format layout = param::ConvBias::Format::NCHW, + param::MatrixMul::Format format = param::MatrixMul::Format::DEFAULT> class StrategyHelper { public: static void filter(const ctype* filter, @@ -61,47 +62,6 @@ public: float rescale = 1.0f); }; -/** - * \brief Strategy helper, contains some helper function for debug kernel - * implementation - * - * \warning The layout should be NCHW88 - */ -template -class StrategyHelperNchwxx { -public: - static void filter(const ctype* filter, - input_filter_compute_type* filter_transform_buf, - input_filter_compute_type* transform_mid_buf, size_t OC, - size_t IC, size_t oc_start, size_t oc_end, size_t m, - size_t r, const std::vector& interp_points, - DType dtype, float rescale = 1.0f); - - static void input(const ctype* input, - input_filter_compute_type* input_transform_buf, - input_filter_compute_type* transform_mid_buf, - int ih_start, int iw_start, size_t IH, size_t IW, - size_t IC, size_t unit_idx, size_t nr_units_in_tile, - size_t m, size_t r, - const std::vector& interp_points, DType dtype, - float rescale = 1.0f); - - static void - output(const output_compute_type* output_transform_buf, - const output_compute_type* bias, dst_type* output, - output_compute_type* transform_mid_buf, BiasMode bmode, - NonlineMode nonline_mode, size_t oh_start, size_t ow_start, - size_t OH, size_t OW, size_t oc_start, size_t oc_end, - size_t unit_idx, size_t nr_units_in_tile, size_t m, size_t r, - const std::vector& interp_points, DType dtype, - float input_filter_scale = 1.0f, // input_scale * filter_scale - float input_filter_rescale = 1.0f, // input_rescale * filter_rescale - float rescale = 1.0f); -}; - } // namespace winograd } // namespace megdnn // vim: syntax=cpp.doxygen diff --git a/dnn/src/fallback/conv_bias/winograd/strategy.cpp b/dnn/src/fallback/conv_bias/winograd/strategy.cpp index 579dbdcc2..de0ff614b 100644 --- a/dnn/src/fallback/conv_bias/winograd/strategy.cpp +++ b/dnn/src/fallback/conv_bias/winograd/strategy.cpp @@ -6,13 +6,14 @@ * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. */ #include "src/fallback/conv_bias/winograd/strategy.h" -#include "src/fallback/conv_bias/winograd/winograd.h" -#include "src/common/winograd/winograd_helper.h" #include "src/common/utils.h" +#include "src/common/winograd/winograd_helper.h" +#include "src/fallback/conv_bias/winograd/winograd.h" namespace megdnn { namespace fallback { @@ -60,7 +61,7 @@ void winograd_2x3_4x4_f::filter(const float* filter, float* transform_mid_buf, size_t OC, size_t IC, size_t oc_start, size_t oc_end) { ::megdnn::winograd::StrategyHelper< - float, float, float, float, + float, float, float, float, param::ConvBias::Format::NCHW, param::MatrixMul::Format::MK4>::filter(filter, filter_transform_buf, transform_mid_buf, OC, IC, oc_start, oc_end, @@ -73,11 +74,15 @@ void winograd_2x3_4x4_f::input(const float* input, float* input_transform_buf, float* transform_mid_buf, int ih_start, int iw_start, size_t IH, size_t IW, size_t IC, size_t unit_idx, size_t nr_units_in_tile) { - ::megdnn::winograd::StrategyHelper:: - input(input, input_transform_buf, transform_mid_buf, ih_start, - iw_start, IH, IW, IC, unit_idx, nr_units_in_tile, - OUTPUT_BLOCK_SIZE, KERNEL_SIZE, {0, 1, -1}, src_dtype); + ::megdnn::winograd::StrategyHelper< + float, float, float, float, param::ConvBias::Format::NCHW, + param::MatrixMul::Format::MK4>::input(input, input_transform_buf, + transform_mid_buf, ih_start, + iw_start, IH, IW, IC, + unit_idx, nr_units_in_tile, + OUTPUT_BLOCK_SIZE, + KERNEL_SIZE, {0, 1, -1}, + src_dtype); } void winograd_2x3_4x4_f::output(const float* output_transform_buf, @@ -87,16 +92,19 @@ void winograd_2x3_4x4_f::output(const float* output_transform_buf, size_t ow_start, size_t OH, size_t OW, size_t oc_start, size_t oc_end, size_t unit_idx, size_t nr_units_in_tile) { - ::megdnn::winograd::StrategyHelper:: - output(output_transform_buf, bias, output, transform_mid_buf, bmode, - nonline_mode, oh_start, ow_start, OH, OW, oc_start, oc_end, - unit_idx, nr_units_in_tile, OUTPUT_BLOCK_SIZE, KERNEL_SIZE, - {0, 1, -1}, dst_dtype); + ::megdnn::winograd::StrategyHelper< + float, float, float, float, param::ConvBias::Format::NCHW, + param::MatrixMul::Format::MK4>::output(output_transform_buf, bias, + output, transform_mid_buf, + bmode, nonline_mode, + oh_start, ow_start, OH, OW, + oc_start, oc_end, unit_idx, + nr_units_in_tile, + OUTPUT_BLOCK_SIZE, + KERNEL_SIZE, {0, 1, -1}, + dst_dtype); } - - MEGDNN_REG_WINOGRAD_STRATEGY_IMPL(winograd_2x3_1x1_qs8) void winograd_2x3_1x1_qs8::filter(const int8_t* filter, @@ -136,7 +144,6 @@ void winograd_2x3_1x1_qs8::output(const int* output_transform_buf, {0, 1, -1}, dst_dtype, scale_input * scale_filter, 2.0f, 1.0f); } - MEGDNN_REG_WINOGRAD_STRATEGY_IMPL(winograd_2x3_8x8_qs8) void winograd_2x3_8x8_qs8::filter(const int8_t* filter, @@ -144,7 +151,7 @@ void winograd_2x3_8x8_qs8::filter(const int8_t* filter, int16_t* transform_mid_buf, size_t OC, size_t IC, size_t oc_start, size_t oc_end) { ::megdnn::winograd::StrategyHelper< - int8_t, int8_t, int16_t, int, + int8_t, int8_t, int16_t, int, param::ConvBias::Format::NCHW, param::MatrixMul::Format::MK8>::filter(filter, filter_transform_buf, transform_mid_buf, OC, IC, oc_start, oc_end, @@ -158,11 +165,15 @@ void winograd_2x3_8x8_qs8::input(const int8_t* input, int16_t* transform_mid_buf, int ih_start, int iw_start, size_t IH, size_t IW, size_t IC, size_t unit_idx, size_t nr_units_in_tile) { - ::megdnn::winograd::StrategyHelper:: - input(input, input_transform_buf, transform_mid_buf, ih_start, - iw_start, IH, IW, IC, unit_idx, nr_units_in_tile, - OUTPUT_BLOCK_SIZE, KERNEL_SIZE, {0, 1, -1}, src_dtype, 1.0f); + ::megdnn::winograd::StrategyHelper< + int8_t, int8_t, int16_t, int, param::ConvBias::Format::NCHW, + param::MatrixMul::Format::MK8>::input(input, input_transform_buf, + transform_mid_buf, ih_start, + iw_start, IH, IW, IC, + unit_idx, nr_units_in_tile, + OUTPUT_BLOCK_SIZE, + KERNEL_SIZE, {0, 1, -1}, + src_dtype, 1.0f); } void winograd_2x3_8x8_qs8::output(const int* output_transform_buf, @@ -180,13 +191,19 @@ void winograd_2x3_8x8_qs8::output(const int* output_transform_buf, megdnn_assert(filter_dtype.enumv() == DTypeEnum::QuantizedS16); scale_filter = filter_dtype.param().scale; } - ::megdnn::winograd::StrategyHelper:: - output(output_transform_buf, bias, output, transform_mid_buf, bmode, - nonline_mode, oh_start, ow_start, OH, OW, oc_start, oc_end, - unit_idx, nr_units_in_tile, OUTPUT_BLOCK_SIZE, KERNEL_SIZE, - {0, 1, -1}, dst_dtype, scale_input * scale_filter, 2.0f, - 1.0f); + ::megdnn::winograd::StrategyHelper< + int8_t, int8_t, int16_t, int, param::ConvBias::Format::NCHW, + param::MatrixMul::Format::MK8>::output(output_transform_buf, bias, + output, transform_mid_buf, + bmode, nonline_mode, + oh_start, ow_start, OH, OW, + oc_start, oc_end, unit_idx, + nr_units_in_tile, + OUTPUT_BLOCK_SIZE, + KERNEL_SIZE, {0, 1, -1}, + dst_dtype, + scale_input * scale_filter, + 2.0f, 1.0f); } } // namespace winograd diff --git a/dnn/src/fallback/conv_bias/winograd/strategy.h b/dnn/src/fallback/conv_bias/winograd/strategy.h index ed1d3ad25..fe186cb9c 100644 --- a/dnn/src/fallback/conv_bias/winograd/strategy.h +++ b/dnn/src/fallback/conv_bias/winograd/strategy.h @@ -28,6 +28,7 @@ MEGDNN_REG_WINOGRAD_STRATEGY(int8_t, int8_t, int16_t, int, 2, 3, 1, 1, MEGDNN_REG_WINOGRAD_STRATEGY(int8_t, int8_t, int16_t, int, 2, 3, 8, 8, winograd_2x3_8x8_qs8) + } } // namespace fallback } // namespace megdnn diff --git a/dnn/src/naive/winograd_filter_preprocess/opr_impl.cpp b/dnn/src/naive/winograd_filter_preprocess/opr_impl.cpp index 4a012592b..b5db83e7f 100644 --- a/dnn/src/naive/winograd_filter_preprocess/opr_impl.cpp +++ b/dnn/src/naive/winograd_filter_preprocess/opr_impl.cpp @@ -6,7 +6,8 @@ * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or + * implied. */ #include "src/naive/winograd_filter_preprocess/opr_impl.h" @@ -49,17 +50,16 @@ void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src, size_t m = param().output_block_size; bool execed = false; -#define cb(_ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type, _format, rescale) \ - if (param().format == _format) { \ - return winograd::StrategyHelper< \ - _ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type, _format>::filter(src_ptr, dst_ptr, \ - workspace_ptr, OC, IC, \ - 0, OC, m, FW, \ - interp_points, \ - src.layout.dtype, \ - rescale); \ + +#define cb(_ctype, _dst_type, _input_filter_compute_type, \ + _output_compute_type, _format, rescale) \ + if (param().format == _format) { \ + return winograd::StrategyHelper< \ + _ctype, _dst_type, _input_filter_compute_type, \ + _output_compute_type, param::ConvBias::Format::NCHW, \ + _format>::filter(src_ptr, dst_ptr, workspace_ptr, OC, IC, 0, \ + OC, m, FW, interp_points, src.layout.dtype, \ + rescale); \ } #define DISPATCH_FORMAT_MK4(_ctype, _dst_type, _input_filter_compute_type, \ @@ -110,8 +110,9 @@ void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src, DISPATCH_KERNEL(dt_float16, dt_float16, dt_float16, dt_float16, \ DISPATCH_FORMAT_MK8, 1.0f, _midout_tag, 2); \ }) - //! normal nchw mode + if (src.layout.ndim <= 5) { + //! dispatch_dtype with consider layout and format. if (FW == 3) { if (m == 2) { std::vector interp_points = {0, 1, -1}; @@ -131,22 +132,20 @@ void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src, DISPATCH_DTYPE(3); } } - } #undef cb #undef DISPATCH_FORMAT_MK4 #undef DISPATCH_FORMAT_MK8 #undef DISPATCH_DTYPE -#define cb(_ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type, _format, rescale) \ - if (param().format == _format) { \ - return winograd::StrategyHelperNchwxx< \ - _ctype, _dst_type, _input_filter_compute_type, \ - _output_compute_type, _format>::filter(src_ptr, dst_ptr, \ - workspace_ptr, OC, IC, \ - 0, OC, m, FW, \ - interp_points, \ - src.layout.dtype, \ - rescale); \ + } else { +#define cb(_ctype, _dst_type, _input_filter_compute_type, \ + _output_compute_type, _format, rescale) \ + if (param().format == _format) { \ + return winograd::StrategyHelper< \ + _ctype, _dst_type, _input_filter_compute_type, \ + _output_compute_type, param::ConvBias::Format::NCHW88, \ + _format>::filter(src_ptr, dst_ptr, workspace_ptr, OC, IC, 0, \ + OC, m, FW, interp_points, src.layout.dtype, \ + rescale); \ } #define DISPATCH_FORMAT_MK8(_ctype, _dst_type, _input_filter_compute_type, \ @@ -159,8 +158,6 @@ void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src, DISPATCH_KERNEL(dt_float32, dt_float32, dt_float32, dt_float32, \ DISPATCH_FORMAT_MK8, 1.0f, _midout_tag, 0); \ } - //! nchwxx mode - else { megdnn_assert(src.layout.ndim == 6 || src.layout.ndim == 7); if (FW == 3) { if (m == 2) { @@ -171,11 +168,11 @@ void WinogradFilterPreprocessImpl::exec(_megdnn_tensor_in src, DISPATCH_DTYPE(5); } } - } #undef cb #undef DISPATCH_FORMAT_MK8 #undef DISPATCH_KERNEL #undef DISPATCH_DTYPE + } megdnn_assert(execed, "Unsupport winograd filter preprocess. m: %zu src: %s", m, src.layout.to_string().c_str()); -- GitLab