diff --git a/dnn/scripts/opr_param_defs.py b/dnn/scripts/opr_param_defs.py
index 264b005f6cf1a5aa84526d40ed1890991ea8a67b..d4ad3710f3299c91d733c1195e07c00916a54bf4 100755
--- a/dnn/scripts/opr_param_defs.py
+++ b/dnn/scripts/opr_param_defs.py
@@ -220,7 +220,7 @@ pdef('Axis').add_fields('int32', 'axis', 0)
 
 (pdef('Images2Neibs').
  add_fields('uint32', 'pad_h', 0, 'pad_w', 0, 'stride_h', 1, 'stride_w', 1,
-            'window_h', 3, 'window_w', 3))
+            'dilate_h', 1, 'dilate_w', 1, 'window_h', 3, 'window_w', 3))
 
 (pdef('Pooling', version=0, is_legacy=True).
  add_enum(
diff --git a/dnn/src/common/images2neibs.cpp b/dnn/src/common/images2neibs.cpp
index 5345f9e7115840962e863d475e5bdb982eef9459..24dff13cecbc8a842e324a8920ac8a527230f3d4 100644
--- a/dnn/src/common/images2neibs.cpp
+++ b/dnn/src/common/images2neibs.cpp
@@ -23,6 +23,8 @@ void Images2NeibsBase::deduce_layout_fwd(const TensorLayout &src,
                 "pad_w=" + std::to_string(param().pad_w) + ", " +
                 "stride_h=" + std::to_string(param().stride_h) + ", " +
                 "stride_w=" + std::to_string(param().stride_w) + ", " +
+                "dilate_h=" + std::to_string(param().dilate_h) + ", " +
+                "dilate_w=" + std::to_string(param().dilate_w) + ", " +
                 "window_h=" + std::to_string(param().window_h) + ", " +
                 "window_w=" + std::to_string(param().window_w);
     };
@@ -34,11 +36,13 @@ void Images2NeibsBase::deduce_layout_fwd(const TensorLayout &src,
     size_t pw = this->param().pad_w;
     size_t sh = this->param().stride_h;
     size_t sw = this->param().stride_w;
+    size_t dh = this->param().dilate_h;
+    size_t dw = this->param().dilate_w;
     size_t wh = this->param().window_h;
     size_t ww = this->param().window_w;
     size_t oh, ow;
-    infer_conv_shape2d(ih, iw, wh, ww, sh, sw, ph, pw, oh, ow);
+    infer_conv_shape2d(ih, iw, wh + (wh-1)*(dh-1), ww + (ww-1)*(dw-1), sh, sw, ph, pw, oh, ow);
     dst = TensorLayout(TensorShape({n, ic, oh, ow, wh, ww}), src.dtype);
 }
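The shape deduction above encodes the standard dilated-window rule: a window of w taps with dilation d spans an effective extent of d*(w-1)+1 input pixels, which is exactly the `wh + (wh-1)*(dh-1)` expression handed to `infer_conv_shape2d`. A minimal standalone sketch of the resulting output shape, in plain Python with a hypothetical helper name:

    def images2neibs_out_shape(ih, iw, ph, pw, sh, sw, dh, dw, wh, ww):
        # effective extent of a dilated window: dh*(wh-1)+1 == wh + (wh-1)*(dh-1)
        eff_h = dh * (wh - 1) + 1
        eff_w = dw * (ww - 1) + 1
        oh = (ih + 2 * ph - eff_h) // sh + 1
        ow = (iw + 2 * pw - eff_w) // sw + 1
        return oh, ow

    # the dilated case from the naive test below: 6x7 input, pad 1, stride 2,
    # dilation 2, window 3 -> 2x3 output positions
    assert images2neibs_out_shape(6, 7, 1, 1, 2, 2, 2, 2, 3, 3) == (2, 3)
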
diff --git a/dnn/src/cuda/images2neibs/kernel.cu b/dnn/src/cuda/images2neibs/kernel.cu
index c972548179ccf1bc0cae40d4f92acb4c3dfd743f..e8d150d2943cc197a6b4a992fec947e42a5290ca 100644
--- a/dnn/src/cuda/images2neibs/kernel.cu
+++ b/dnn/src/cuda/images2neibs/kernel.cu
@@ -24,7 +24,7 @@ namespace images2neibs {
 template <typename T>
 __global__ void forward_kernel(const T *src, T *dst,
                                int N, int C, int IH, int IW, int OH, int OW,
-                               int ph, int pw, int sh, int sw, int WH, int WW)
+                               int ph, int pw, int sh, int sw, int dh, int dw, int WH, int WW)
 {
     int NC = N * C;
     int WP = WH*WW;
@@ -37,8 +37,8 @@ __global__ void forward_kernel(const T *src, T *dst,
     if (op < OH * OW) {
         int oh = op / OW;
         int ow = op % OW;
-        int ih = -ph + sh * oh + wh;
-        int iw = -pw + sw * ow + ww;
+        int ih = -ph + sh * oh + wh * dh;
+        int iw = -pw + sw * ow + ww * dw;
         int dst_pos = nc * OH * OW * WH * WW + op * WH * WW + wp;
         int src_pos = nc * IH * IW + ih * IW + iw;
         dst[dst_pos] = (ih >= 0 && ih < IH && iw >= 0 && iw < IW)
@@ -52,7 +52,7 @@ __global__ void forward_kernel(const T *src, T *dst,
 template <typename T>
 void forward(const T* src, T* dst, int N, int C, int IH, int IW, int OH, int OW,
-             int ph, int pw, int sh, int sw, int wh, int ww,
+             int ph, int pw, int sh, int sw, int dh, int dw, int wh, int ww,
              cudaStream_t stream) {
     int spatial_size = OH * OW;
     int kernel_size = wh * ww;
@@ -63,7 +63,7 @@ void forward(const T* src, T* dst, int N, int C, int IH, int IW, int OH, int OW,
     int by = N * C;
     forward_kernel<T><<<dim3(bx, by), dim3(tx, ty), 0,
-                        stream>>>(src, dst, N, C, IH, IW, OH, OW, ph, pw, sh, sw,
+                        stream>>>(src, dst, N, C, IH, IW, OH, OW, ph, pw, sh, sw, dh, dw,
                                   wh, ww);
     after_kernel_launch();
 }
@@ -73,7 +73,7 @@ void forward(const T* src, T* dst, int N, int C, int IH, int IW, int OH, int OW,
 
 template <typename T>
 __global__ void backward_kernel(const T *diff, T *grad,
                                 int N, int C, int IH, int IW, int OH, int OW,
-                                int ph, int pw, int sh, int sw, int WH, int WW)
+                                int ph, int pw, int sh, int sw, int dh, int dw, int WH, int WW)
 {
     int id = threadIdx.x + blockIdx.x * blockDim.x;
     if (id < N*C*IH*IW) {
@@ -82,17 +82,20 @@ __global__ void backward_kernel(const T *diff, T *grad,
         int iw = id % (IH*IW) % IW;
         grad[nc*IH*IW + ih*IW + iw] = 0.0f;
         int oh_max = min((ih+ph) / sh, OH-1);
-        int oh_min = max((ih+ph-(WH-1)+sh-1) / sh, 0);
+        int oh_min = max((ih+ph-(WH-1)*dh+sh-1) / sh, 0);
         int ow_max = min((iw+pw) / sw, OW-1);
-        int ow_min = max((iw+pw-(WW-1)+sw-1) / sw, 0);
+        int ow_min = max((iw+pw-(WW-1)*dw+sw-1) / sw, 0);
         for (int oh = oh_min; oh <= oh_max; ++oh)
             for (int ow = ow_min; ow <= ow_max; ++ow) {
-                int wh = ih+ph - sh*oh;
-                int ww = iw+pw - sw*ow;
-                grad[nc*IH*IW + ih*IW + iw] +=
-                    diff[nc*OH*OW*WH*WW + oh*OW*WH*WW + ow*WH*WW +
-                         wh*WW + ww];
+                if ((ih+ph - sh*oh) % dh == 0 && (iw+pw - sw*ow) % dw == 0) {
+                    int wh = ih+ph - sh*oh - (ih+ph - sh*oh) / dh * (dh-1);
+                    int ww = iw+pw - sw*ow - (iw+pw - sw*ow) / dw * (dw-1);
+                    grad[nc*IH*IW + ih*IW + iw] +=
+                        diff[nc*OH*OW*WH*WW + oh*OW*WH*WW + ow*WH*WW +
+                             wh*WW + ww];
+                }
             }
     }
 }
@@ -100,23 +103,23 @@ __global__ void backward_kernel(const T *diff, T *grad,
 template <typename T>
 void backward(const T *diff, T *grad,
               int N, int C, int IH, int IW, int OH, int OW,
-              int ph, int pw, int sh, int sw, int wh, int ww,
+              int ph, int pw, int sh, int sw, int dh, int dw, int wh, int ww,
               cudaStream_t stream) {
     int threads = NR_THREADS;
     int blocks = DIVUP(N*C*IH*IW, threads);
     backward_kernel<T><<<blocks, threads, 0, stream>>>(diff, grad,
                                                        N, C, IH, IW, OH, OW,
-                                                       ph, pw, sh, sw, wh, ww);
+                                                       ph, pw, sh, sw, dh, dw, wh, ww);
     after_kernel_launch();
 }
 
 #define INST(T) \
     template void forward(const T *, T *, int, int, int, int, int, int, \
-                          int, int, int, int, int, int, \
+                          int, int, int, int, int, int, int, int, \
                           cudaStream_t); \
     template void backward(const T *, T *, int, int, int, int, int, int, \
-                           int, int, int, int, int, int, \
+                           int, int, int, int, int, int, int, int, \
                            cudaStream_t);
 
 #define cb(DType) \
     INST(DTypeTrait<DType>::ctype)
diff --git a/dnn/src/cuda/images2neibs/kernel.cuh b/dnn/src/cuda/images2neibs/kernel.cuh
index a25ff996096ce6e3c58b8518e0d58b3e2f56f7e8..acef54164cb8b75009d7c92afea8d4f9f4b0e0e5 100644
--- a/dnn/src/cuda/images2neibs/kernel.cuh
+++ b/dnn/src/cuda/images2neibs/kernel.cuh
@@ -18,13 +18,13 @@ namespace images2neibs {
 template <typename T>
 void forward(const T *src, T *dst,
              int N, int C, int IH, int IW, int OH, int OW,
-             int ph, int pw, int sh, int sw, int wh, int ww,
+             int ph, int pw, int sh, int sw, int dh, int dw, int wh, int ww,
              cudaStream_t stream);
 
 template <typename T>
 void backward(const T *diff, T *grad,
               int N, int C, int IH, int IW, int OH, int OW,
-              int ph, int pw, int sh, int sw, int wh, int ww,
+              int ph, int pw, int sh, int sw, int dh, int dw, int wh, int ww,
               cudaStream_t stream);
 
 } // namespace images2neibs
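The guarded accumulation in `backward_kernel` inverts the forward index map `ih = -ph + sh*oh + wh*dh`: for a fixed output position, the residual `t = ih+ph - sh*oh` names a window tap only when it is a multiple of `dh`, and `t - t/dh*(dh-1)` then equals `t/dh`. A plain-Python identity check, illustrative only and not part of the patch:

    # forward maps window tap wh to input offset t = wh*dh; backward inverts this
    for dh in (1, 2, 3):
        for wh in range(4):
            t = wh * dh
            assert t % dh == 0                              # the guard in backward_kernel
            assert t - t // dh * (dh - 1) == t // dh == wh  # the tap-recovery expression
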
diff --git a/dnn/src/cuda/images2neibs/opr_impl.cpp b/dnn/src/cuda/images2neibs/opr_impl.cpp
index dd642191ce54717c0507d0889a806a980f4c9194..4761fd4edb786ac5eaccd86f2e286005398b0a5a 100644
--- a/dnn/src/cuda/images2neibs/opr_impl.cpp
+++ b/dnn/src/cuda/images2neibs/opr_impl.cpp
@@ -27,13 +27,14 @@ void Images2NeibsForwardImpl::exec(_megdnn_tensor_in src,
     int OH = dst.layout[2], OW = dst.layout[3];
     int ph = param().pad_h, pw = param().pad_w;
     int sh = param().stride_h, sw = param().stride_w;
+    int dh = param().dilate_h, dw = param().dilate_w;
     int wh = param().window_h, ww = param().window_w;
 #define cb(DType) \
     if (src.layout.dtype.enumv() == DTypeTrait<DType>::enumv) { \
         using T = DTypeTrait<DType>::ctype; \
         images2neibs::forward(src.ptr<T>(), dst.ptr<T>(), \
                               N, C, IH, IW, OH, OW, \
-                              ph, pw, sh, sw, wh, ww, \
+                              ph, pw, sh, sw, dh, dw, wh, ww, \
                               stream); \
         return; \
     }
@@ -53,13 +54,14 @@ void Images2NeibsBackwardImpl::exec(_megdnn_tensor_in diff,
     int OH = diff.layout[2], OW = diff.layout[3];
     int ph = param().pad_h, pw = param().pad_w;
     int sh = param().stride_h, sw = param().stride_w;
+    int dh = param().dilate_h, dw = param().dilate_w;
     int wh = param().window_h, ww = param().window_w;
 #define cb(DType) \
     if (diff.layout.dtype == DType()) { \
         using T = DTypeTrait<DType>::ctype; \
         images2neibs::backward(diff.ptr<T>(), grad.ptr<T>(), \
                                N, C, IH, IW, OH, OW, \
-                               ph, pw, sh, sw, wh, ww, \
+                               ph, pw, sh, sw, dh, dw, wh, ww, \
                                stream); \
         return; \
     }
diff --git a/dnn/src/naive/images2neibs/opr_impl.cpp b/dnn/src/naive/images2neibs/opr_impl.cpp
index 20d7794143db4102d476ea41d531aff102879bb4..208e962364fc13bc2fc41317c1455e598571de5c 100644
--- a/dnn/src/naive/images2neibs/opr_impl.cpp
+++ b/dnn/src/naive/images2neibs/opr_impl.cpp
@@ -33,20 +33,25 @@ void Images2NeibsForwardImpl::exec_internal(_megdnn_tensor_in src,
     int pad_w = static_cast<int>(param().pad_w);
     int stride_h = static_cast<int>(param().stride_h);
     int stride_w = static_cast<int>(param().stride_w);
+    int dilate_h = static_cast<int>(param().dilate_h);
+    int dilate_w = static_cast<int>(param().dilate_w);
+    int equ_window_h = dilate_h * (window_h-1) + 1;
+    int equ_window_w = dilate_w * (window_w-1) + 1;
     for (int n = 0; n < N; ++n)
     for (int c = 0; c < C; ++c) {
         int ih = -pad_h;
-        for (; ih+window_h <= IH+pad_h; ih += stride_h) {
+        for (; ih+equ_window_h <= IH+pad_h; ih += stride_h) {
             int iw = -pad_w;
-            for (; iw+window_w <= IW+pad_w; iw += stride_w) {
+            for (; iw+equ_window_w <= IW+pad_w; iw += stride_w) {
                 for (int kh = 0; kh < window_h; ++kh)
                 for (int kw = 0; kw < window_w; ++kw) {
+                    int ih2 = ih+dilate_h*kh, iw2 = iw+dilate_w*kw;
                     dptr[idx*window_h*window_w + kh*window_w + kw] =
-                        (ih+kh) >= 0 && (ih+kh) < IH &&
-                        (iw+kw) >= 0 && (iw+kw) < IW ?
-                        sptr[n*C*IH*IW + c*IH*IW + (ih+kh)*IW + (iw+kw)] : 0.0f;
+                        ih2 >= 0 && ih2 < IH &&
+                        iw2 >= 0 && iw2 < IW ?
+                        sptr[n*C*IH*IW + c*IH*IW + ih2*IW + iw2] : 0.0f;
                 }
                 ++idx;
             }
@@ -86,18 +91,22 @@ void Images2NeibsBackwardImpl::exec_internal(_megdnn_tensor_in diff,
     int pad_w = static_cast<int>(param().pad_w);
     int stride_h = static_cast<int>(param().stride_h);
     int stride_w = static_cast<int>(param().stride_w);
+    int dilate_h = static_cast<int>(param().dilate_h);
+    int dilate_w = static_cast<int>(param().dilate_w);
+    int equ_window_h = dilate_h * (window_h-1) + 1;
+    int equ_window_w = dilate_w * (window_w-1) + 1;
     memset(sptr, 0, sizeof(T) * N*C*IH*IW);
     for (int n = 0; n < N; ++n)
     for (int c = 0; c < C; ++c) {
         int ih = -pad_h;
-        for (; ih+window_h <= IH+pad_h; ih += stride_h) {
+        for (; ih+equ_window_h <= IH+pad_h; ih += stride_h) {
             int iw = -pad_w;
-            for (; iw+window_w <= IW+pad_w; iw += stride_w) {
+            for (; iw+equ_window_w <= IW+pad_w; iw += stride_w) {
                 for (int kh = 0; kh < window_h; ++kh)
                 for (int kw = 0; kw < window_w; ++kw) {
-                    int ih2 = ih+kh, iw2 = iw+kw;
+                    int ih2 = ih+dilate_h*kh, iw2 = iw+dilate_w*kw;
                     if (ih2 >= 0 && ih2 < IH && iw2 >= 0 && iw2 < IW) {
                         sptr[n*C*IH*IW + c*IH*IW + ih2*IW + iw2] +=
                             dptr[idx*window_h*window_w + kh*window_w + kw];
diff --git a/dnn/test/common/images2neibs.h b/dnn/test/common/images2neibs.h
index 026016512a8d7b7bbb0a2c48ca0bf99e19e38756..086314e196999d2fd16819f9f9d542533950671b 100644
--- a/dnn/test/common/images2neibs.h
+++ b/dnn/test/common/images2neibs.h
@@ -31,17 +31,19 @@ inline std::vector<TestArg> get_args() {
     for (uint32_t pw : {0, 1})
     for (uint32_t sh : {1, 2})
    for (uint32_t sw : {1, 2})
+    for (uint32_t dh : {1, 2, 3})
+    for (uint32_t dw : {1, 2, 3})
     for (uint32_t wh : {3, 4})
     for (uint32_t ww : {3, 4}) {
-        args.emplace_back(param::Images2Neibs{ph, pw, sh, sw, wh, ww},
-                          TensorShape{2, 3, 5, 6});
+        args.emplace_back(param::Images2Neibs{ph, pw, sh, sw, dh, dw, wh, ww},
+                          TensorShape{2, 3, 19, 20});
     }
     // clang-format on
     // large window case
-    args.emplace_back(param::Images2Neibs{0, 0, 1, 1, 32, 64},
+    args.emplace_back(param::Images2Neibs{0, 0, 1, 1, 1, 1, 32, 64},
                       TensorShape{2, 3, 96, 128});
     // large size
-    args.emplace_back(param::Images2Neibs{0, 0, 1, 1, 1, 1},
+    args.emplace_back(param::Images2Neibs{0, 0, 1, 1, 1, 1, 1, 1},
                       TensorShape{128, 128, 28, 24});
 
     return args;
@@ -54,17 +56,19 @@ inline std::vector<TestArg> get_benchmark_args() {
     for (uint32_t pw : {0, 1})
     for (uint32_t sh : {1, 2})
     for (uint32_t sw : {1, 2})
+    for (uint32_t dh : {1, 2})
+    for (uint32_t dw : {1, 2})
     for (uint32_t wh : {3, 4})
     for (uint32_t ww : {3, 4})
     for (uint32_t b : {1, 64})
     for (uint32_t c : {64, 128})
     for (uint32_t hw : {64, 128}) {
-        args.emplace_back(param::Images2Neibs{ph, pw, sh, sw, wh, ww},
+        args.emplace_back(param::Images2Neibs{ph, pw, sh, sw, dh, dw, wh, ww},
                           TensorShape{b, c, hw, hw});
     }
     // clang-format on
     // large size
-    args.emplace_back(param::Images2Neibs{0, 0, 1, 1, 1, 1},
+    args.emplace_back(param::Images2Neibs{0, 0, 1, 1, 1, 1, 1, 1},
                       TensorShape{1024, 128, 28, 24});
 
     return args;
diff --git a/dnn/test/naive/images2neibs.cpp b/dnn/test/naive/images2neibs.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1c011bd7013988b910227254686e9caada8393f0
--- /dev/null
+++ b/dnn/test/naive/images2neibs.cpp
@@ -0,0 +1,59 @@
+/**
+ * \file dnn/test/naive/images2neibs.cpp
+ * MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+ *
+ * Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ */
+#include "test/naive/fixture.h"
+
+#include "megdnn/oprs/nn.h"
+#include "test/common/checker.h"
+
+using namespace megdnn;
+using namespace test;
+
+TEST_F(NAIVE, IMAGES2NEIBS_FORWARD) {
+    Checker<Images2Neibs> checker(handle(), /* check_dispatch */ false);
+
+    Images2Neibs::Param param(0, 0, 1, 1, 1, 1, 2, 2);
+    checker.set_param(param).exect(
+            Testcase{TensorValue({1, 1, 3, 3}, dtype::Uint8(),
+                                 {0, 1, 2,
+                                  3, 4, 5,
+                                  6, 7, 8}), {}},
+            Testcase{{},
+                     TensorValue({1, 1, 2, 2, 2, 2}, dtype::Uint8(),
+                                 {0, 1, 3, 4,
+                                  1, 2, 4, 5,
+                                  3, 4, 6, 7,
+                                  4, 5, 7, 8})});
+
+    param.pad_h = 1;
+    param.pad_w = 1;
+    param.stride_h = 2;
+    param.stride_w = 2;
+    param.dilate_h = 2;
+    param.dilate_w = 2;
+    param.window_h = 3;
+    param.window_w = 3;
+    checker.set_param(param).exect(
+            Testcase{TensorValue({1, 1, 6, 7}, dtype::Uint8(),
+                                 {0, 1, 2, 3, 4, 5, 6,
+                                  7, 8, 9, 10, 11, 12, 13,
+                                  14, 15, 16, 17, 18, 19, 20,
+                                  21, 22, 23, 24, 25, 26, 27,
+                                  28, 29, 30, 31, 32, 33, 34,
+                                  35, 36, 37, 38, 39, 40, 41}), {}},
+            Testcase{{},
+                     TensorValue({1, 1, 2, 3, 3, 3}, dtype::Uint8(),
+                                 {0, 0, 0, 0, 8, 10, 0, 22, 24,
+                                  0, 0, 0, 8, 10, 12, 22, 24, 26,
+                                  0, 0, 0, 10, 12, 0, 24, 26, 0,
+                                  0, 8, 10, 0, 22, 24, 0, 36, 38,
+                                  8, 10, 12, 22, 24, 26, 36, 38, 40,
+                                  10, 12, 0, 24, 26, 0, 38, 40, 0})});
+}
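Where those expected values come from: zero-pad the 6x7 ramp by one pixel, then gather 3x3 windows whose taps sit 2 apart (dilation) from origins 2 apart (stride). A standalone NumPy re-derivation, not part of the test suite:

    import numpy as np

    src = np.arange(42).reshape(6, 7)
    padded = np.zeros((8, 9), dtype=src.dtype)
    padded[1:7, 1:8] = src                        # pad_h = pad_w = 1
    out = np.empty((2, 3, 3, 3), dtype=src.dtype)
    for oh in range(2):
        for ow in range(3):
            # stride 2 picks the window origin; step 2 in the slice is the dilation
            out[oh, ow] = padded[2 * oh : 2 * oh + 5 : 2, 2 * ow : 2 * ow + 5 : 2]
    print(out.reshape(6, 9))  # rows match the TensorValue({1, 1, 2, 3, 3, 3}, ...) payload
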
+ """ + padding_h, padding_w = _pair(padding) + stride_h, stride_w = _pair_nonzero(stride) + dilation_h, dilation_w = _pair_nonzero(dilation) + window_h, window_w = _pair_nonzero(kernel_size) + + op = builtin.Images2Neibs( + pad_h=padding_h, + pad_w=padding_w, + stride_h=stride_h, + stride_w=stride_w, + dilate_h=dilation_h, + dilate_w=dilation_w, + window_h=window_h, + window_w=window_w, + ) + (output,) = apply(op, inp) + return output + + interpolate = deprecated_func("1.3", "megengine.functional.vision", "interpolate", True) roi_pooling = deprecated_func("1.3", "megengine.functional.vision", "roi_pooling", True) roi_align = deprecated_func("1.3", "megengine.functional.vision", "roi_align", True) diff --git a/imperative/python/megengine/module/__init__.py b/imperative/python/megengine/module/__init__.py index bdffc89dd03100ef24a0f3fbf53d005494bb4854..b178fe2a9f08742874510b2816d5968efc1576f4 100644 --- a/imperative/python/megengine/module/__init__.py +++ b/imperative/python/megengine/module/__init__.py @@ -34,3 +34,4 @@ from .normalization import GroupNorm, InstanceNorm, LayerNorm from .pooling import AvgPool2d, MaxPool2d from .quant_dequant import DequantStub, QuantStub from .sequential import Sequential +from .sliding_window import SlidingWindow diff --git a/imperative/python/megengine/module/sliding_window.py b/imperative/python/megengine/module/sliding_window.py new file mode 100644 index 0000000000000000000000000000000000000000..c7da29b3d72e01c3a0eaaf8e405f7f1c5dcc0a31 --- /dev/null +++ b/imperative/python/megengine/module/sliding_window.py @@ -0,0 +1,88 @@ +# -*- coding: utf-8 -*- +# MegEngine is Licensed under the Apache License, Version 2.0 (the "License") +# +# Copyright (c) 2014-2021 Megvii Inc. All rights reserved. +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +from typing import Tuple, Union + +from ..functional import sliding_window +from .module import Module + + +class SlidingWindow(Module): + r""" + Apply a sliding window to input tensor and copy content in the window to + corresponding output location. Assume input shape is :math:`(N, C, IH, IW)`, + then output shape would be :math:`(N, C, OH, OW, window_h, window_w)` where + :math:`(OH, OW)` would be computed from padding, stride, window and + :math:`(IH, IW)`, as in convolution. For each output location, we have; + + .. math:: + + out_{n, c, oh, ow, wh, ww} &= src_{n, c, ih+wh, iw+ww} \\ + \text{where } & ih=-pad_h+oh \times stride_h + (wh-1) \times (dilation_h-1) \\ + & iw=-pad_w+ow \times stride_w + (ww-1) \times (dilation_w-1) + + + :param kernel_size: the size of the window to take a max over. + :param padding: implicit zero padding to be added on both sides. Default: 0 + :param stride: the stride of the window. Default: 1 + :param dilation: the dilation of the window. Default: 1 + + Example: + + .. testcode:: + + from megengine import tensor + import megengine.module as M + import numpy as np + + inp = tensor(np.arange(30).reshape(1,1,5,6)) + op = M.SlidingWindow(kernel_size=3, padding=1, stride=2, dilation=2) + out = op(inp) + print(out.numpy()) + + Outputs: + + .. 
diff --git a/imperative/python/test/unit/functional/test_functional.py b/imperative/python/test/unit/functional/test_functional.py
index c96865358369ab1c6dcb6b6b8a79c30529e14fac..46486bfd09965d9972530a7287ed1b9895e1ba9f 100644
--- a/imperative/python/test/unit/functional/test_functional.py
+++ b/imperative/python/test/unit/functional/test_functional.py
@@ -927,3 +927,28 @@ def test_neg_axis():
     y = F.argmin(x, axis=(-1, -2))
     yy = F.argmin(x, axis=(0, 1))
     np.testing.assert_equal(y.numpy(), yy.numpy())
+
+
+def test_sliding_window():
+    N, C, H, W = 2, 3, 7, 8
+    inp = np.random.normal(size=(N, C, H, W))
+    ph, pw = 1, 2
+    sh, sw = 2, 1
+    wh, ww = 3, 2
+    dh, dw = 1, 3
+    s = lambda i, p, s, d, w: (i + p * 2 - (w - 1) * d - 1) // s + 1
+    inp_pad = np.zeros((N, C, H + ph * 2, W + pw * 2))
+    inp_pad[:, :, ph : H + ph, pw : W + pw] = inp
+    gt_out = np.empty(
+        (N, C, s(H, ph, sh, dh, wh), s(W, pw, sw, dw, ww), wh, ww), dtype=np.float32
+    )
+    for n, c, oh, ow in itertools.product(*map(range, gt_out.shape[:4])):
+        ih, iw = oh * sh, ow * sw
+        gt_out[n, c, oh, ow, :] = inp_pad[
+            n, c, ih : ih + (wh - 1) * dh + 1 : dh, iw : iw + (ww - 1) * dw + 1 : dw
+        ]
+
+    out = F.sliding_window(
+        tensor(inp), (wh, ww), padding=(ph, pw), stride=(sh, sw), dilation=(dh, dw)
+    )
+    np.testing.assert_equal(gt_out, out.numpy())
diff --git a/imperative/src/impl/ops/specializations.cpp b/imperative/src/impl/ops/specializations.cpp
index bde440cce06b0248d5a8aec39a28cdd096dda7a4..13ebcced7f6d50c6b22f0091aea6adfb1fc87248 100644
--- a/imperative/src/impl/ops/specializations.cpp
+++ b/imperative/src/impl/ops/specializations.cpp
@@ -32,6 +32,7 @@
 #include "megbrain/opr/tensor_gen.h"
 #include "megbrain/opr/tensor_manip.h"
 #include "megbrain/opr/utility.h"
+#include "megbrain/opr/dnn/images2neibs.h"
 
 #include "../op_trait.h"
 
@@ -652,4 +653,17 @@ OP_TRAIT_REG(SVD, SVD)
     .fallback();
 }} // svd
 
+namespace { namespace images2neibs {
+auto apply_on_var_node(
+        const OpDef& def,
+        const VarNodeArray& inputs) {
+    auto&& op = static_cast<const Images2Neibs&>(def);
+    OperatorNodeConfig config{op.make_name()};
+    return opr::Images2Neibs::make(inputs[0], op.param(), config);
+}
+OP_TRAIT_REG(Images2Neibs, Images2Neibs)
+    .apply_on_var_node(apply_on_var_node)
+    .fallback();
+}} // images2neibs
+
 } // namespace mgb::imperative
diff --git a/src/core/include/megbrain/ir/ops.td b/src/core/include/megbrain/ir/ops.td
index 164510d24adc9ca8c82af25c2ce9b3ba27e51056..3a3fa7189526ee57628d210dd7b3260deeb7fe11 100644
--- a/src/core/include/megbrain/ir/ops.td
+++ b/src/core/include/megbrain/ir/ops.td
@@ -79,6 +79,8 @@ def BatchConvBias : MgbHashableOp<"BatchConvBias", [BatchConvBiasParam, Executio
 );
 }
 
+def Images2Neibs : MgbHashableOp<"Images2Neibs", [Images2NeibsParam]>;
+
 def BatchNorm : MgbHashableOp<"BatchNorm", [BNParam]>;
 
 def ROIAlign: MgbHashableOp<"ROIAlign", [ROIAlignParam]>;
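Since both the forward and backward kernels change, a quick autodiff smoke test rounds things out. A sketch assuming a build with this patch; `GradManager` is MegEngine's standard autodiff entry point:

    import numpy as np
    import megengine.functional as F
    from megengine import tensor
    from megengine.autodiff import GradManager

    x = tensor(np.random.randn(1, 1, 5, 6).astype("float32"))
    gm = GradManager().attach([x])
    with gm:
        y = F.sliding_window(x, kernel_size=3, padding=1, stride=2, dilation=2)
        gm.backward(y.sum())
    # each entry counts how many dilated windows that input pixel falls into
    print(x.grad.numpy())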