Commit d970b85d authored by Megvii Engine Team

feat(functional): add conv3d

GitOrigin-RevId: c06ac77c6c4fd7a382e0ba40c2b0d9fedb836eeb
Parent 46cad4d3
......@@ -22,8 +22,8 @@ from ..device import get_default_device
from ..distributed import WORLD, is_distributed
from ..random import uniform
from ..tensor import Tensor
from ..utils.tuple_function import _pair, _pair_nonzero
from .debug_param import get_conv_execution_strategy, get_execution_strategy
from ..utils.tuple_function import _pair, _pair_nonzero, _triple, _triple_nonzero
from .debug_param import get_execution_strategy
from .distributed import all_reduce_sum
from .elemwise import exp, floor, log, log1p, maximum, minimum
from .math import argsort, matmul, max, prod, sum
......@@ -43,7 +43,9 @@ __all__ = [
"adaptive_max_pool2d",
"avg_pool2d",
"batch_norm",
"conv1d",
"conv2d",
"conv3d",
"conv_transpose2d",
"deformable_conv2d",
"deformable_psroi_pooling",
......@@ -166,6 +168,66 @@ def conv2d(
return output
def conv3d(
inp: Tensor,
weight: Tensor,
bias: Optional[Tensor] = None,
stride: Union[int, Tuple[int, int, int]] = 1,
padding: Union[int, Tuple[int, int, int]] = 0,
dilation: Union[int, Tuple[int, int, int]] = 1,
groups: int = 1,
conv_mode: str = "CROSS_CORRELATION",
) -> Tensor:
"""
3D convolution operation.
Refer to :class:`~.Conv3d` for more information.
:param inp: feature map of the convolution operation.
:param weight: convolution kernel.
:param bias: bias added to the result of convolution (if given).
:param stride: stride of the 3D convolution operation. Default: 1
:param padding: size of the paddings added to the input on both sides of its
spatial dimensions. Only zero-padding is supported. Default: 0
:param dilation: dilation of the 3D convolution operation. Default: 1
:param groups: number of groups into which the input and output channels are divided, so as to perform a "grouped convolution". When ``groups`` is not 1,
    ``in_channels`` and ``out_channels`` must be divisible by ``groups``,
    and the shape of weight should be `(groups, out_channels // groups,
    in_channels // groups, t, height, width)`.
:param conv_mode: supports "CROSS_CORRELATION". Default:
"CROSS_CORRELATION"
:return: output tensor.
"""
assert conv_mode == "CROSS_CORRELATION"
D, H, W = 0, 1, 2
pad = _triple(padding)
stride = _triple_nonzero(stride)
dilate = _triple_nonzero(dilation)
sparse_type = "DENSE" if groups == 1 else "GROUP"
op = builtin.Convolution3D(
pad_d=pad[D],
pad_h=pad[H],
pad_w=pad[W],
stride_d=stride[D],
stride_h=stride[H],
stride_w=stride[W],
dilate_d=dilate[D],
dilate_h=dilate[H],
dilate_w=dilate[W],
strategy=get_execution_strategy(),
mode=conv_mode,
sparse=sparse_type,
)
inp, weight = utils.convert_inputs(inp, weight)
(output,) = apply(op, inp, weight)
if bias is not None:
output += bias
return output
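For reference, each output spatial dimension (T, H, W) of ``conv3d`` follows the usual convolution arithmetic. A minimal sketch, with a hypothetical helper name that is not part of this commit:

def _conv3d_out_size(size, kernel, stride=1, padding=0, dilation=1):
    # standard convolution output-size formula, applied per spatial dim
    return (size + 2 * padding - dilation * (kernel - 1) - 1) // stride + 1

# e.g. a 4x4x4 input with a 3x3x3 kernel, stride 1, no padding -> 2x2x2
assert _conv3d_out_size(4, 3) == 2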
def conv_transpose2d(
inp: Tensor,
weight: Tensor,
......@@ -1094,7 +1156,7 @@ def matmul(
transposeB=transpose_b,
compute_mode=compute_mode,
format=format,
strategy=get_conv_execution_strategy(),
strategy=get_execution_strategy(),
)
else:
op = builtin.MatrixMul(
......@@ -1102,7 +1164,7 @@ def matmul(
transposeB=transpose_b,
compute_mode=compute_mode,
format=format,
strategy=get_conv_execution_strategy(),
strategy=get_execution_strategy(),
)
(result,) = apply(op, inp1, inp2)
......
......@@ -15,6 +15,7 @@ from .concat import Concat
from .conv import (
Conv1d,
Conv2d,
Conv3d,
ConvRelu2d,
ConvTranspose2d,
DeformableConv2d,
......
......@@ -13,13 +13,14 @@ import numpy as np
from ..functional import (
conv1d,
conv2d,
conv3d,
conv_transpose2d,
deformable_conv2d,
local_conv2d,
relu,
)
from ..tensor import Parameter
from ..utils.tuple_function import _pair, _pair_nonzero
from ..utils.tuple_function import _pair, _pair_nonzero, _triple, _triple_nonzero
from . import init
from .module import Module
......@@ -400,6 +401,142 @@ class Conv2d(_ConvNd):
return self.calc_conv(inp, self.weight, self.bias)
class Conv3d(_ConvNd):
r"""
Applies a 3D convolution over an input tensor.
For instance, given an input of the size :math:`(N, C_{\text{in}}, T, H, W)`,
this layer generates an output of the size
:math:`(N, C_{\text{out}}, T_{\text{out}}, H_{\text{out}}, W_{\text{out}})` through the
process described as below:
.. math::
\text{out}(N_i, C_{\text{out}_j}) = \text{bias}(C_{\text{out}_j}) +
\sum_{k = 0}^{C_{\text{in}} - 1} \text{weight}(C_{\text{out}_j}, k) \star \text{input}(N_i, k)
where :math:`\star` is the valid 3D cross-correlation operator,
:math:`N` is the batch size, and :math:`C` denotes the number of channels.
When `groups == in_channels` and `out_channels == K * in_channels`,
where K is a positive integer, this operation is also known as depthwise
convolution.
In other words, for an input of size :math:`(N, C_{in}, T_{in}, H_{in}, W_{in})`,
a depthwise convolution with a depthwise multiplier `K` can be constructed
by arguments :math:`(in\_channels=C_{in}, out\_channels=C_{in} \times K, ..., groups=C_{in})`.
:param in_channels: number of input channels.
:param out_channels: number of output channels.
:param kernel_size: size of weight on spatial dimensions. If kernel_size is
an :class:`int`, the actual kernel size would be
`(kernel_size, kernel_size, kernel_size)`. Default: 1
:param stride: stride of the 3D convolution operation. Default: 1
:param padding: size of the paddings added to the input on both sides of its
spatial dimensions. Only zero-padding is supported. Default: 0
:param dilation: dilation of the 3D convolution operation. Default: 1
:param groups: number of groups into which the input and output channels are divided, so as to perform a "grouped convolution". When ``groups`` is not 1,
``in_channels`` and ``out_channels`` must be divisible by ``groups``,
and there would be an extra dimension at the beginning of the weight's
shape. Specifically, the shape of weight would be `(groups,
out_channels // groups, in_channels // groups, *kernel_size)`.
:param bias: whether to add a bias onto the result of convolution. Default:
True
:param conv_mode: Supports `CROSS_CORRELATION`. Default:
`CROSS_CORRELATION`
Examples:
.. testcode::
import numpy as np
import megengine as mge
import megengine.module as M
m = M.Conv3d(in_channels=3, out_channels=1, kernel_size=3)
inp = mge.tensor(np.arange(0, 384).astype("float32").reshape(2, 3, 4, 4, 4))
oup = m(inp)
print(oup.numpy().shape)
Outputs:
.. testoutput::
(2, 1, 2, 2, 2)
"""
def __init__(
self,
in_channels: int,
out_channels: int,
kernel_size: Union[int, Tuple[int, int, int]],
stride: Union[int, Tuple[int, int, int]] = 1,
padding: Union[int, Tuple[int, int, int]] = 0,
dilation: Union[int, Tuple[int, int, int]] = 1,
groups: int = 1,
bias: bool = True,
conv_mode: str = "CROSS_CORRELATION",
):
kernel_size = _triple_nonzero(kernel_size)
stride = _triple_nonzero(stride)
padding = _triple(padding)
dilation = _triple_nonzero(dilation)
self.conv_mode = conv_mode
super().__init__(
in_channels,
out_channels,
kernel_size,
stride,
padding,
dilation,
groups,
bias,
)
def _get_fanin(self):
kt, kh, kw = self.kernel_size
ic = self.in_channels
return kt * kh * kw * ic
def _infer_weight_shape(self):
group = self.groups
ichl = self.in_channels
ochl = self.out_channels
kt, kh, kw = self.kernel_size
if group == 1:
# Assume format is NCTHW
return (ochl, ichl, kt, kh, kw)
assert (
ichl % group == 0 and ochl % group == 0
), "invalid config: input_channels={} output_channels={} group={}".format(
ichl, ochl, group
)
# Assume format is NCTHW
return (group, ochl // group, ichl // group, kt, kh, kw)
def _infer_bias_shape(self):
# Assume format is NCTHW
return (1, self.out_channels, 1, 1, 1)
def calc_conv(self, inp, weight, bias):
return conv3d(
inp,
weight,
bias,
self.stride,
self.padding,
self.dilation,
self.groups,
self.conv_mode,
)
def forward(self, inp):
return self.calc_conv(inp, self.weight, self.bias)
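As the docstring notes, ``groups == in_channels`` yields a depthwise convolution. A short usage sketch of the module added above; the shapes and channel counts here are illustrative assumptions, not taken from this commit:

import numpy as np
import megengine as mge
import megengine.module as M

# depthwise 3D convolution with multiplier K=2: one group per input channel
m = M.Conv3d(in_channels=4, out_channels=8, kernel_size=3, groups=4)
inp = mge.tensor(np.random.randn(1, 4, 8, 8, 8).astype("float32"))
out = m(inp)
print(out.numpy().shape)  # (1, 8, 6, 6, 6)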
class ConvTranspose2d(_ConvNd):
r"""
Applies a 2D transposed convolution over an input tensor.
......
......@@ -35,4 +35,5 @@ _single = functools.partial(get_ndtuple, n=1, allow_zero=True)
_pair = functools.partial(get_ndtuple, n=2, allow_zero=True)
_pair_nonzero = functools.partial(get_ndtuple, n=2, allow_zero=False)
_triple = functools.partial(get_ndtuple, n=3, allow_zero=True)
_triple_nonzero = functools.partial(get_ndtuple, n=3, allow_zero=False)
_quadruple = functools.partial(get_ndtuple, n=4, allow_zero=True)
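``get_ndtuple`` itself lies outside this diff; its presumed contract (broadcast a scalar to an n-tuple, optionally rejecting zeros) can be sketched as follows. This is an assumption about the helper, not its real implementation:

def get_ndtuple(value, *, n, allow_zero):
    # hypothetical sketch: the actual helper lives in tuple_function.py
    if not isinstance(value, (tuple, list)):
        value = (value,) * n  # broadcast a scalar, e.g. stride=2 -> (2, 2, 2)
    assert len(value) == n
    if not allow_zero:
        assert all(v != 0 for v in value)  # stride/kernel sizes must be nonzero
    return tuple(value)

assert get_ndtuple(2, n=3, allow_zero=False) == (2, 2, 2)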
......@@ -637,7 +637,7 @@ def test_batch_conv_bias():
run(1, 4, 4, 5, 5, 3, 3, 0, 0, 1, 1, True)
def test_zero_stride_numpy_array():
def test_conv2d_zero_stride_numpy_array():
inp = np.random.randn(3, 224, 224).astype(np.float32)
inp = inp[np.newaxis, :]
......@@ -646,6 +646,16 @@ def test_zero_stride_numpy_array():
out = F.conv2d(inp, weight, None, (2, 2), (3, 3), (1, 1), 1)
def test_conv3d_zero_stride_numpy_array():
inp = np.random.randn(3, 224, 224, 224).astype(np.float32)
inp = inp[np.newaxis, :]
inp = tensor(inp, dtype=np.float32)
weight = tensor(np.random.randn(16, 3, 3, 3, 3), dtype=np.float32)
out = F.conv3d(inp, weight, None, (2, 2, 2), (3, 3, 3), (1, 1, 1), 1)
out.numpy()
def test_conv1d():
inp = tensor(np.ones((16,), dtype=np.float32).reshape(2, 2, 4))
weight = tensor(np.ones((12,), dtype=np.float32).reshape(3, 2, 2))
......@@ -658,6 +668,16 @@ def test_conv1d():
)
def test_conv3d():
inp = tensor(np.ones((256,), dtype=np.float32).reshape(2, 2, 4, 4, 4))
weight = tensor(np.ones((48,), dtype=np.float32).reshape(3, 2, 2, 2, 2))
out = F.conv3d(inp, weight, None, 2, 0, 1, 1)
print(out.numpy().shape)
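# all-ones input and 2x2x2 kernels over 2 input channels: each output
# element sums 2 * (2*2*2) = 16 ones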
np.testing.assert_equal(
out.numpy(), np.ones((2, 3, 2, 2, 2), dtype=np.float32) * 16
)
def test_condtake():
x = np.array([[1, 2, 3], [4, 5, 6]])
y = np.array([[True, False, True], [False, True, True]])
......
/**
* \file imperative/src/impl/ops/dnn/convolution.cpp
* MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
*
* Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
*/
#include "megbrain/imperative/ops/autogen.h"
#include "megbrain/opr/dnn/convolution.h"
#include "../op_trait.h"
namespace mgb {
namespace imperative {
namespace { namespace convolution {
std::shared_ptr<OpDef> make_from_op_node(cg::OperatorNodeBase* node_) {
auto* node = &node_->cast_final_safe<opr::Convolution>();
return Convolution::make(node->param(), node->execution_policy());
}
auto apply_on_var_node(
const OpDef& def,
const VarNodeArray& inputs) {
auto&& conv = static_cast<const Convolution&>(def);
OperatorNodeConfig config{conv.make_name()};
return opr::Convolution::make(inputs[0], inputs[1], conv.param(), conv.policy(), config);
}
OP_TRAIT_REG(Convolution, Convolution, opr::Convolution)
.make_from_op_node(make_from_op_node)
.apply_on_var_node(apply_on_var_node)
.fallback();
}} // convolution
namespace { namespace convolution_backward_data {
auto apply_on_var_node(
const OpDef& def,
const VarNodeArray& inputs) {
auto&& conv = static_cast<const ConvolutionBackwardData&>(def);
OperatorNodeConfig config{conv.make_name()};
if (inputs.size() == 2) {
return opr::ConvolutionBackwardData::make(inputs[0], inputs[1], conv.param(), conv.policy(), config);
} else {
mgb_assert(inputs.size() == 3);
return opr::ConvolutionBackwardData::make(inputs[0], inputs[1], inputs[2], conv.param(), conv.policy(), config);
}
}
OP_TRAIT_REG(ConvolutionBackwardData, ConvolutionBackwardData)
.apply_on_var_node(apply_on_var_node)
.fallback();
}} // convolution_backward_data
namespace { namespace convolution3d {
std::shared_ptr<OpDef> make_from_op_node(cg::OperatorNodeBase* node_) {
auto* node = &node_->cast_final_safe<opr::Convolution3D>();
return Convolution3D::make(node->param(), node->execution_policy());
}
auto apply_on_var_node(
const OpDef& def,
const VarNodeArray& inputs) {
auto&& conv = static_cast<const Convolution3D&>(def);
return opr::Convolution3D::make(inputs[0], inputs[1], conv.param(), conv.policy());
}
OP_TRAIT_REG(Convolution3D, Convolution3D, opr::Convolution3D)
.make_from_op_node(make_from_op_node)
.apply_on_var_node(apply_on_var_node)
.fallback();
}} // convolution3d
}
}
......@@ -36,45 +36,6 @@
namespace mgb::imperative {
namespace { namespace convolution {
std::shared_ptr<OpDef> make_from_op_node(cg::OperatorNodeBase* node_) {
auto* node = &node_->cast_final_safe<opr::Convolution>();
return Convolution::make(node->param(), node->execution_policy());
}
auto apply_on_var_node(
const OpDef& def,
const VarNodeArray& inputs) {
auto&& conv = static_cast<const Convolution&>(def);
OperatorNodeConfig config{conv.make_name()};
return opr::Convolution::make(inputs[0], inputs[1], conv.param(), conv.policy(), config);
}
OP_TRAIT_REG(Convolution, Convolution, opr::Convolution)
.make_from_op_node(make_from_op_node)
.apply_on_var_node(apply_on_var_node)
.fallback();
}} // convolution
namespace { namespace convolution_backward_data {
auto apply_on_var_node(
const OpDef& def,
const VarNodeArray& inputs) {
auto&& conv = static_cast<const ConvolutionBackwardData&>(def);
OperatorNodeConfig config{conv.make_name()};
if (inputs.size() == 2) {
return opr::ConvolutionBackwardData::make(inputs[0], inputs[1], conv.param(), conv.policy(), config);
} else {
mgb_assert(inputs.size() == 3);
return opr::ConvolutionBackwardData::make(inputs[0], inputs[1], inputs[2], conv.param(), conv.policy(), config);
}
}
OP_TRAIT_REG(ConvolutionBackwardData, ConvolutionBackwardData)
.apply_on_var_node(apply_on_var_node)
.fallback();
}} // convolution_backward_data
namespace { namespace dimshuffle {
std::shared_ptr<OpDef> make_from_op_node(cg::OperatorNodeBase* node_) {
auto* node = &node_->cast_final_safe<opr::Dimshuffle>();
......
......@@ -51,6 +51,8 @@ def Convolution : MgbHashableOp<"Convolution", [ConvolutionParam, ExecutionPolic
def ConvolutionBackwardData: MgbHashableOp<"ConvolutionBackwardData", [ConvolutionParam, ExecutionPolicyParamBase<"policy">]>;
def Convolution3D: MgbHashableOp<"Convolution3D", [Convolution3DParam, ExecutionPolicyParamBase<"policy">]>;
def DeformableConv : MgbHashableOp<"DeformableConv", [ConvolutionParam, ExecutionPolicyParamBase<"policy">]>;
def GroupLocal: MgbHashableOp<"GroupLocal", [ConvolutionParam]>;
......