Commit 976b351a authored by: Megvii Engine Team

feat(functional/nn): support F.warp_perspective with `mat_idx`

GitOrigin-RevId: 66910c8bd8ed96f888f50653e8cfc4326b12cfe8
Parent 899d70a8
@@ -300,18 +300,18 @@ def remap(
 def warp_affine(
     inp: Tensor,
-    weight: Tensor,
-    out_shape,
-    border_mode="replicate",
-    border_val=0,
-    format="NHWC",
-    imode="linear",
-):
+    mat: Tensor,
+    out_shape: Union[Tuple[int, int], int, Tensor],
+    border_mode: str = "replicate",
+    border_val: float = 0.0,
+    format: str = "NHWC",
+    interp_mode: str = "linear",
+) -> Tensor:
     """
     Batched affine transform on 2D images.

     :param inp: input image.
-    :param weight: weight tensor.
+    :param mat: `(batch, 2, 3)` transformation matrix.
     :param out_shape: output tensor shape.
     :param border_mode: pixel extrapolation method.
         Default: "wrap". Currently "constant", "reflect",
@@ -319,7 +319,7 @@ def warp_affine(
     :param border_val: value used in case of a constant border. Default: 0
     :param format: "NHWC" as default based on historical concerns,
         "NCHW" is also supported. Default: "NHWC".
-    :param imode: interpolation methods. Could be "linear", "nearest", "cubic", "area".
+    :param interp_mode: interpolation methods. Could be "linear", "nearest", "cubic", "area".
         Default: "linear".
     :return: output tensor.
@@ -330,19 +330,24 @@ def warp_affine(
     On different platforms, different combinations are supported.
     """
     op = builtin.WarpAffine(
-        border_mode=border_mode, border_val=border_val, format=format, imode=imode
+        border_mode=border_mode,
+        border_val=border_val,
+        format=format,
+        imode=interp_mode,
     )
     out_shape = utils.astensor1d(out_shape, inp, dtype="int32", device=inp.device)
-    (result,) = apply(op, inp, weight, out_shape)
+    (result,) = apply(op, inp, mat, out_shape)
     return result
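A minimal usage sketch (not part of the diff) of the renamed warp_affine signature above, assuming the function is exposed alongside warp_perspective as F.vision.warp_affine in this revision and that the default "NHWC" layout is used; an identity (batch, 2, 3) matrix should roughly reproduce the input:

import numpy as np
import megengine.functional as F
from megengine import tensor

# One 2x2 single-channel image in the default NHWC layout: shape (1, 2, 2, 1).
x = tensor(np.arange(4, dtype=np.float32).reshape(1, 2, 2, 1))
# (batch, 2, 3) affine matrix; the identity transform leaves pixels in place.
mat = tensor(np.array([[[1.0, 0.0, 0.0],
                        [0.0, 1.0, 0.0]]], dtype=np.float32))
out = F.vision.warp_affine(x, mat, (2, 2))  # out_shape given as a plain (h, w) tuple
print(out.numpy().reshape(2, 2))            # should roughly match the input pixels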
 def warp_perspective(
     inp: Tensor,
-    M: Tensor,
-    dsize: Union[Tuple[int, int], int, Tensor],
+    mat: Tensor,
+    out_shape: Union[Tuple[int, int], int, Tensor],
+    mat_idx: Optional[Union[Iterable[int], Tensor]] = None,
     border_mode: str = "replicate",
     border_val: float = 0.0,
     format: str = "NCHW",
     interp_mode: str = "linear",
 ) -> Tensor:
     r"""
@@ -356,18 +361,23 @@ def warp_perspective(
         \frac{M_{10}h + M_{11}w + M_{12}}{M_{20}h + M_{21}w + M_{22}}
         \right)

+    Optionally, `mat_idx` can be set to assign different transformations to the same image;
+    otherwise the input images and transformations should be in one-to-one correspondence.

     :param inp: input image.
-    :param M: `(batch, 3, 3)` transformation matrix.
-    :param dsize: `(h, w)` size of the output image.
+    :param mat: `(batch, 3, 3)` transformation matrix.
+    :param out_shape: `(h, w)` size of the output image.
+    :param mat_idx: `(batch, )` image batch idx assigned to each matrix. Default: None
     :param border_mode: pixel extrapolation method.
         Default: "replicate". Currently also support "constant", "reflect",
         "reflect_101", "wrap".
     :param border_val: value used in case of a constant border. Default: 0
     :param format: "NHWC" is also supported. Default: "NCHW".
     :param interp_mode: interpolation methods.
         Default: "linear". Currently only support "linear" mode.
     :return: output tensor.

-    Note:
+    .. note::

     The transformation matrix is the inverse of that used by `cv2.warpPerspective`.
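A brief sketch of the note above (not part of the diff), assuming OpenCV is available purely for illustration: a cv2-style matrix maps src to dst, so its inverse is what warp_perspective expects here; an isotropic scale is used so that axis-ordering details do not affect the result.

import cv2
import numpy as np
import megengine.functional as F
from megengine import tensor

src_pts = np.float32([[0, 0], [3, 0], [3, 3], [0, 3]])
dst_pts = np.float32([[0, 0], [1, 0], [1, 1], [0, 1]])
m_cv = cv2.getPerspectiveTransform(src_pts, dst_pts)  # cv2-style matrix (src -> dst), here a 1/3 scale
m_meg = np.linalg.inv(m_cv).astype(np.float32)        # invert it before passing to warp_perspective
x = tensor(np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4))   # NCHW input
out = F.vision.warp_perspective(x, tensor(m_meg[None]), (2, 2))   # add a batch dim to get (1, 3, 3)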
@@ -398,11 +408,15 @@ def warp_perspective(
     """
     op = builtin.WarpPerspective(
-        imode=interp_mode, bmode=border_mode, format="NCHW", border_val=border_val
+        imode=interp_mode, bmode=border_mode, format=format, border_val=border_val
     )
-    inp, M = utils.convert_inputs(inp, M)
-    dsize = astensor1d(dsize, inp, dtype="int32", device=inp.device)
-    (result,) = apply(op, inp, M, dsize)
+    inp, mat = utils.convert_inputs(inp, mat)
+    out_shape = astensor1d(out_shape, inp, dtype="int32", device=inp.device)
+    if mat_idx is not None:
+        mat_idx = astensor1d(mat_idx, inp, dtype="int32", device=inp.device)
+        (result,) = apply(op, inp, mat, mat_idx, out_shape)
+        return result
+    (result,) = apply(op, inp, mat, out_shape)
     return result
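Since the docstring above only describes `mat_idx` briefly, here is a minimal usage sketch of the new code path (not part of the diff), assuming the same imports as the test below; a single image is paired with three matrices by repeating batch index 0, which is the "different transformations on the same image" case:

import numpy as np
import megengine.functional as F
from megengine import tensor

x = tensor(np.arange(16, dtype=np.float32).reshape(1, 1, 4, 4))  # one NCHW image
# Three translations (by 0, 1 and 2 pixels along both axes), stacked into a (3, 3, 3) matrix batch.
mats = np.stack(
    [np.array([[1, 0, s], [0, 1, s], [0, 0, 1]], dtype=np.float32) for s in (0, 1, 2)]
)
# Every matrix is assigned to image 0, so the single input image yields three warped outputs.
out = F.vision.warp_perspective(x, tensor(mats), (2, 2), mat_idx=[0, 0, 0])
print(out.shape)  # expected (3, 1, 2, 2): one output per matrix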
@@ -370,6 +370,32 @@ def test_warp_perspective():
     )


+def test_warp_perspective_mat_idx():
+    inp_shape = (2, 1, 4, 4)
+    x = tensor(np.arange(32, dtype=np.float32).reshape(inp_shape))
+    M_shape = (1, 3, 3)
+    # M defines a translation: dst(1, 1, h, w) = src(1, 1, h+1, w+1)
+    M = tensor(
+        np.array(
+            [[1.0, 0.0, 1.0], [0.0, 1.0, 1.0], [0.0, 0.0, 1.0]], dtype=np.float32
+        ).reshape(M_shape)
+    )
+    M = F.concat([M,] * 4, 0)
+    outp = F.vision.warp_perspective(x, M, (2, 2), mat_idx=[0, 1, 1, 0])
+    np.testing.assert_equal(
+        outp.numpy(),
+        np.array(
+            [
+                [[[5.0, 6.0], [9.0, 10.0]]],
+                [[[21.0, 22.0], [25.0, 26.0]]],
+                [[[21.0, 22.0], [25.0, 26.0]]],
+                [[[5.0, 6.0], [9.0, 10.0]]],
+            ],
+            dtype=np.float32,
+        ),
+    )
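As a rough cross-check of the expected values above (an editor sketch, not part of the diff): the matrix translates by one pixel along each axis, so each 2x2 output is the crop of the selected source image starting at row 1, column 1, and mat_idx [0, 1, 1, 0] picks images 0, 1, 1, 0 in that order.

import numpy as np

imgs = np.arange(32, dtype=np.float32).reshape(2, 1, 4, 4)
expected = np.stack([imgs[i, :, 1:3, 1:3] for i in [0, 1, 1, 0]])
print(expected)  # [[[5, 6], [9, 10]]], [[[21, 22], [25, 26]]], ... matching the assertion above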
 def test_warp_affine():
     inp_shape = (1, 3, 3, 3)
     x = tensor(np.arange(27, dtype=np.float32).reshape(inp_shape))