box_utils.py 5.9 KB
Newer Older
M
MegEngine Team 已提交
1 2 3 4 5 6 7 8 9 10
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
from abc import ABCMeta, abstractmethod

11 12
import numpy as np

M
MegEngine Team 已提交
13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33
import megengine.functional as F
from megengine.core import Tensor


class BoxCoderBase(metaclass=ABCMeta):
    """Abstract interface for box coders.

    A concrete coder has to override both ``encode`` and ``decode``;
    the class cannot be instantiated until it does.
    """

    def __init__(self):
        """Nothing to initialise at this level."""

    @abstractmethod
    def encode(self) -> Tensor:
        """Encoding step; the implementation is supplied by subclasses."""

    @abstractmethod
    def decode(self) -> Tensor:
        """Decoding step; the implementation is supplied by subclasses."""


class BoxCoder(BoxCoderBase, metaclass=ABCMeta):
    """Box coder based on center/size offsets.

    ``encode`` turns (box, ground-truth) pairs into normalized
    (dx, dy, dw, dh) regression targets; ``decode`` applies such deltas to
    anchors and returns boxes in (x1, y1, x2, y2) form.
    """

    def __init__(
        self,
        reg_mean=(0.0, 0.0, 0.0, 0.0),
        reg_std=(1.0, 1.0, 1.0, 1.0),
    ):
        """
        Args:
            reg_mean(np.ndarray): [dx_mean, dy_mean, dw_mean, dh_mean] or None.
                None selects a zero mean.
            reg_std(np.ndarray):  [dx_std, dy_std, dw_std, dh_std] or None.
                None selects a unit std.

        """
        # The docstring has always advertised ``None``; map it to the neutral
        # normalization instead of failing inside ``np.array(None)[None, :]``.
        if reg_mean is None:
            reg_mean = (0.0, 0.0, 0.0, 0.0)
        if reg_std is None:
            reg_std = (1.0, 1.0, 1.0, 1.0)
        # Leading axis of size 1 so the statistics broadcast over rows.
        self.reg_mean = np.array(reg_mean)[None, :]
        self.reg_std = np.array(reg_std)[None, :]
        super().__init__()

    @staticmethod
    def _concat_new_axis(t1, t2, t3, t4, axis=1):
        """Append a trailing axis to each of the four tensors and concatenate
        them along ``axis``."""
        return F.concat(
            [
                F.add_axis(t1, -1),
                F.add_axis(t2, -1),
                F.add_axis(t3, -1),
                F.add_axis(t4, -1),
            ],
            axis=axis,
        )

    @staticmethod
    def _box_ltrb_to_cs_opr(bbox, addaxis=None):
        """ transform the left-top right-bottom encoding bounding boxes
        to center and size encodings

        Columns 0..3 of ``bbox`` are read as (x1, y1, x2, y2). Returns the
        tuple ``(width, height, ctr_x, ctr_y)``; when ``addaxis`` is not None
        each element additionally gets a new axis at that position.
        """
        bbox_width = bbox[:, 2] - bbox[:, 0]
        bbox_height = bbox[:, 3] - bbox[:, 1]
        bbox_ctr_x = bbox[:, 0] + 0.5 * bbox_width
        bbox_ctr_y = bbox[:, 1] + 0.5 * bbox_height
        if addaxis is None:
            return bbox_width, bbox_height, bbox_ctr_x, bbox_ctr_y
        return (
            F.add_axis(bbox_width, addaxis),
            F.add_axis(bbox_height, addaxis),
            F.add_axis(bbox_ctr_x, addaxis),
            F.add_axis(bbox_ctr_y, addaxis),
        )

    def encode(self, bbox: Tensor, gt: Tensor) -> Tensor:
        """Compute normalized regression targets that map ``bbox`` onto ``gt``.

        Both inputs are read as rows of (x1, y1, x2, y2). The result holds
        (dx, dy, dw, dh) per row, with ``reg_mean`` subtracted and then
        divided by ``reg_std``.
        """
        bbox_width, bbox_height, bbox_ctr_x, bbox_ctr_y = self._box_ltrb_to_cs_opr(
            bbox
        )
        gt_width, gt_height, gt_ctr_x, gt_ctr_y = self._box_ltrb_to_cs_opr(gt)

        # Center offsets are expressed in units of the reference box size,
        # size ratios in log space.
        target_dx = (gt_ctr_x - bbox_ctr_x) / bbox_width
        target_dy = (gt_ctr_y - bbox_ctr_y) / bbox_height
        target_dw = F.log(gt_width / bbox_width)
        target_dh = F.log(gt_height / bbox_height)
        target = self._concat_new_axis(target_dx, target_dy, target_dw, target_dh)

        target -= self.reg_mean
        target /= self.reg_std
        return target

    def decode(self, anchors: Tensor, deltas: Tensor) -> Tensor:
        """Apply regression ``deltas`` to ``anchors`` and return the boxes.

        ``anchors`` is read as rows of (x1, y1, x2, y2). ``deltas`` is read
        along axis 1 as repeating groups of (dx, dy, dw, dh). The output keeps
        one row per anchor with the predicted (x1, y1, x2, y2) groups
        flattened along axis 1.
        """
        # Undo the normalization applied by ``encode``. New tensors are built
        # (no ``*=`` / ``+=``) so the caller's ``deltas`` is left untouched.
        deltas = deltas * self.reg_std + self.reg_mean

        (
            anchor_width,
            anchor_height,
            anchor_ctr_x,
            anchor_ctr_y,
        ) = self._box_ltrb_to_cs_opr(anchors, 1)
        pred_ctr_x = anchor_ctr_x + deltas[:, 0::4] * anchor_width
        pred_ctr_y = anchor_ctr_y + deltas[:, 1::4] * anchor_height
        pred_width = anchor_width * F.exp(deltas[:, 2::4])
        pred_height = anchor_height * F.exp(deltas[:, 3::4])

        pred_x1 = pred_ctr_x - 0.5 * pred_width
        pred_y1 = pred_ctr_y - 0.5 * pred_height
        pred_x2 = pred_ctr_x + 0.5 * pred_width
        pred_y2 = pred_ctr_y + 0.5 * pred_height

        # Stack the four coordinates on a new last axis, then flatten so each
        # row is x1, y1, x2, y2 repeated per group.
        pred_box = self._concat_new_axis(pred_x1, pred_y1, pred_x2, pred_y2, 2)
        pred_box = pred_box.reshape(pred_box.shapeof(0), -1)

        return pred_box


121
def get_iou(boxes1: Tensor, boxes2: Tensor, return_ignore=False) -> Tensor:
    """
    Given two lists of boxes of size N and M,
    compute the IoU (intersection over union)
    between __all__ N x M pairs of boxes.
    The box order must be (xmin, ymin, xmax, ymax).

    Args:
        boxes1 (Tensor): N boxes; only the first four columns are used.
        boxes2 (Tensor): M boxes; the first four columns are the coordinates.
            When ``return_ignore`` is True, column 4 is read as well and a
            value of -1 marks the box as "ignore".
        return_ignore (bool): also compute the overlap with ignored boxes.

    Returns:
        Tensor: IoU, sized [N,M]. When ``return_ignore`` is True, a tuple
        ``(overlaps, overlaps_ignore)`` of two [N,M] tensors is returned
        instead: ``overlaps`` is the IoU zeroed for ignored boxes, and
        ``overlaps_ignore`` is intersection over the ``boxes1`` area, kept
        only for ignored boxes.
    """
    num_box = boxes1.shapeof(0)
    num_gt = boxes2.shapeof(0)
    coord_shape = (num_box, num_gt, 4)
    area_shape = (num_box, num_gt)

    # Restrict both inputs to their coordinate columns so that extra columns
    # (e.g. a tag in column 4) cannot break the broadcast to [N, M, 4].
    box = boxes1[:, :4]
    gt = boxes2[:, :4]

    b_box = F.add_axis(box, 1).broadcast(*coord_shape)
    b_gt = F.add_axis(gt, 0).broadcast(*coord_shape)

    # Width/height of the intersection rectangle; negative when disjoint,
    # hence the clamp to zero before multiplying.
    iw = F.minimum(b_box[:, :, 2], b_gt[:, :, 2]) - F.maximum(
        b_box[:, :, 0], b_gt[:, :, 0]
    )
    ih = F.minimum(b_box[:, :, 3], b_gt[:, :, 3]) - F.maximum(
        b_box[:, :, 1], b_gt[:, :, 1]
    )
    inter = F.maximum(iw, 0) * F.maximum(ih, 0)

    area_box = (box[:, 2] - box[:, 0]) * (box[:, 3] - box[:, 1])
    area_gt = (gt[:, 2] - gt[:, 0]) * (gt[:, 3] - gt[:, 1])

    b_area_box = F.add_axis(area_box, 1).broadcast(*area_shape)
    b_area_gt = F.add_axis(area_gt, 0).broadcast(*area_shape)

    union = b_area_box + b_area_gt - inter
    overlaps = F.maximum(inter / union, 0)

    if return_ignore:
        overlaps_ignore = F.maximum(inter / b_area_box, 0)
        gt_ignore_mask = F.add_axis((boxes2[:, 4] == -1), 0).broadcast(*area_shape)
        overlaps *= 1 - gt_ignore_mask
        overlaps_ignore *= gt_ignore_mask
        return overlaps, overlaps_ignore

    return overlaps


def get_clipped_box(boxes, hw):
    """ Clip the boxes into the image region.

    Args:
        boxes: tensor whose axis 1 is read as repeating groups of
            (x1, y1, x2, y2); one group per row is the simplest case.
        hw: indexable pair of upper bounds; ``hw[0]`` limits the y
            coordinates and ``hw[1]`` limits the x coordinates.

    Returns:
        Tensor with the same column layout as ``boxes``, x values clamped to
        [0, hw[1]] and y values clamped to [0, hw[0]].
    """
    # 0 <= x1 <= hw[1]
    box_x1 = F.clamp(boxes[:, 0::4], lower=0, upper=hw[1])
    # 0 <= y1 <= hw[0]
    box_y1 = F.clamp(boxes[:, 1::4], lower=0, upper=hw[0])
    # 0 <= x2 <= hw[1]
    box_x2 = F.clamp(boxes[:, 2::4], lower=0, upper=hw[1])
    # 0 <= y2 <= hw[0]
    box_y2 = F.clamp(boxes[:, 3::4], lower=0, upper=hw[0])

    # Stack the coordinates on a new last axis and flatten, so every group
    # comes back as x1, y1, x2, y2. Concatenating the strided slices directly
    # along axis 1 would group columns by coordinate whenever a row holds more
    # than one box.
    clip_box = F.concat(
        [
            F.add_axis(box_x1, -1),
            F.add_axis(box_y1, -1),
            F.add_axis(box_x2, -1),
            F.add_axis(box_y2, -1),
        ],
        axis=2,
    )
    clip_box = clip_box.reshape(clip_box.shapeof(0), -1)

    return clip_box
184 185 186 187 188 189 190


def filter_boxes(boxes, size=0):
    """Return a mask selecting boxes strictly larger than ``size`` on both sides.

    Columns 0..3 of ``boxes`` are read as (x1, y1, x2, y2). The result is the
    elementwise product of the width test and the height test, so an entry is
    truthy only when both ``x2 - x1`` and ``y2 - y1`` exceed ``size``.
    """
    x1, y1, x2, y2 = (boxes[:, col] for col in range(4))
    wide_enough = (x2 - x1) > size
    tall_enough = (y2 - y1) > size
    return wide_enough * tall_enough