From a25570d624ad27858edb77f3e6036497e5fcc92c Mon Sep 17 00:00:00 2001 From: Jianfeng Wang Date: Mon, 10 Aug 2020 17:29:59 +0800 Subject: [PATCH] fix(detection): optimize dtype (#55) --- .../detection/layers/basic/functional.py | 35 ------------------- .../vision/detection/layers/det/anchor.py | 4 +-- .../vision/detection/layers/det/box_utils.py | 4 +-- .../vision/detection/models/faster_rcnn.py | 4 +-- official/vision/detection/models/retinanet.py | 6 ++-- official/vision/detection/tools/test.py | 4 +-- 6 files changed, 11 insertions(+), 46 deletions(-) diff --git a/official/vision/detection/layers/basic/functional.py b/official/vision/detection/layers/basic/functional.py index f44e744..ad8ad61 100644 --- a/official/vision/detection/layers/basic/functional.py +++ b/official/vision/detection/layers/basic/functional.py @@ -13,41 +13,6 @@ import megengine.functional as F from megengine.core import Tensor -def get_padded_array_np( - array: np.ndarray, multiple_number: int = 32, pad_value: float = 0 -) -> np.ndarray: - """ pad the nd-array to multiple stride of th e - - Args: - array (np.ndarray): - the array with the shape of [batch, channel, height, width] - multiple_number (int): - make the height and width can be divided by multiple_number - pad_value (int): the value to be padded - - Returns: - padded_array (np.ndarray) - """ - batch, chl, t_height, t_width = array.shape - padded_height = ( - (t_height + multiple_number - 1) // multiple_number * multiple_number - ) - padded_width = (t_width + multiple_number - 1) // multiple_number * multiple_number - - padded_array = ( - np.ones([batch, chl, padded_height, padded_width], dtype=np.float32) * pad_value - ) - - ndim = array.ndim - if ndim == 4: - padded_array[:, :, :t_height, :t_width] = array - elif ndim == 3: - padded_array[:, :t_height, :t_width] = array - else: - raise Exception("Not supported tensor dim: %d" % ndim) - return padded_array - - def get_padded_tensor( array: Tensor, multiple_number: int = 32, pad_value: float = 0 ) -> Tensor: diff --git a/official/vision/detection/layers/det/anchor.py b/official/vision/detection/layers/det/anchor.py index 83a5b9f..30cfd45 100644 --- a/official/vision/detection/layers/det/anchor.py +++ b/official/vision/detection/layers/det/anchor.py @@ -48,8 +48,8 @@ class DefaultAnchorGenerator(BaseAnchorGenerator): ): super().__init__() self.base_size = base_size - self.anchor_scales = np.array(anchor_scales) - self.anchor_ratios = np.array(anchor_ratios) + self.anchor_scales = np.array(anchor_scales, dtype=np.float32) + self.anchor_ratios = np.array(anchor_ratios, dtype=np.float32) self.offset = offset def _whctrs(self, anchor): diff --git a/official/vision/detection/layers/det/box_utils.py b/official/vision/detection/layers/det/box_utils.py index 4d62590..cc9ad47 100644 --- a/official/vision/detection/layers/det/box_utils.py +++ b/official/vision/detection/layers/det/box_utils.py @@ -42,8 +42,8 @@ class BoxCoder(BoxCoderBase, metaclass=ABCMeta): reg_std(np.ndarray): [x0_std, x1_std, y0_std, y1_std] or None """ - self.reg_mean = np.array(reg_mean)[None, :] - self.reg_std = np.array(reg_std)[None, :] + self.reg_mean = np.array(reg_mean, dtype=np.float32)[None, :] + self.reg_std = np.array(reg_std, dtype=np.float32)[None, :] super().__init__() @staticmethod diff --git a/official/vision/detection/models/faster_rcnn.py b/official/vision/detection/models/faster_rcnn.py index 60ce5ca..bfade46 100644 --- a/official/vision/detection/models/faster_rcnn.py +++ b/official/vision/detection/models/faster_rcnn.py @@ -68,8 +68,8 @@ class FasterRCNN(M.Module): def preprocess_image(self, image): normed_image = ( - image - np.array(self.cfg.img_mean)[None, :, None, None] - ) / np.array(self.cfg.img_std)[None, :, None, None] + image - np.array(self.cfg.img_mean, dtype=np.float32)[None, :, None, None] + ) / np.array(self.cfg.img_std, dtype=np.float32)[None, :, None, None] return layers.get_padded_tensor(normed_image, 32, 0.0) def forward(self, inputs): diff --git a/official/vision/detection/models/retinanet.py b/official/vision/detection/models/retinanet.py index b515d1e..696238e 100644 --- a/official/vision/detection/models/retinanet.py +++ b/official/vision/detection/models/retinanet.py @@ -33,7 +33,7 @@ class RetinaNet(M.Module): ) self.box_coder = layers.BoxCoder(cfg.reg_mean, cfg.reg_std) - self.stride_list = np.array(cfg.stride).astype(np.float32) + self.stride_list = np.array(cfg.stride, dtype=np.float32) self.in_features = ["p3", "p4", "p5", "p6", "p7"] # ----------------------- build the backbone ------------------------ # @@ -82,8 +82,8 @@ class RetinaNet(M.Module): def preprocess_image(self, image): normed_image = ( - image - np.array(self.cfg.img_mean)[None, :, None, None] - ) / np.array(self.cfg.img_std)[None, :, None, None] + image - np.array(self.cfg.img_mean, dtype=np.float32)[None, :, None, None] + ) / np.array(self.cfg.img_std, dtype=np.float32)[None, :, None, None] return layers.get_padded_tensor(normed_image, 32, 0.0) def forward(self, inputs): diff --git a/official/vision/detection/tools/test.py b/official/vision/detection/tools/test.py index 47a458e..1ef1bd4 100644 --- a/official/vision/detection/tools/test.py +++ b/official/vision/detection/tools/test.py @@ -64,7 +64,7 @@ def main(): args.start_epoch = cfg.max_epoch - 1 if args.end_epoch == -1: args.end_epoch = args.start_epoch - assert 0 <= args.start_epoch <= args.end_epoch < cfg.max_epoch + assert 0 <= args.start_epoch <= args.end_epoch < cfg.max_epoch for epoch_num in range(args.start_epoch, args.end_epoch + 1): if args.weight_file: @@ -182,7 +182,7 @@ def worker( result_queue.put_nowait( { "det_res": pred_res, - "image_id": int(data_dict[1][2][0].split(".")[0].split("_")[-1]), + "image_id": int(data_dict[1][2][0].split(".")[0]), } ) -- GitLab