未验证 提交 e61ab3d8 编写于 作者: G Guanghua Yu 提交者: GitHub

add Mask R-CNN model (#1787)

* add Mask R-CNN model

* fix mask rcnn eval

* fix mask rcnn infer

* fix mask rcnn config

* fix infer config
上级 7e044196
architecture: MaskRCNN
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
weights: output/mask_rcnn_r50_fpn_1x/model_final
load_static_weights: True
# Model Architecture
MaskRCNN:
# model anchor info flow
anchor: Anchor
proposal: Proposal
mask: Mask
# model feat info flow
backbone: ResNet
rpn_head: RPNHead
bbox_head: BBoxHead
mask_head: MaskHead
# post process
bbox_post_process: BBoxPostProcess
mask_post_process: MaskPostProcess
ResNet:
# index 0 stands for res2
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [2]
num_stages: 3
RPNHead:
rpn_feat:
name: RPNFeat
feat_in: 1024
feat_out: 1024
anchor_per_position: 15
Anchor:
anchor_generator:
name: AnchorGeneratorRPN
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_target_generator:
name: AnchorTargetGeneratorRPN
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
straddle_thresh: 0.0
Proposal:
proposal_generator:
name: ProposalGenerator
min_size: 0.0
nms_thresh: 0.7
train_pre_nms_top_n: 12000
train_post_nms_top_n: 2000
infer_pre_nms_top_n: 6000
infer_post_nms_top_n: 1000
proposal_target_generator:
name: ProposalTargetGenerator
batch_size_per_im: 512
bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],]
bg_thresh_hi: [0.5,]
bg_thresh_lo: [0.0,]
fg_thresh: [0.5,]
fg_fraction: 0.25
BBoxHead:
bbox_feat:
name: BBoxFeat
roi_extractor: RoIAlign
head_feat:
name: Res5Head
feat_in: 1024
feat_out: 512
with_pool: true
in_feat: 2048
BBoxPostProcess:
decode:
name: RCNNBox
num_classes: 81
batch_size: 1
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5
Mask:
mask_target_generator:
name: MaskTargetGenerator
mask_resolution: 14
RoIAlign:
resolution: 14
sampling_ratio: 0
start_level: 0
end_level: 0
MaskHead:
mask_feat:
name: MaskFeat
num_convs: 0
feat_in: 2048
feat_out: 256
mask_roi_extractor: RoIAlign
share_bbox_feat: true
feat_in: 256
MaskPostProcess:
mask_resolution: 14
worker_num: 2
TrainReader:
inputs_def:
fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
sample_transforms:
- DecodeImage: {to_rgb: true}
- RandomFlipImage: {prob: 0.5, is_mask_flip: true}
- NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true}
- Permute: {to_bgr: false, channel_first: true}
batch_transforms:
- PadBatch: {pad_to_stride: 32, use_padded_im_info: false, pad_gt: true}
batch_size: 1
shuffle: true
drop_last: true
EvalReader:
inputs_def:
fields: ['image', 'im_shape', 'scale_factor', 'im_id']
sample_transforms:
- DecodeOp: {}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeOp: {interp: 1, target_size: [800, 1333]}
- PermuteOp: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
drop_empty: false
TestReader:
inputs_def:
fields: ['image', 'im_shape', 'scale_factor', 'im_id']
sample_transforms:
- DecodeOp: {}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeOp: {interp: 1, target_size: [800, 1333]}
- PermuteOp: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
......@@ -9,7 +9,7 @@ TrainReader:
- ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true}
- Permute: {to_bgr: false, channel_first: true}
batch_transforms:
- PadBatch: {pad_to_stride: 32, use_padded_im_info: false, pad_gt: true}
- PadBatch: {pad_to_stride: -1., use_padded_im_info: false, pad_gt: true}
batch_size: 1
shuffle: true
drop_last: true
......@@ -24,7 +24,7 @@ EvalReader:
- ResizeOp: {interp: 1, target_size: [800, 1333]}
- PermuteOp: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: false}
- PadBatchOp: {pad_to_stride: -1., pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
......@@ -33,14 +33,15 @@ EvalReader:
TestReader:
inputs_def:
fields: ['image', 'im_info', 'im_id']
fields: ['image', 'im_shape', 'scale_factor', 'im_id']
sample_transforms:
- DecodeImage: {to_rgb: true, with_mixup: false}
- NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeImage: {interp: 1, max_size: 1333, target_size: 800, use_cv2: true}
- Permute: {channel_first: true, to_bgr: false}
- DecodeOp: {}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeOp: {interp: 1, target_size: [800, 1333]}
- PermuteOp: {}
batch_transforms:
- PadBatch: {pad_to_stride: 32, use_padded_im_info: false, pad_gt: false}
- PadBatchOp: {pad_to_stride: -1., pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
drop_empty: false
_BASE_: [
'./_base_/models/mask_rcnn_r50.yml',
'./_base_/optimizers/rcnn_1x.yml',
'./_base_/datasets/coco.yml',
'./_base_/readers/mask_reader.yml',
'./_base_/runtime.yml',
]
......@@ -2,6 +2,6 @@ _BASE_: [
'./_base_/models/mask_rcnn_r50_fpn.yml',
'./_base_/optimizers/rcnn_1x.yml',
'./_base_/datasets/coco.yml',
'./_base_/readers/mask_reader.yml',
'./_base_/readers/mask_fpn_reader.yml',
'./_base_/runtime.yml',
]
......@@ -65,7 +65,7 @@ class MaskRCNN(BaseArch):
def model_arch(self):
# Backbone
body_feats = self.backbone(self.inputs)
spatial_scale = None
spatial_scale = 1. / 16
# Neck
if self.neck is not None:
......@@ -87,8 +87,8 @@ class MaskRCNN(BaseArch):
# compute targets here when training
rois = self.proposal(self.inputs, self.rpn_head_out, self.anchor_out)
# BBox Head
bbox_feat, self.bbox_head_out = self.bbox_head(body_feats, rois,
spatial_scale)
bbox_feat, self.bbox_head_out, self.bbox_head_feat_func = self.bbox_head(
body_feats, rois, spatial_scale)
rois_has_mask_int32 = None
if self.inputs['mode'] == 'infer':
......@@ -106,9 +106,9 @@ class MaskRCNN(BaseArch):
bbox_targets)
# Mask Head
self.mask_head_out = self.mask_head(self.inputs, body_feats,
self.bboxes, bbox_feat,
rois_has_mask_int32, spatial_scale)
self.mask_head_out = self.mask_head(
self.inputs, body_feats, self.bboxes, bbox_feat,
rois_has_mask_int32, spatial_scale, self.bbox_head_feat_func)
def get_loss(self, ):
loss = {}
......
......@@ -22,6 +22,9 @@ from paddle.regularizer import L2Decay
from ppdet.core.workspace import register
from ppdet.modeling import ops
from ..backbone.name_adapter import NameAdapter
from ..backbone.resnet import Blocks
@register
class TwoFCHead(nn.Layer):
......@@ -74,6 +77,23 @@ class TwoFCHead(nn.Layer):
return fc7_relu
@register
class Res5Head(nn.Layer):
    """RoI feature head that passes its input through one ``Blocks`` stage.

    Args:
        feat_in: Input channel count forwarded to ``Blocks``.
        feat_out: Channel argument forwarded to ``Blocks``. The instance
            attribute ``feat_out`` is stored as four times this value.
    """

    def __init__(self, feat_in=1024, feat_out=512):
        super(Res5Head, self).__init__()
        adapter = NameAdapter(self)
        # Not read anywhere in this class; kept so the set of instance
        # attributes stays the same.
        self.res5_conv = []
        res5_blocks = Blocks(
            feat_in, feat_out, count=3, name_adapter=adapter, stage_num=5)
        # The sublayer is registered under 'res5_roi_feat'; the value returned
        # by add_sublayer is what forward() calls.
        self.res5 = self.add_sublayer('res5_roi_feat', res5_blocks)
        # NOTE(review): presumably reflects a 4x channel expansion inside
        # Blocks - confirm against its definition.
        self.feat_out = feat_out * 4

    def forward(self, roi_feat, stage=0):
        """Apply the res5 sublayer to ``roi_feat`` and return its output.

        ``stage`` is accepted but not used by this head.
        """
        return self.res5(roi_feat)
@register
class BBoxFeat(nn.Layer):
__inject__ = ['roi_extractor', 'head_feat']
......@@ -86,7 +106,7 @@ class BBoxFeat(nn.Layer):
def forward(self, body_feats, rois, spatial_scale, stage=0):
rois_feat = self.roi_extractor(body_feats, rois, spatial_scale)
bbox_feat = self.head_feat(rois_feat, stage)
return bbox_feat
return bbox_feat, self.head_feat
@register
......@@ -139,15 +159,19 @@ class BBoxHead(nn.Layer):
self.bbox_delta_list.append(bbox_delta)
def forward(self, body_feats, rois, spatial_scale, stage=0):
bbox_feat = self.bbox_feat(body_feats, rois, spatial_scale, stage)
if self.with_pool:
bbox_feat = F.pool2d(
bbox_feat, pool_type='avg', global_pooling=True)
bbox_feat, head_feat_func = self.bbox_feat(body_feats, rois,
spatial_scale, stage)
bbox_head_out = []
scores = self.bbox_score_list[stage](bbox_feat)
deltas = self.bbox_delta_list[stage](bbox_feat)
if self.with_pool:
bbox_feat_ = F.adaptive_avg_pool2d(bbox_feat, output_size=1)
bbox_feat_ = paddle.squeeze(bbox_feat_, axis=[2, 3])
scores = self.bbox_score_list[stage](bbox_feat_)
deltas = self.bbox_delta_list[stage](bbox_feat_)
else:
scores = self.bbox_score_list[stage](bbox_feat)
deltas = self.bbox_delta_list[stage](bbox_feat)
bbox_head_out.append((scores, deltas))
return bbox_feat, bbox_head_out
return bbox_feat, bbox_head_out, head_feat_func
def _get_head_loss(self, score, delta, target):
# bbox cls
......
......@@ -28,8 +28,8 @@ class MaskFeat(Layer):
__inject__ = ['mask_roi_extractor']
def __init__(self,
mask_roi_extractor,
num_convs=1,
mask_roi_extractor=None,
num_convs=0,
feat_in=2048,
feat_out=256,
mask_num_stages=1,
......@@ -82,12 +82,16 @@ class MaskFeat(Layer):
bbox_feat,
mask_index,
spatial_scale,
stage=0):
if self.share_bbox_feat:
stage=0,
bbox_head_feat_func=None,
mode='train'):
if self.share_bbox_feat and mask_index:
rois_feat = paddle.gather(bbox_feat, mask_index)
else:
rois_feat = self.mask_roi_extractor(body_feats, bboxes,
spatial_scale)
if bbox_head_feat_func is not None and mode == 'infer':
rois_feat = bbox_head_feat_func(rois_feat)
# upsample
mask_feat = self.upsample_module[stage](rois_feat)
return mask_feat
......@@ -131,8 +135,14 @@ class MaskHead(Layer):
spatial_scale,
stage=0):
# feat
mask_feat = self.mask_feat(body_feats, bboxes, bbox_feat, mask_index,
spatial_scale, stage)
mask_feat = self.mask_feat(
body_feats,
bboxes,
bbox_feat,
mask_index,
spatial_scale,
stage,
mode='train')
# logits
mask_head_out = self.mask_fcn_logits[stage](mask_feat)
return mask_head_out
......@@ -144,7 +154,8 @@ class MaskHead(Layer):
bbox_feat,
mask_index,
spatial_scale,
stage=0):
stage=0,
bbox_head_feat_func=None):
bbox, bbox_num = bboxes
if bbox.shape[0] == 0:
mask_head_out = bbox
......@@ -155,11 +166,18 @@ class MaskHead(Layer):
scale_factor_list.append(scale_factor[idx, 0])
scale_factor_list = paddle.cast(
paddle.concat(scale_factor_list), 'float32')
scaled_bbox = paddle.multiply(
bbox[:, 2:], scale_factor_list, axis=0)
scale_factor_list = paddle.reshape(scale_factor_list, shape=[-1, 1])
scaled_bbox = paddle.multiply(bbox[:, 2:], scale_factor_list)
scaled_bboxes = (scaled_bbox, bbox_num)
mask_feat = self.mask_feat(body_feats, scaled_bboxes, bbox_feat,
mask_index, spatial_scale, stage)
mask_feat = self.mask_feat(
body_feats,
scaled_bboxes,
bbox_feat,
mask_index,
spatial_scale,
stage,
bbox_head_feat_func,
mode='infer')
mask_logit = self.mask_fcn_logits[stage](mask_feat)
mask_head_out = F.sigmoid(mask_logit)
return mask_head_out
......@@ -171,15 +189,16 @@ class MaskHead(Layer):
bbox_feat,
mask_index,
spatial_scale,
bbox_head_feat_func=None,
stage=0):
if inputs['mode'] == 'train':
mask_head_out = self.forward_train(body_feats, bboxes, bbox_feat,
mask_index, spatial_scale, stage)
else:
scale_factor = inputs['scale_factor']
mask_head_out = self.forward_test(scale_factor, body_feats, bboxes,
bbox_feat, mask_index,
spatial_scale, stage)
mask_head_out = self.forward_test(
scale_factor, body_feats, bboxes, bbox_feat, mask_index,
spatial_scale, stage, bbox_head_feat_func)
return mask_head_out
def get_loss(self, mask_head_out, mask_target):
......
......@@ -295,21 +295,13 @@ class RCNNBox(object):
box_normalized=self.box_normalized,
axis=self.axis)
# TODO: Update box_clip
origin_h = origin_shape[:, 0] - 1
origin_w = origin_shape[:, 1] - 1
origin_h = paddle.unsqueeze(origin_shape[:, 0] - 1, axis=1)
origin_w = paddle.unsqueeze(origin_shape[:, 1] - 1, axis=1)
zeros = paddle.zeros(origin_h.shape, 'float32')
x1 = paddle.maximum(
paddle.minimum(
bbox[:, :, 0], origin_w, axis=0), zeros, axis=0)
y1 = paddle.maximum(
paddle.minimum(
bbox[:, :, 1], origin_h, axis=0), zeros, axis=0)
x2 = paddle.maximum(
paddle.minimum(
bbox[:, :, 2], origin_w, axis=0), zeros, axis=0)
y2 = paddle.maximum(
paddle.minimum(
bbox[:, :, 3], origin_h, axis=0), zeros, axis=0)
x1 = paddle.maximum(paddle.minimum(bbox[:, :, 0], origin_w), zeros)
y1 = paddle.maximum(paddle.minimum(bbox[:, :, 1], origin_h), zeros)
x2 = paddle.maximum(paddle.minimum(bbox[:, :, 2], origin_w), zeros)
y2 = paddle.maximum(paddle.minimum(bbox[:, :, 3], origin_h), zeros)
bbox = paddle.stack([x1, y1, x2, y2], axis=-1)
bboxes = (bbox, rois_num)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册