未验证 提交 e61ab3d8 编写于 作者: G Guanghua Yu 提交者: GitHub

add Mask R-CNN model (#1787)

* add Mask R-CNN model

* fix mask rcnn eval

* fix mask rcnn infer

* fix mask rcnn config

* fix infer config
上级 7e044196
# Global settings: the architecture entry names the MaskRCNN section of this
# config; pretrain_weights is a URL to a ResNet50 pretrained archive.
architecture: MaskRCNN
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_cos_pretrained.tar
# NOTE(review): the path mentions "fpn" although no neck/FPN component is
# configured in the MaskRCNN section of this config -- confirm the intended name.
weights: output/mask_rcnn_r50_fpn_1x/model_final
load_static_weights: True
# Model Architecture
# Each value below is the name of another top-level section of this config
# (Anchor, Proposal, Mask, ResNet, RPNHead, BBoxHead, MaskHead,
# BBoxPostProcess, MaskPostProcess).
# NOTE(review): nesting indentation appears to have been lost in this copy;
# the keys below presumably belong under `MaskRCNN:` -- confirm.
MaskRCNN:
# model anchor info flow
anchor: Anchor
proposal: Proposal
mask: Mask
# model feat info flow
backbone: ResNet
rpn_head: RPNHead
bbox_head: BBoxHead
mask_head: MaskHead
# post process
bbox_post_process: BBoxPostProcess
mask_post_process: MaskPostProcess
# Backbone settings (referenced as `backbone: ResNet` in the MaskRCNN section).
ResNet:
# index 0 stands for res2
# (so return_idx [2] below would select res4 under that convention -- TODO confirm)
depth: 50
norm_type: bn
freeze_at: 0
return_idx: [2]
num_stages: 3
# RPN head settings (referenced as `rpn_head: RPNHead` in the MaskRCNN section).
RPNHead:
rpn_feat:
name: RPNFeat
feat_in: 1024
feat_out: 1024
# 15 equals 5 anchor_sizes x 3 aspect_ratios from the Anchor section;
# presumably these must be kept in sync -- confirm.
anchor_per_position: 15
# Anchor settings (referenced as `anchor: Anchor` in the MaskRCNN section):
# one generator entry and one target-generator entry, each selected by `name`.
Anchor:
anchor_generator:
name: AnchorGeneratorRPN
# 5 sizes x 3 ratios = 15 combinations (matches RPNHead anchor_per_position).
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_target_generator:
name: AnchorTargetGeneratorRPN
batch_size_per_im: 256
fg_fraction: 0.5
negative_overlap: 0.3
positive_overlap: 0.7
straddle_thresh: 0.0
# Proposal settings (referenced as `proposal: Proposal` in the MaskRCNN section):
# a proposal generator with separate train/infer top-n limits, and a
# proposal target generator.
Proposal:
proposal_generator:
name: ProposalGenerator
min_size: 0.0
nms_thresh: 0.7
train_pre_nms_top_n: 12000
train_post_nms_top_n: 2000
infer_pre_nms_top_n: 6000
infer_post_nms_top_n: 1000
proposal_target_generator:
name: ProposalTargetGenerator
batch_size_per_im: 512
# The following four values are single-element lists.
# NOTE(review): presumably one entry per head stage -- confirm.
bbox_reg_weights: [[0.1, 0.1, 0.2, 0.2],]
bg_thresh_hi: [0.5,]
bg_thresh_lo: [0.0,]
fg_thresh: [0.5,]
fg_fraction: 0.25
# Box head settings (referenced as `bbox_head: BBoxHead` in the MaskRCNN section).
# roi_extractor points at the RoIAlign section of this config.
BBoxHead:
bbox_feat:
name: BBoxFeat
roi_extractor: RoIAlign
head_feat:
name: Res5Head
feat_in: 1024
feat_out: 512
with_pool: true
# 2048 equals 4 x the head_feat feat_out of 512 above.
in_feat: 2048
# Box post-processing (referenced as `bbox_post_process: BBoxPostProcess`):
# a decode step followed by an NMS step, each selected by `name`.
BBoxPostProcess:
decode:
name: RCNNBox
# NOTE(review): presumably 80 object classes plus background -- confirm.
num_classes: 81
batch_size: 1
nms:
name: MultiClassNMS
keep_top_k: 100
score_threshold: 0.05
nms_threshold: 0.5
# Mask target settings (referenced as `mask: Mask` in the MaskRCNN section).
Mask:
mask_target_generator:
name: MaskTargetGenerator
# Same value as MaskPostProcess.mask_resolution and RoIAlign.resolution.
mask_resolution: 14
# RoI extractor settings; this section is referenced by name from
# BBoxHead (roi_extractor) and MaskHead (mask_roi_extractor).
RoIAlign:
resolution: 14
sampling_ratio: 0
# start_level and end_level are both 0.
# NOTE(review): presumably a single feature level is used -- confirm.
start_level: 0
end_level: 0
# Mask head settings (referenced as `mask_head: MaskHead` in the MaskRCNN section).
MaskHead:
mask_feat:
name: MaskFeat
num_convs: 0
# 2048 matches BBoxHead.in_feat in this config.
feat_in: 2048
feat_out: 256
mask_roi_extractor: RoIAlign
share_bbox_feat: true
# NOTE(review): `feat_in` appears twice in this section (2048 above, 256 here);
# with the nesting lost in this copy, this one presumably belongs to MaskHead
# itself and matches mask_feat's feat_out of 256 -- confirm.
feat_in: 256
# Mask post-processing (referenced as `mask_post_process: MaskPostProcess`).
MaskPostProcess:
# Same value as Mask.mask_target_generator.mask_resolution.
mask_resolution: 14
# Reader configuration: worker count, then the training reader.
worker_num: 2
# Training reader: declares the input fields, per-sample transforms, and
# batch transforms. It uses the transform names without the `Op` suffix and
# the 'im_info' field, unlike EvalReader/TestReader below.
TrainReader:
inputs_def:
fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_poly']
sample_transforms:
- DecodeImage: {to_rgb: true}
- RandomFlipImage: {prob: 0.5, is_mask_flip: true}
- NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true}
- Permute: {to_bgr: false, channel_first: true}
batch_transforms:
- PadBatch: {pad_to_stride: 32, use_padded_im_info: false, pad_gt: true}
batch_size: 1
shuffle: true
drop_last: true
# Evaluation reader: uses the `*Op` transform names and the
# 'im_shape'/'scale_factor' input fields; no flip transform, no shuffling.
EvalReader:
inputs_def:
fields: ['image', 'im_shape', 'scale_factor', 'im_id']
sample_transforms:
- DecodeOp: {}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeOp: {interp: 1, target_size: [800, 1333]}
- PermuteOp: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
drop_empty: false
# Test/inference reader: same fields and transforms as EvalReader in this
# config, but without a `drop_empty` entry.
TestReader:
inputs_def:
fields: ['image', 'im_shape', 'scale_factor', 'im_id']
sample_transforms:
- DecodeOp: {}
- NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeOp: {interp: 1, target_size: [800, 1333]}
- PermuteOp: {}
batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: false}
batch_size: 1
shuffle: false
drop_last: false
...@@ -9,7 +9,7 @@ TrainReader: ...@@ -9,7 +9,7 @@ TrainReader:
- ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true} - ResizeImage: {target_size: 800, max_size: 1333, interp: 1, use_cv2: true}
- Permute: {to_bgr: false, channel_first: true} - Permute: {to_bgr: false, channel_first: true}
batch_transforms: batch_transforms:
- PadBatch: {pad_to_stride: 32, use_padded_im_info: false, pad_gt: true} - PadBatch: {pad_to_stride: -1., use_padded_im_info: false, pad_gt: true}
batch_size: 1 batch_size: 1
shuffle: true shuffle: true
drop_last: true drop_last: true
...@@ -24,7 +24,7 @@ EvalReader: ...@@ -24,7 +24,7 @@ EvalReader:
- ResizeOp: {interp: 1, target_size: [800, 1333]} - ResizeOp: {interp: 1, target_size: [800, 1333]}
- PermuteOp: {} - PermuteOp: {}
batch_transforms: batch_transforms:
- PadBatchOp: {pad_to_stride: 32, pad_gt: false} - PadBatchOp: {pad_to_stride: -1., pad_gt: false}
batch_size: 1 batch_size: 1
shuffle: false shuffle: false
drop_last: false drop_last: false
...@@ -33,14 +33,15 @@ EvalReader: ...@@ -33,14 +33,15 @@ EvalReader:
TestReader: TestReader:
inputs_def: inputs_def:
fields: ['image', 'im_info', 'im_id'] fields: ['image', 'im_shape', 'scale_factor', 'im_id']
sample_transforms: sample_transforms:
- DecodeImage: {to_rgb: true, with_mixup: false} - DecodeOp: {}
- NormalizeImage: {is_channel_first: false, is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]} - NormalizeImageOp: {is_scale: true, mean: [0.485,0.456,0.406], std: [0.229, 0.224,0.225]}
- ResizeImage: {interp: 1, max_size: 1333, target_size: 800, use_cv2: true} - ResizeOp: {interp: 1, target_size: [800, 1333]}
- Permute: {channel_first: true, to_bgr: false} - PermuteOp: {}
batch_transforms: batch_transforms:
- PadBatch: {pad_to_stride: 32, use_padded_im_info: false, pad_gt: false} - PadBatchOp: {pad_to_stride: -1., pad_gt: false}
batch_size: 1 batch_size: 1
shuffle: false shuffle: false
drop_last: false drop_last: false
drop_empty: false
# List of base config files (model, optimizer, dataset, reader, runtime).
# NOTE(review): presumably these are merged to form the full config -- confirm
# against the config loader.
_BASE_: [
'./_base_/models/mask_rcnn_r50.yml',
'./_base_/optimizers/rcnn_1x.yml',
'./_base_/datasets/coco.yml',
'./_base_/readers/mask_reader.yml',
'./_base_/runtime.yml',
]
...@@ -2,6 +2,6 @@ _BASE_: [ ...@@ -2,6 +2,6 @@ _BASE_: [
'./_base_/models/mask_rcnn_r50_fpn.yml', './_base_/models/mask_rcnn_r50_fpn.yml',
'./_base_/optimizers/rcnn_1x.yml', './_base_/optimizers/rcnn_1x.yml',
'./_base_/datasets/coco.yml', './_base_/datasets/coco.yml',
'./_base_/readers/mask_reader.yml', './_base_/readers/mask_fpn_reader.yml',
'./_base_/runtime.yml', './_base_/runtime.yml',
] ]
...@@ -65,7 +65,7 @@ class MaskRCNN(BaseArch): ...@@ -65,7 +65,7 @@ class MaskRCNN(BaseArch):
def model_arch(self): def model_arch(self):
# Backbone # Backbone
body_feats = self.backbone(self.inputs) body_feats = self.backbone(self.inputs)
spatial_scale = None spatial_scale = 1. / 16
# Neck # Neck
if self.neck is not None: if self.neck is not None:
...@@ -87,8 +87,8 @@ class MaskRCNN(BaseArch): ...@@ -87,8 +87,8 @@ class MaskRCNN(BaseArch):
# compute targets here when training # compute targets here when training
rois = self.proposal(self.inputs, self.rpn_head_out, self.anchor_out) rois = self.proposal(self.inputs, self.rpn_head_out, self.anchor_out)
# BBox Head # BBox Head
bbox_feat, self.bbox_head_out = self.bbox_head(body_feats, rois, bbox_feat, self.bbox_head_out, self.bbox_head_feat_func = self.bbox_head(
spatial_scale) body_feats, rois, spatial_scale)
rois_has_mask_int32 = None rois_has_mask_int32 = None
if self.inputs['mode'] == 'infer': if self.inputs['mode'] == 'infer':
...@@ -106,9 +106,9 @@ class MaskRCNN(BaseArch): ...@@ -106,9 +106,9 @@ class MaskRCNN(BaseArch):
bbox_targets) bbox_targets)
# Mask Head # Mask Head
self.mask_head_out = self.mask_head(self.inputs, body_feats, self.mask_head_out = self.mask_head(
self.bboxes, bbox_feat, self.inputs, body_feats, self.bboxes, bbox_feat,
rois_has_mask_int32, spatial_scale) rois_has_mask_int32, spatial_scale, self.bbox_head_feat_func)
def get_loss(self, ): def get_loss(self, ):
loss = {} loss = {}
......
...@@ -22,6 +22,9 @@ from paddle.regularizer import L2Decay ...@@ -22,6 +22,9 @@ from paddle.regularizer import L2Decay
from ppdet.core.workspace import register from ppdet.core.workspace import register
from ppdet.modeling import ops from ppdet.modeling import ops
from ..backbone.name_adapter import NameAdapter
from ..backbone.resnet import Blocks
@register @register
class TwoFCHead(nn.Layer): class TwoFCHead(nn.Layer):
...@@ -74,6 +77,23 @@ class TwoFCHead(nn.Layer): ...@@ -74,6 +77,23 @@ class TwoFCHead(nn.Layer):
return fc7_relu return fc7_relu
@register
class Res5Head(nn.Layer):
    """Head feature extractor that applies a ``Blocks`` group to RoI features.

    Args:
        feat_in: first positional argument passed to ``Blocks``.
        feat_out: second positional argument passed to ``Blocks``; the
            instance attribute ``feat_out`` is set to four times this value.
    """

    def __init__(self, feat_in=1024, feat_out=512):
        super(Res5Head, self).__init__()
        adapter = NameAdapter(self)
        self.res5_conv = []
        # Build the block group (3 blocks, stage number 5) first, then
        # register it under the sublayer name 'res5_roi_feat'; the value
        # returned by add_sublayer is kept as self.res5.
        res5_blocks = Blocks(
            feat_in, feat_out, count=3, name_adapter=adapter, stage_num=5)
        self.res5 = self.add_sublayer('res5_roi_feat', res5_blocks)
        self.feat_out = feat_out * 4

    def forward(self, roi_feat, stage=0):
        # `stage` is accepted but not used by this head.
        return self.res5(roi_feat)
@register @register
class BBoxFeat(nn.Layer): class BBoxFeat(nn.Layer):
__inject__ = ['roi_extractor', 'head_feat'] __inject__ = ['roi_extractor', 'head_feat']
...@@ -86,7 +106,7 @@ class BBoxFeat(nn.Layer): ...@@ -86,7 +106,7 @@ class BBoxFeat(nn.Layer):
def forward(self, body_feats, rois, spatial_scale, stage=0): def forward(self, body_feats, rois, spatial_scale, stage=0):
rois_feat = self.roi_extractor(body_feats, rois, spatial_scale) rois_feat = self.roi_extractor(body_feats, rois, spatial_scale)
bbox_feat = self.head_feat(rois_feat, stage) bbox_feat = self.head_feat(rois_feat, stage)
return bbox_feat return bbox_feat, self.head_feat
@register @register
...@@ -139,15 +159,19 @@ class BBoxHead(nn.Layer): ...@@ -139,15 +159,19 @@ class BBoxHead(nn.Layer):
self.bbox_delta_list.append(bbox_delta) self.bbox_delta_list.append(bbox_delta)
def forward(self, body_feats, rois, spatial_scale, stage=0): def forward(self, body_feats, rois, spatial_scale, stage=0):
bbox_feat = self.bbox_feat(body_feats, rois, spatial_scale, stage) bbox_feat, head_feat_func = self.bbox_feat(body_feats, rois,
if self.with_pool: spatial_scale, stage)
bbox_feat = F.pool2d(
bbox_feat, pool_type='avg', global_pooling=True)
bbox_head_out = [] bbox_head_out = []
scores = self.bbox_score_list[stage](bbox_feat) if self.with_pool:
deltas = self.bbox_delta_list[stage](bbox_feat) bbox_feat_ = F.adaptive_avg_pool2d(bbox_feat, output_size=1)
bbox_feat_ = paddle.squeeze(bbox_feat_, axis=[2, 3])
scores = self.bbox_score_list[stage](bbox_feat_)
deltas = self.bbox_delta_list[stage](bbox_feat_)
else:
scores = self.bbox_score_list[stage](bbox_feat)
deltas = self.bbox_delta_list[stage](bbox_feat)
bbox_head_out.append((scores, deltas)) bbox_head_out.append((scores, deltas))
return bbox_feat, bbox_head_out return bbox_feat, bbox_head_out, head_feat_func
def _get_head_loss(self, score, delta, target): def _get_head_loss(self, score, delta, target):
# bbox cls # bbox cls
......
...@@ -28,8 +28,8 @@ class MaskFeat(Layer): ...@@ -28,8 +28,8 @@ class MaskFeat(Layer):
__inject__ = ['mask_roi_extractor'] __inject__ = ['mask_roi_extractor']
def __init__(self, def __init__(self,
mask_roi_extractor, mask_roi_extractor=None,
num_convs=1, num_convs=0,
feat_in=2048, feat_in=2048,
feat_out=256, feat_out=256,
mask_num_stages=1, mask_num_stages=1,
...@@ -82,12 +82,16 @@ class MaskFeat(Layer): ...@@ -82,12 +82,16 @@ class MaskFeat(Layer):
bbox_feat, bbox_feat,
mask_index, mask_index,
spatial_scale, spatial_scale,
stage=0): stage=0,
if self.share_bbox_feat: bbox_head_feat_func=None,
mode='train'):
if self.share_bbox_feat and mask_index:
rois_feat = paddle.gather(bbox_feat, mask_index) rois_feat = paddle.gather(bbox_feat, mask_index)
else: else:
rois_feat = self.mask_roi_extractor(body_feats, bboxes, rois_feat = self.mask_roi_extractor(body_feats, bboxes,
spatial_scale) spatial_scale)
if bbox_head_feat_func is not None and mode == 'infer':
rois_feat = bbox_head_feat_func(rois_feat)
# upsample # upsample
mask_feat = self.upsample_module[stage](rois_feat) mask_feat = self.upsample_module[stage](rois_feat)
return mask_feat return mask_feat
...@@ -131,8 +135,14 @@ class MaskHead(Layer): ...@@ -131,8 +135,14 @@ class MaskHead(Layer):
spatial_scale, spatial_scale,
stage=0): stage=0):
# feat # feat
mask_feat = self.mask_feat(body_feats, bboxes, bbox_feat, mask_index, mask_feat = self.mask_feat(
spatial_scale, stage) body_feats,
bboxes,
bbox_feat,
mask_index,
spatial_scale,
stage,
mode='train')
# logits # logits
mask_head_out = self.mask_fcn_logits[stage](mask_feat) mask_head_out = self.mask_fcn_logits[stage](mask_feat)
return mask_head_out return mask_head_out
...@@ -144,7 +154,8 @@ class MaskHead(Layer): ...@@ -144,7 +154,8 @@ class MaskHead(Layer):
bbox_feat, bbox_feat,
mask_index, mask_index,
spatial_scale, spatial_scale,
stage=0): stage=0,
bbox_head_feat_func=None):
bbox, bbox_num = bboxes bbox, bbox_num = bboxes
if bbox.shape[0] == 0: if bbox.shape[0] == 0:
mask_head_out = bbox mask_head_out = bbox
...@@ -155,11 +166,18 @@ class MaskHead(Layer): ...@@ -155,11 +166,18 @@ class MaskHead(Layer):
scale_factor_list.append(scale_factor[idx, 0]) scale_factor_list.append(scale_factor[idx, 0])
scale_factor_list = paddle.cast( scale_factor_list = paddle.cast(
paddle.concat(scale_factor_list), 'float32') paddle.concat(scale_factor_list), 'float32')
scaled_bbox = paddle.multiply( scale_factor_list = paddle.reshape(scale_factor_list, shape=[-1, 1])
bbox[:, 2:], scale_factor_list, axis=0) scaled_bbox = paddle.multiply(bbox[:, 2:], scale_factor_list)
scaled_bboxes = (scaled_bbox, bbox_num) scaled_bboxes = (scaled_bbox, bbox_num)
mask_feat = self.mask_feat(body_feats, scaled_bboxes, bbox_feat, mask_feat = self.mask_feat(
mask_index, spatial_scale, stage) body_feats,
scaled_bboxes,
bbox_feat,
mask_index,
spatial_scale,
stage,
bbox_head_feat_func,
mode='infer')
mask_logit = self.mask_fcn_logits[stage](mask_feat) mask_logit = self.mask_fcn_logits[stage](mask_feat)
mask_head_out = F.sigmoid(mask_logit) mask_head_out = F.sigmoid(mask_logit)
return mask_head_out return mask_head_out
...@@ -171,15 +189,16 @@ class MaskHead(Layer): ...@@ -171,15 +189,16 @@ class MaskHead(Layer):
bbox_feat, bbox_feat,
mask_index, mask_index,
spatial_scale, spatial_scale,
bbox_head_feat_func=None,
stage=0): stage=0):
if inputs['mode'] == 'train': if inputs['mode'] == 'train':
mask_head_out = self.forward_train(body_feats, bboxes, bbox_feat, mask_head_out = self.forward_train(body_feats, bboxes, bbox_feat,
mask_index, spatial_scale, stage) mask_index, spatial_scale, stage)
else: else:
scale_factor = inputs['scale_factor'] scale_factor = inputs['scale_factor']
mask_head_out = self.forward_test(scale_factor, body_feats, bboxes, mask_head_out = self.forward_test(
bbox_feat, mask_index, scale_factor, body_feats, bboxes, bbox_feat, mask_index,
spatial_scale, stage) spatial_scale, stage, bbox_head_feat_func)
return mask_head_out return mask_head_out
def get_loss(self, mask_head_out, mask_target): def get_loss(self, mask_head_out, mask_target):
......
...@@ -295,21 +295,13 @@ class RCNNBox(object): ...@@ -295,21 +295,13 @@ class RCNNBox(object):
box_normalized=self.box_normalized, box_normalized=self.box_normalized,
axis=self.axis) axis=self.axis)
# TODO: Update box_clip # TODO: Update box_clip
origin_h = origin_shape[:, 0] - 1 origin_h = paddle.unsqueeze(origin_shape[:, 0] - 1, axis=1)
origin_w = origin_shape[:, 1] - 1 origin_w = paddle.unsqueeze(origin_shape[:, 1] - 1, axis=1)
zeros = paddle.zeros(origin_h.shape, 'float32') zeros = paddle.zeros(origin_h.shape, 'float32')
x1 = paddle.maximum( x1 = paddle.maximum(paddle.minimum(bbox[:, :, 0], origin_w), zeros)
paddle.minimum( y1 = paddle.maximum(paddle.minimum(bbox[:, :, 1], origin_h), zeros)
bbox[:, :, 0], origin_w, axis=0), zeros, axis=0) x2 = paddle.maximum(paddle.minimum(bbox[:, :, 2], origin_w), zeros)
y1 = paddle.maximum( y2 = paddle.maximum(paddle.minimum(bbox[:, :, 3], origin_h), zeros)
paddle.minimum(
bbox[:, :, 1], origin_h, axis=0), zeros, axis=0)
x2 = paddle.maximum(
paddle.minimum(
bbox[:, :, 2], origin_w, axis=0), zeros, axis=0)
y2 = paddle.maximum(
paddle.minimum(
bbox[:, :, 3], origin_h, axis=0), zeros, axis=0)
bbox = paddle.stack([x1, y1, x2, y2], axis=-1) bbox = paddle.stack([x1, y1, x2, y2], axis=-1)
bboxes = (bbox, rois_num) bboxes = (bbox, rois_num)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册