From c68f8a8f9bf257baddee9838484fa8958ab804e9 Mon Sep 17 00:00:00 2001 From: keineahnung2345 Date: Tue, 25 Sep 2018 15:49:29 +0800 Subject: [PATCH] fix output shape of fpn_classifier_graph 1. fix the comment on output shape in fpn_classifier_graph 2. unify NUM_CLASSES and num_classes to NUM_CLASSES 3. unify boxes, num_boxes, num_rois, roi_count to num_rois 4. use more specific POOL_SIZE and MASK_POOL_SIZE to replace pool_height and pool_width --- mrcnn/model.py | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mrcnn/model.py b/mrcnn/model.py index 391a760..c10bced 100644 --- a/mrcnn/model.py +++ b/mrcnn/model.py @@ -910,20 +910,20 @@ def fpn_classifier_graph(rois, feature_maps, image_meta, coordinates. feature_maps: List of feature maps from different layers of the pyramid, [P2, P3, P4, P5]. Each has a different resolution. - - image_meta: [batch, (meta data)] Image details. See compose_image_meta() + image_meta: [batch, (meta data)] Image details. See compose_image_meta() pool_size: The width of the square feature map generated from ROI Pooling. num_classes: number of classes, which determines the depth of the results train_bn: Boolean. 
Train or freeze Batch Norm layers fc_layers_size: Size of the 2 FC layers Returns: - logits: [N, NUM_CLASSES] classifier logits (before softmax) - probs: [N, NUM_CLASSES] classifier probabilities - bbox_deltas: [N, (dy, dx, log(dh), log(dw))] Deltas to apply to + logits: [batch, num_rois, NUM_CLASSES] classifier logits (before softmax) + probs: [batch, num_rois, NUM_CLASSES] classifier probabilities + bbox_deltas: [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))] Deltas to apply to proposal boxes """ # ROI Pooling - # Shape: [batch, num_boxes, pool_height, pool_width, channels] + # Shape: [batch, num_rois, POOL_SIZE, POOL_SIZE, channels] x = PyramidROIAlign([pool_size, pool_size], name="roi_align_classifier")([rois, image_meta] + feature_maps) # Two 1024 FC layers (implemented with Conv2D for consistency) @@ -946,10 +946,10 @@ def fpn_classifier_graph(rois, feature_maps, image_meta, name="mrcnn_class")(mrcnn_class_logits) # BBox head - # [batch, boxes, num_classes * (dy, dx, log(dh), log(dw))] + # [batch, num_rois, NUM_CLASSES * (dy, dx, log(dh), log(dw))] x = KL.TimeDistributed(KL.Dense(num_classes * 4, activation='linear'), name='mrcnn_bbox_fc')(shared) - # Reshape to [batch, boxes, num_classes, (dy, dx, log(dh), log(dw))] + # Reshape to [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))] s = K.int_shape(x) mrcnn_bbox = KL.Reshape((s[1], num_classes, 4), name="mrcnn_bbox")(x) @@ -969,10 +969,10 @@ def build_fpn_mask_graph(rois, feature_maps, image_meta, num_classes: number of classes, which determines the depth of the results train_bn: Boolean. 
Train or freeze Batch Norm layers - Returns: Masks [batch, roi_count, height, width, num_classes] + Returns: Masks [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, NUM_CLASSES] """ # ROI Pooling - # Shape: [batch, boxes, pool_height, pool_width, channels] + # Shape: [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, channels] x = PyramidROIAlign([pool_size, pool_size], name="roi_align_mask")([rois, image_meta] + feature_maps) -- GitLab