yolo4.py 15.1 KB
Newer Older
J
JiaQi Xu 已提交
1 2
from functools import wraps

B
Bubbliiiing 已提交
3
import keras
J
JiaQi Xu 已提交
4 5 6
import numpy as np
import tensorflow as tf
from keras import backend as K
B
Bubbliiiing 已提交
7 8
from keras.layers import (Add, Concatenate, Conv2D, MaxPooling2D, UpSampling2D,
                          ZeroPadding2D)
J
JiaQi Xu 已提交
9 10 11 12 13 14
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.regularizers import l2
from utils.utils import compose

B
Bubbliiiing 已提交
15 16
from nets.CSPdarknet53 import darknet_body

J
JiaQi Xu 已提交
17 18

#--------------------------------------------------#
#   Single convolution: DarknetConv2D
#   When the stride is 2 the padding mode is set explicitly.
#   L2 regularisation was removed because testing showed
#   better results without it.
#--------------------------------------------------#
@wraps(Conv2D)
def DarknetConv2D(*args, **kwargs):
    # Conv2D factory that derives the default padding mode from the stride:
    # a stride of 2 gets 'valid' padding (callers pad explicitly beforehand,
    # e.g. with ZeroPadding2D), every other stride gets 'same'.
    # Any keyword supplied by the caller, including 'padding', overrides the
    # default chosen here.
    #
    # NOTE: @wraps(Conv2D) replaces this function's docstring with Conv2D's,
    # which is why the description lives in comments.
    #
    # L2 regularisation (kernel_regularizer=l2(5e-4)) is intentionally not
    # applied: the original author reported better results without it.
    strides = kwargs.get('strides')
    if strides is None and len(args) > 2:
        # Conv2D's third positional parameter is `strides`.
        strides = args[2]

    # Conv2D accepts the stride as a single int or as a tuple/list of two
    # ints, so recognise every spelling of "stride 2", not only (2, 2).
    if isinstance(strides, (list, tuple)):
        is_stride_two = tuple(strides) == (2, 2)
    else:
        is_stride_two = strides == 2

    darknet_conv_kwargs = {'padding': 'valid' if is_stride_two else 'same'}
    darknet_conv_kwargs.update(kwargs)
    return Conv2D(*args, **darknet_conv_kwargs)

#---------------------------------------------------#
#   Convolution block -> convolution + normalisation + activation
#   DarknetConv2D + BatchNormalization + LeakyReLU
#---------------------------------------------------#
def DarknetConv2D_BN_Leaky(*args, **kwargs):
    """Return a composed block: DarknetConv2D -> BatchNormalization -> LeakyReLU(0.1).

    The convolution is created without a bias term unless the caller passes
    ``use_bias`` explicitly; all other arguments are forwarded unchanged to
    ``DarknetConv2D``.
    """
    conv_kwargs = dict(use_bias=False)
    conv_kwargs.update(kwargs)

    # Layers are instantiated in conv -> norm -> activation order.
    conv_layer = DarknetConv2D(*args, **conv_kwargs)
    norm_layer = BatchNormalization()
    activation = LeakyReLU(alpha=0.1)
    return compose(conv_layer, norm_layer, activation)

#---------------------------------------------------#
#   Apply five convolutions in a row
#---------------------------------------------------#
def make_five_convs(x, num_filters):
    """Run ``x`` through five conv blocks alternating 1x1 and 3x3 kernels.

    The 1x1 blocks use ``num_filters`` channels and the 3x3 blocks use
    ``num_filters * 2``, so the result has ``num_filters`` channels.
    """
    block_specs = (
        (num_filters, (1, 1)),
        (num_filters * 2, (3, 3)),
        (num_filters, (1, 1)),
        (num_filters * 2, (3, 3)),
        (num_filters, (1, 1)),
    )
    for filters, kernel_size in block_specs:
        x = DarknetConv2D_BN_Leaky(filters, kernel_size)(x)
    return x

#---------------------------------------------------#
#   Build the PANet network and produce the predictions
#---------------------------------------------------#
def yolo_body(inputs, num_anchors, num_classes):
    """Assemble the detection model: backbone, SPP block, PANet neck and three heads.

    Args:
        inputs: Keras input tensor passed to ``darknet_body``.
        num_anchors: number of anchors predicted per grid cell on each scale.
        num_classes: number of object classes.

    Returns:
        A ``Model`` mapping ``inputs`` to ``[P5_output, P4_output, P3_output]``.
        Every output has ``num_anchors * (num_classes + 5)`` channels.

    The shape remarks in the comments below are carried over from the
    original annotations, which assume a 416x416 input image.
    """
    head_channels = num_anchors * (num_classes + 5)

    def _prediction_head(tensor, filters):
        # 3x3 conv block, then a plain 1x1 conv that emits the raw predictions.
        tensor = DarknetConv2D_BN_Leaky(filters, (3, 3))(tensor)
        predictor = DarknetConv2D(
            head_channels, (1, 1),
            kernel_initializer=keras.initializers.normal(mean=0.0, stddev=0.01))
        return predictor(tensor)

    def _upsample(tensor, filters):
        # 1x1 conv block to reduce channels, then 2x spatial upsampling.
        return compose(DarknetConv2D_BN_Leaky(filters, (1, 1)), UpSampling2D(2))(tensor)

    def _downsample(tensor, filters):
        # Pad top/left by one pixel, then a stride-2 3x3 conv block
        # (DarknetConv2D uses 'valid' padding for stride 2).
        padded = ZeroPadding2D(((1, 0), (1, 0)))(tensor)
        return DarknetConv2D_BN_Leaky(filters, (3, 3), strides=(2, 2))(padded)

    # Backbone: three feature maps, annotated as
    # 52,52,256 / 26,26,512 / 13,13,1024 in the original comments.
    feat1, feat2, feat3 = darknet_body(inputs)

    # Three conv blocks before the SPP block: 1024 -> 512 -> 1024 -> 512 channels.
    p5 = feat3
    for filters, kernel_size in ((512, (1, 1)), (1024, (3, 3)), (512, (1, 1))):
        p5 = DarknetConv2D_BN_Leaky(filters, kernel_size)(p5)

    # SPP: max-pool at several kernel sizes (stride 1, 'same' padding) and
    # stack the results together with the un-pooled tensor.
    pooled = [
        MaxPooling2D(pool_size=(size, size), strides=(1, 1), padding='same')(p5)
        for size in (13, 9, 5)
    ]
    p5 = Concatenate()(pooled + [p5])

    # Three conv blocks after the SPP block: back down to 512 channels.
    for filters, kernel_size in ((512, (1, 1)), (1024, (3, 3)), (512, (1, 1))):
        p5 = DarknetConv2D_BN_Leaky(filters, kernel_size)(p5)

    # Top-down path: 13x13 -> 26x26.
    p5_upsampled = _upsample(p5, 256)
    p4 = DarknetConv2D_BN_Leaky(256, (1, 1))(feat2)
    p4 = Concatenate()([p4, p5_upsampled])
    p4 = make_five_convs(p4, 256)

    # Top-down path: 26x26 -> 52x52.
    p4_upsampled = _upsample(p4, 128)
    p3 = DarknetConv2D_BN_Leaky(128, (1, 1))(feat1)
    p3 = Concatenate()([p3, p4_upsampled])
    p3 = make_five_convs(p3, 128)

    # Third output (finest grid; 52x52 per the original comments).
    p3_output = _prediction_head(p3, 256)

    # Bottom-up path: 52x52 -> 26x26.
    p3_downsampled = _downsample(p3, 256)
    p4 = Concatenate()([p3_downsampled, p4])
    p4 = make_five_convs(p4, 256)

    # Second output (26x26 per the original comments).
    p4_output = _prediction_head(p4, 512)

    # Bottom-up path: 26x26 -> 13x13.
    p4_downsampled = _downsample(p4, 512)
    p5 = Concatenate()([p4_downsampled, p5])
    p5 = make_five_convs(p5, 512)

    # First output (coarsest grid; 13x13 per the original comments).
    p5_output = _prediction_head(p5, 1024)

    return Model(inputs, [p5_output, p4_output, p3_output])

#---------------------------------------------------#
#   Decode the raw predictions of each feature layer into actual box values
#---------------------------------------------------#
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Decode one raw output feature map into normalised box parameters.

    Args:
        feats: raw network output for one scale. It is reshaped here to
            ``(batch, grid_h, grid_w, num_anchors, num_classes + 5)``.
        anchors: the anchors used on this scale; ``len(anchors)`` rows of two
            values each (presumably (w, h) pairs -- confirm against caller).
        num_classes: number of object classes.
        input_shape: network input size. It is reversed before use, so it is
            presumably given as (h, w) -- confirm against caller.
        calc_loss: when truthy, return the tensors needed by the loss
            instead of the inference outputs.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss`` is truthy,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)

    # Anchors as a broadcastable tensor: [1, 1, 1, num_anchors, 2].
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    # Cell-index grid of shape (grid_h, grid_w, 1, 2); the last axis holds
    # (x index, y index) of each cell.
    grid_shape = K.shape(feats)[1:3]
    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
        [1, grid_shape[1], 1, 1])
    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
        [grid_shape[0], 1, 1, 1])
    grid = K.concatenate([grid_x, grid_y])
    grid = K.cast(grid, K.dtype(feats))

    # Split the channel axis per anchor. The last axis is laid out as
    # 4 box-adjustment values + 1 objectness value + num_classes class values.
    feats = K.reshape(feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])

    # Box centre: sigmoid offset inside the cell plus the cell index,
    # divided by the grid size (reversed to match the x, y ordering).
    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
    # Box size: exponential scaling of the anchor, divided by the input size.
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # The loss needs the grid and the reshaped raw features; inference needs
    # the confidences and class probabilities.
    if calc_loss:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs

#---------------------------------------------------#
#   Adjust the boxes so that they match the original image
#---------------------------------------------------#
def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
    """Map boxes from the padded network input back onto the original image.

    Args:
        box_xy: box centres with (x, y) on the last axis.
        box_wh: box sizes with (w, h) on the last axis.
        input_shape: size of the network input (presumably (h, w) -- confirm
            against caller).
        image_shape: size of the original image, in the same ordering as
            ``input_shape``.

    Returns:
        A tensor whose last axis is ``(y_min, x_min, y_max, x_max)``, scaled
        by ``image_shape``.
    """
    # Put y first so the boxes line up with the ordering of the shape tensors.
    centers_yx = box_xy[..., ::-1]
    sizes_hw = box_wh[..., ::-1]

    float_dtype = K.dtype(centers_yx)
    input_shape = K.cast(input_shape, float_dtype)
    image_shape = K.cast(image_shape, float_dtype)

    # Size of the image after an aspect-preserving resize into the input.
    new_shape = K.round(image_shape * K.min(input_shape / image_shape))

    # offset: fraction of the input taken up by padding before the valid
    # region; scale: ratio between the full input and the valid region.
    offset = (input_shape - new_shape) / 2. / input_shape
    scale = input_shape / new_shape

    centers_yx = (centers_yx - offset) * scale
    sizes_hw = sizes_hw * scale

    box_mins = centers_yx - (sizes_hw / 2.)
    box_maxes = centers_yx + (sizes_hw / 2.)
    corners = K.concatenate([
        box_mins[..., 0:1],   # y_min
        box_mins[..., 1:2],   # x_min
        box_maxes[..., 0:1],  # y_max
        box_maxes[..., 1:2],  # x_max
    ])

    # Scale the corner coordinates by the original image size.
    return corners * K.concatenate([image_shape, image_shape])

#---------------------------------------------------#
#   Get every box and its score
#---------------------------------------------------#
def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
    """Decode one output feature map into flat lists of boxes and class scores.

    Returns:
        ``(boxes, box_scores)`` where ``boxes`` is reshaped to ``[-1, 4]`` and
        ``box_scores`` (objectness times class probability) is reshaped to
        ``[-1, num_classes]``.
    """
    # Decode the raw predictions into centres, sizes, objectness and class
    # probabilities.
    decoded = yolo_head(feats, anchors, num_classes, input_shape)
    box_xy, box_wh, box_confidence, box_class_probs = decoded

    # The image is letterboxed (grey bars added) before it enters the network,
    # so the decoded boxes are relative to the padded image. Remove the
    # padding and convert them to corner form.
    corrected = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)

    # Flatten everything to one row per candidate box.
    flat_boxes = K.reshape(corrected, [-1, 4])
    flat_scores = K.reshape(box_confidence * box_class_probs, [-1, num_classes])
    return flat_boxes, flat_scores

#---------------------------------------------------#
#   Image prediction
#---------------------------------------------------#
def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape,
              max_boxes=20,
              score_threshold=.6,
              iou_threshold=.5):
    """Turn the raw model outputs into final detections.

    Args:
        yolo_outputs: sequence of raw output tensors, one per scale. The first
            one is treated as having a stride of 32 relative to the input.
        anchors: array-like of anchors with one row of two values per anchor.
            Rows 6-8 are used with the first output, rows 3-5 with the second
            and rows 0-2 with the third.
        num_classes: number of object classes.
        image_shape: size of the original image, forwarded to the box
            correction step.
        max_boxes: maximum number of boxes kept by non-max suppression
            **per class**.
        score_threshold: minimum class score for a box to be considered.
        iou_threshold: IoU threshold used by non-max suppression.

    Returns:
        ``(boxes, scores, classes)`` concatenated over all classes. Boxes are
        in the ``(y_min, x_min, y_max, x_max)`` layout produced by
        ``yolo_correct_boxes``.
    """
    # Anchor rows assigned to each output, in output order.
    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
    # Indexing with a list of row indices needs an ndarray; converting here
    # lets callers pass a plain (nested) list as well.
    anchors = np.asarray(anchors)

    # Network input size, recovered from the first output's grid size.
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    # Decode every scale and collect the per-scale results.
    boxes = []
    box_scores = []
    for layer_idx, layer_output in enumerate(yolo_outputs):
        layer_anchors = anchors[anchor_mask[layer_idx]]
        _boxes, _box_scores = yolo_boxes_and_scores(
            layer_output, layer_anchors, num_classes, input_shape, image_shape)
        boxes.append(_boxes)
        box_scores.append(_box_scores)

    # Stack the candidates from all scales.
    boxes = K.concatenate(boxes, axis=0)
    box_scores = K.concatenate(box_scores, axis=0)

    # Candidates whose class score reaches the threshold.
    mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    boxes_ = []
    scores_ = []
    classes_ = []
    for class_idx in range(num_classes):
        # Keep only the boxes (and scores) that pass the threshold for this class.
        class_mask = mask[:, class_idx]
        class_boxes = tf.boolean_mask(boxes, class_mask)
        class_box_scores = tf.boolean_mask(box_scores[:, class_idx], class_mask)

        # Non-max suppression: keep the best-scoring box among overlapping ones.
        nms_index = tf.image.non_max_suppression(
            class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)

        # Gather the survivors: box coordinates, scores and class ids.
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        classes = K.ones_like(class_box_scores, 'int32') * class_idx

        boxes_.append(class_boxes)
        scores_.append(class_box_scores)
        classes_.append(classes)

    boxes_ = K.concatenate(boxes_, axis=0)
    scores_ = K.concatenate(scores_, axis=0)
    classes_ = K.concatenate(classes_, axis=0)

    return boxes_, scores_, classes_