diff --git a/FPS_test.py b/FPS_test.py index 5a827c7861599b737717bf40afafb13a2aa2f60b..c432218995f386b74d4431110c3e32ee7d47f254 100644 --- a/FPS_test.py +++ b/FPS_test.py @@ -17,20 +17,31 @@ video.py里面测试的FPS会低于该FPS,因为摄像头的读取频率有限 ''' class FPS_YOLO(YOLO): def get_FPS(self, image, test_interval): - # 调整图片使其符合输入要求 - new_image_size = (self.model_image_size[1],self.model_image_size[0]) - boxed_image = letterbox_image(image, new_image_size) + #---------------------------------------------------------# + # 给图像增加灰条,实现不失真的resize + # 也可以直接resize进行识别 + #---------------------------------------------------------# + if self.letterbox_image: + boxed_image = letterbox_image(image, (self.model_image_size[1],self.model_image_size[0])) + else: + boxed_image = image.convert('RGB') + boxed_image = boxed_image.resize((self.model_image_size[1],self.model_image_size[0]), Image.BICUBIC) image_data = np.array(boxed_image, dtype='float32') image_data /= 255. - image_data = np.expand_dims(image_data, 0) + #---------------------------------------------------------# + # 添加上batch_size维度 + #---------------------------------------------------------# + image_data = np.expand_dims(image_data, 0) + #---------------------------------------------------------# + # 将图像输入网络当中进行预测! + #---------------------------------------------------------# out_boxes, out_scores, out_classes = self.sess.run( [self.boxes, self.scores, self.classes], feed_dict={ self.yolo_model.input: image_data, self.input_image_shape: [image.size[1], image.size[0]], - K.learning_phase(): 0 - }) + K.learning_phase(): 0}) t1 = time.time() for _ in range(test_interval): @@ -39,8 +50,7 @@ class FPS_YOLO(YOLO): feed_dict={ self.yolo_model.input: image_data, self.input_image_shape: [image.size[1], image.size[0]], - K.learning_phase(): 0 - }) + K.learning_phase(): 0}) t2 = time.time() tact_time = (t2 - t1) / test_interval return tact_time diff --git a/eval_coco.py b/eval_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..a623ca6e7f9a9a13e662a42b336ced6913cc89e9 --- /dev/null +++ b/eval_coco.py @@ -0,0 +1,145 @@ +import colorsys +import json +import os + +import matplotlib.pyplot as plt +import numpy as np +import pylab +from keras import backend as K +from keras.applications.imagenet_utils import preprocess_input +from keras.layers import Input +from PIL import Image +from tqdm import tqdm + +from nets.yolo4_tiny import yolo_body, yolo_eval +from utils.utils import letterbox_image +from yolo import YOLO + +coco_classes = {'person': 1, 'bicycle': 2, 'car': 3, 'motorbike': 4, 'aeroplane': 5, + 'bus': 6, 'train': 7, 'truck': 8, 'boat': 9, 'traffic light': 10, 'fire hydrant': 11, + '': 83, 'stop sign': 13, 'parking meter': 14, 'bench': 15, 'bird': 16, 'cat': 17, + 'dog': 18, 'horse': 19, 'sheep': 20, 'cow': 21, 'elephant': 22, 'bear': 23, 'zebra': 24, + 'giraffe': 25, 'backpack': 27, 'umbrella': 28, 'handbag': 31, 'tie': 32, 'suitcase': 33, + 'frisbee': 34, 'skis': 35, 'snowboard': 36, 'sports ball': 37, 'kite': 38, 'baseball bat': 39, + 'baseball glove': 40, 'skateboard': 41, 'surfboard': 42, 'tennis racket': 43, 'bottle': 44, + 'wine glass': 46, 'cup': 47, 'fork': 48, 'knife': 49, 'spoon': 50, 'bowl': 51, 'banana': 52, + 'apple': 53, 'sandwich': 54, 'orange': 55, 'broccoli': 56, 'carrot': 57, 'hot dog': 58, + 'pizza': 59, 'donut': 60, 'cake': 61, 'chair': 62, 'sofa': 63, 'pottedplant': 64, 'bed': 65, + 'diningtable': 67, 'toilet': 70, 'tvmonitor': 72, 'laptop': 73, 'mouse': 74, 'remote': 75, + 'keyboard': 76, 'cell phone': 77, 'microwave': 78, 'oven': 79, 'toaster': 80, 'sink': 81, + 'refrigerator': 82, 'book': 84, 'clock': 85, 'vase': 86, 'scissors': 87, 'teddy bear': 88, + 'hair drier': 89, 'toothbrush': 90 +} + +class mAP_YOLO(YOLO): + #---------------------------------------------------# + # 获得所有的分类 + #---------------------------------------------------# + def generate(self): + self.score = 0.01 + self.iou = 0.5 + model_path = os.path.expanduser(self.model_path) + assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.' + + # 计算anchor数量 + num_anchors = len(self.anchors) + num_classes = len(self.class_names) + + # 载入模型,如果原来的模型里已经包括了模型结构则直接载入。 + # 否则先构建模型再载入 + try: + self.yolo_model = load_model(model_path, compile=False) + except: + self.yolo_model = yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes) + self.yolo_model.load_weights(self.model_path) + else: + assert self.yolo_model.layers[-1].output_shape[-1] == \ + num_anchors/len(self.yolo_model.output) * (num_classes + 5), \ + 'Mismatch between model and given anchor and class sizes' + + print('{} model, anchors, and classes loaded.'.format(model_path)) + + # 画框设置不同的颜色 + hsv_tuples = [(x / len(self.class_names), 1., 1.) + for x in range(len(self.class_names))] + self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) + self.colors = list( + map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), + self.colors)) + + # 打乱颜色 + np.random.seed(10101) + np.random.shuffle(self.colors) + np.random.seed(None) + + self.input_image_shape = K.placeholder(shape=(2, )) + + #---------------------------------------------------------# + # 在yolo_eval函数中,我们会对预测结果进行后处理 + # 后处理的内容包括,解码、非极大抑制、门限筛选等 + #---------------------------------------------------------# + boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors, + num_classes, self.input_image_shape, max_boxes = self.max_boxes, + score_threshold = self.score, iou_threshold = self.iou, letterbox_image=self.letterbox_image) + return boxes, scores, classes + + #---------------------------------------------------# + # 检测图片 + #---------------------------------------------------# + def detect_image(self, image_id, image, results): + #---------------------------------------------------------# + # 给图像增加灰条,实现不失真的resize + #---------------------------------------------------------# + if self.letterbox_image: + boxed_image = letterbox_image(image, (self.model_image_size[1],self.model_image_size[0])) + else: + boxed_image = image.convert('RGB') + boxed_image = boxed_image.resize((self.model_image_size[1],self.model_image_size[0]), Image.BICUBIC) + image_data = np.array(boxed_image, dtype='float32') + image_data /= 255. + #---------------------------------------------------------# + # 添加上batch_size维度 + #---------------------------------------------------------# + image_data = np.expand_dims(image_data, 0) + + #---------------------------------------------------------# + # 将图像输入网络当中进行预测! + #---------------------------------------------------------# + out_boxes, out_scores, out_classes = self.sess.run( + [self.boxes, self.scores, self.classes], + feed_dict={ + self.yolo_model.input: image_data, + self.input_image_shape: [image.size[1], image.size[0]], + K.learning_phase(): 0}) + + for i, c in enumerate(out_classes): + result = {} + predicted_class = self.class_names[c] + top, left, bottom, right = out_boxes[i] + + top = max(0, np.floor(top + 0.5).astype('int32')) + left = max(0, np.floor(left + 0.5).astype('int32')) + bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32')) + right = min(image.size[0], np.floor(right + 0.5).astype('int32')) + + result["image_id"] = int(image_id) + result["category_id"] = coco_classes[predicted_class] + result["bbox"] = [float(left),float(top),float(right-left),float(bottom-top)] + result["score"] = float(out_scores[i]) + results.append(result) + + return results + +yolo = mAP_YOLO() + +jpg_names = os.listdir("./coco_dataset/val2017") + +with open("./coco_dataset/eval_results.json","w") as f: + results = [] + for jpg_name in tqdm(jpg_names): + if jpg_name.endswith("jpg"): + image_path = "./coco_dataset/val2017/" + jpg_name + image = Image.open(image_path) + # 开启后在之后计算mAP可以可视化 + results = yolo.detect_image(jpg_name.split(".")[0],image,results) + json.dump(results,f) diff --git a/get_dr_txt.py b/get_dr_txt.py index d7e666a0c664507576aa3d9f05321d406a6e00a3..f6c802a13fe4d75d41ec9265be1d9afe5ba27b2e 100644 --- a/get_dr_txt.py +++ b/get_dr_txt.py @@ -13,7 +13,7 @@ from keras.layers import Input from PIL import Image from tqdm import tqdm -from nets.yolo4 import yolo_body, yolo_eval +from nets.yolo4_tiny import yolo_body, yolo_eval from utils.utils import letterbox_image from yolo import YOLO @@ -41,7 +41,7 @@ class mAP_YOLO(YOLO): try: self.yolo_model = load_model(model_path, compile=False) except: - self.yolo_model = yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes) + self.yolo_model = yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes) self.yolo_model.load_weights(self.model_path) else: assert self.yolo_model.layers[-1].output_shape[-1] == \ @@ -71,7 +71,7 @@ class mAP_YOLO(YOLO): #---------------------------------------------------------# boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors, num_classes, self.input_image_shape, max_boxes = self.max_boxes, - score_threshold = self.score, iou_threshold = self.iou) + score_threshold = self.score, iou_threshold = self.iou, letterbox_image=self.letterbox_image) return boxes, scores, classes #---------------------------------------------------# @@ -81,9 +81,13 @@ class mAP_YOLO(YOLO): f = open("./input/detection-results/"+image_id+".txt","w") #---------------------------------------------------------# # 给图像增加灰条,实现不失真的resize + # 也可以直接resize进行识别 #---------------------------------------------------------# - new_image_size = (self.model_image_size[1],self.model_image_size[0]) - boxed_image = letterbox_image(image, new_image_size) + if self.letterbox_image: + boxed_image = letterbox_image(image, (self.model_image_size[1],self.model_image_size[0])) + else: + boxed_image = image.convert('RGB') + boxed_image = boxed_image.resize((self.model_image_size[1],self.model_image_size[0]), Image.BICUBIC) image_data = np.array(boxed_image, dtype='float32') image_data /= 255. #---------------------------------------------------------# diff --git a/kmeans_for_anchors.py b/kmeans_for_anchors.py index 2dcbbc0e3d15abc624580a296271b126186da848..e289ac4518c1d9bde48c8fe481adc44d5946c3eb 100644 --- a/kmeans_for_anchors.py +++ b/kmeans_for_anchors.py @@ -86,7 +86,7 @@ if __name__ == '__main__': # 运行该程序会计算'./VOCdevkit/VOC2007/Annotations'的xml # 会生成yolo_anchors.txt SIZE = 416 - anchors_num = 9 + anchors_num = 6 # 载入数据集,可以使用VOC的xml path = r'./VOCdevkit/VOC2007/Annotations' diff --git a/nets/CSPdarknet53_tiny.py b/nets/CSPdarknet53_tiny.py new file mode 100644 index 0000000000000000000000000000000000000000..621d7bb466c061861bd366f50032751d2a92b7ec --- /dev/null +++ b/nets/CSPdarknet53_tiny.py @@ -0,0 +1,119 @@ +from functools import wraps + +import tensorflow as tf +from keras import backend as K +from keras.layers import (Add, Concatenate, Conv2D, Lambda, Layer, + MaxPooling2D, UpSampling2D, ZeroPadding2D) +from keras.layers.advanced_activations import LeakyReLU +from keras.layers.normalization import BatchNormalization +from keras.regularizers import l2 +from utils.utils import compose + + +def route_group(input_layer, groups, group_id): + convs = tf.split(input_layer, num_or_size_splits=groups, axis=-1) + return convs[group_id] + +#--------------------------------------------------# +# 单次卷积DarknetConv2D +# 如果步长为2则自己设定padding方式。 +# 测试中发现没有l2正则化效果更好,所以去掉了l2正则化 +#--------------------------------------------------# +@wraps(Conv2D) +def DarknetConv2D(*args, **kwargs): + # darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)} + darknet_conv_kwargs = {} + darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same' + darknet_conv_kwargs.update(kwargs) + return Conv2D(*args, **darknet_conv_kwargs) + +#---------------------------------------------------# +# 卷积块 +# DarknetConv2D + BatchNormalization + LeakyReLU +#---------------------------------------------------# +def DarknetConv2D_BN_Leaky(*args, **kwargs): + no_bias_kwargs = {'use_bias': False} + no_bias_kwargs.update(kwargs) + return compose( + DarknetConv2D(*args, **no_bias_kwargs), + BatchNormalization(), + LeakyReLU(alpha=0.1)) + +''' + input + | + DarknetConv2D_BN_Leaky + ----------------------- + | | + route_group route + | | + DarknetConv2D_BN_Leaky | + | | + ------------------- | + | | | + route_1 DarknetConv2D_BN_Leaky | + | | | + -------------Concatenate | + | | + ----DarknetConv2D_BN_Leaky | + | | | + feat Concatenate----------------- + | + MaxPooling2D +''' +#---------------------------------------------------# +# CSPdarknet_tiny的结构块 +# 存在一个大残差边 +# 这个大残差边绕过了很多的残差结构 +#---------------------------------------------------# +def resblock_body(x, num_filters): + # 利用一个3x3卷积进行特征整合 + x = DarknetConv2D_BN_Leaky(num_filters, (3,3))(x) + # 引出一个大的残差边route + route = x + + # 对特征层的通道进行分割,取第二部分作为主干部分。 + x = Lambda(route_group,arguments={'groups':2, 'group_id':1})(x) + # 对主干部分进行3x3卷积 + x = DarknetConv2D_BN_Leaky(int(num_filters/2), (3,3))(x) + # 引出一个小的残差边route_1 + route_1 = x + # 对第主干部分进行3x3卷积 + x = DarknetConv2D_BN_Leaky(int(num_filters/2), (3,3))(x) + # 主干部分与残差部分进行相接 + x = Concatenate()([x, route_1]) + + # 对相接后的结果进行1x1卷积 + x = DarknetConv2D_BN_Leaky(num_filters, (1,1))(x) + feat = x + x = Concatenate()([route, x]) + + # 利用最大池化进行高和宽的压缩 + x = MaxPooling2D(pool_size=[2,2],)(x) + + return x, feat + +#---------------------------------------------------# +# CSPdarknet_tiny的主体部分 +#---------------------------------------------------# +def darknet_body(x): + # 首先利用两次步长为2x2的3x3卷积进行高和宽的压缩 + # 416,416,3 -> 208,208,32 -> 104,104,64 + x = ZeroPadding2D(((1,0),(1,0)))(x) + x = DarknetConv2D_BN_Leaky(32, (3,3), strides=(2,2))(x) + x = ZeroPadding2D(((1,0),(1,0)))(x) + x = DarknetConv2D_BN_Leaky(64, (3,3), strides=(2,2))(x) + + # 104,104,64 -> 52,52,128 + x, _ = resblock_body(x,num_filters = 64) + # 52,52,128 -> 26,26,256 + x, _ = resblock_body(x,num_filters = 128) + # 26,26,256 -> x为13,13,512 + # -> feat1为26,26,256 + x, feat1 = resblock_body(x,num_filters = 256) + # 13,13,512 -> 13,13,512 + x = DarknetConv2D_BN_Leaky(512, (3,3))(x) + + feat2 = x + return feat1, feat2 + diff --git a/nets/ious.py b/nets/ious.py index a0c7a3ff3ff029f381460bec7b15f24c23cd05e4..7464c8379c73f3751e4bc74a770db22bd04b553a 100644 --- a/nets/ious.py +++ b/nets/ious.py @@ -1,6 +1,9 @@ -from keras import backend as K -import tensorflow as tf import math + +import tensorflow as tf +from keras import backend as K + + def box_ciou(b1, b2): """ 输入为: diff --git a/nets/loss.py b/nets/loss.py index cddbc39693499f92a24bde1270c88e8658a76560..174bf285bbd221a2b789bba31607a0480a2b0874 100644 --- a/nets/loss.py +++ b/nets/loss.py @@ -99,21 +99,20 @@ def box_iou(b1, b2): # loss值计算 #---------------------------------------------------# def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, label_smoothing=0.1, print_loss=False, normalize=True): - # 一共有三层 + # 一共有两层 num_layers = len(anchors)//3 #---------------------------------------------------------------------------------------------------# # 将预测结果和实际ground truth分开,args是[*model_body.output, *y_true] - # y_true是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。 - # yolo_outputs是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。 + # y_true是一个列表,包含两个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85) + # yolo_outputs是一个列表,包含两个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85) #---------------------------------------------------------------------------------------------------# y_true = args[num_layers:] yolo_outputs = args[:num_layers] #-----------------------------------------------------------# - # 13x13的特征层对应的anchor是[142, 110], [192, 243], [459, 401] - # 26x26的特征层对应的anchor是[36, 75], [76, 55], [72, 146] - # 52x52的特征层对应的anchor是[12, 16], [19, 36], [40, 28] + # 13x13的特征层对应的anchor是[81,82], [135,169], [344,319] + # 26x26的特征层对应的anchor是[23,27], [37,58], [81,82] #-----------------------------------------------------------# anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]] @@ -130,8 +129,8 @@ def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, label_smoothing=0.1, mf = K.cast(m, K.dtype(yolo_outputs[0])) #---------------------------------------------------------------------------------------------------# - # y_true是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。 - # yolo_outputs是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。 + # y_true是一个列表,包含两个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85) + # yolo_outputs是一个列表,包含两个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85) #---------------------------------------------------------------------------------------------------# for l in range(num_layers): #-----------------------------------------------------------# diff --git a/nets/yolo4_tiny.py b/nets/yolo4_tiny.py new file mode 100644 index 0000000000000000000000000000000000000000..92ebc2fa9d0bba2d9ed2203642b3c8c17b4ecabe --- /dev/null +++ b/nets/yolo4_tiny.py @@ -0,0 +1,279 @@ +from functools import wraps + +import numpy as np +import tensorflow as tf +from keras import backend as K +from keras.layers import (Add, Concatenate, Conv2D, MaxPooling2D, UpSampling2D, + ZeroPadding2D) +from keras.layers.advanced_activations import LeakyReLU +from keras.layers.normalization import BatchNormalization +from keras.models import Model +from keras.regularizers import l2 +from utils.utils import compose + +from nets.CSPdarknet53_tiny import darknet_body + + +#--------------------------------------------------# +# 单次卷积DarknetConv2D +# 如果步长为2则自己设定padding方式。 +# 测试中发现没有l2正则化效果更好,所以去掉了l2正则化 +#--------------------------------------------------# +@wraps(Conv2D) +def DarknetConv2D(*args, **kwargs): + # darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)} + darknet_conv_kwargs = {} + darknet_conv_kwargs['padding'] = 'valid' if kwargs.get('strides')==(2,2) else 'same' + darknet_conv_kwargs.update(kwargs) + return Conv2D(*args, **darknet_conv_kwargs) + + +#---------------------------------------------------# +# 卷积块 +# DarknetConv2D + BatchNormalization + LeakyReLU +#---------------------------------------------------# +def DarknetConv2D_BN_Leaky(*args, **kwargs): + no_bias_kwargs = {'use_bias': False} + no_bias_kwargs.update(kwargs) + return compose( + DarknetConv2D(*args, **no_bias_kwargs), + BatchNormalization(), + LeakyReLU(alpha=0.1)) + +#---------------------------------------------------# +# 特征层->最后的输出 +#---------------------------------------------------# +def yolo_body(inputs, num_anchors, num_classes): + #---------------------------------------------------# + # 生成CSPdarknet53_tiny的主干模型 + # feat1的shape为26,26,256 + # feat2的shape为13,13,512 + #---------------------------------------------------# + feat1, feat2 = darknet_body(inputs) + + # 13,13,512 -> 13,13,256 + P5 = DarknetConv2D_BN_Leaky(256, (1,1))(feat2) + # 13,13,256 -> 13,13,512 -> 13,13,255 + P5_output = DarknetConv2D_BN_Leaky(512, (3,3))(P5) + P5_output = DarknetConv2D(num_anchors*(num_classes+5), (1,1))(P5_output) + + # 13,13,256 -> 13,13,128 -> 26,26,128 + P5_upsample = compose(DarknetConv2D_BN_Leaky(128, (1,1)), UpSampling2D(2))(P5) + + # 26,26,256 + 26,26,128 -> 26,26,384 + P4 = Concatenate()([P5_upsample, feat1]) + + # 26,26,384 -> 26,26,256 -> 26,26,255 + P4_output = DarknetConv2D_BN_Leaky(256, (3,3))(P4) + P4_output = DarknetConv2D(num_anchors*(num_classes+5), (1,1))(P4_output) + + return Model(inputs, [P5_output, P4_output]) + +#---------------------------------------------------# +# 将预测值的每个特征层调成真实值 +#---------------------------------------------------# +def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False): + num_anchors = len(anchors) + #---------------------------------------------------# + # [1, 1, 1, num_anchors, 2] + #---------------------------------------------------# + anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2]) + + #---------------------------------------------------# + # 获得x,y的网格 + # (13, 13, 1, 2) + #---------------------------------------------------# + grid_shape = K.shape(feats)[1:3] + grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]), + [1, grid_shape[1], 1, 1]) + grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]), + [grid_shape[0], 1, 1, 1]) + grid = K.concatenate([grid_x, grid_y]) + grid = K.cast(grid, K.dtype(feats)) + + #---------------------------------------------------# + # 将预测结果调整成(batch_size,13,13,3,85) + # 85可拆分成4 + 1 + 80 + # 4代表的是中心宽高的调整参数 + # 1代表的是框的置信度 + # 80代表的是种类的置信度 + #---------------------------------------------------# + feats = K.reshape(feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5]) + + #---------------------------------------------------# + # 将预测值调成真实值 + # box_xy对应框的中心点 + # box_wh对应框的宽和高 + #---------------------------------------------------# + box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats)) + box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats)) + box_confidence = K.sigmoid(feats[..., 4:5]) + box_class_probs = K.sigmoid(feats[..., 5:]) + + #---------------------------------------------------------------------# + # 在计算loss的时候返回grid, feats, box_xy, box_wh + # 在预测的时候返回box_xy, box_wh, box_confidence, box_class_probs + #---------------------------------------------------------------------# + if calc_loss == True: + return grid, feats, box_xy, box_wh + return box_xy, box_wh, box_confidence, box_class_probs + +#---------------------------------------------------# +# 对box进行调整,使其符合真实图片的样子 +#---------------------------------------------------# +def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape): + #-----------------------------------------------------------------# + # 把y轴放前面是因为方便预测框和图像的宽高进行相乘 + #-----------------------------------------------------------------# + box_yx = box_xy[..., ::-1] + box_hw = box_wh[..., ::-1] + + input_shape = K.cast(input_shape, K.dtype(box_yx)) + image_shape = K.cast(image_shape, K.dtype(box_yx)) + + new_shape = K.round(image_shape * K.min(input_shape/image_shape)) + #-----------------------------------------------------------------# + # 这里求出来的offset是图像有效区域相对于图像左上角的偏移情况 + # new_shape指的是宽高缩放情况 + #-----------------------------------------------------------------# + offset = (input_shape-new_shape)/2./input_shape + scale = input_shape/new_shape + + box_yx = (box_yx - offset) * scale + box_hw *= scale + + box_mins = box_yx - (box_hw / 2.) + box_maxes = box_yx + (box_hw / 2.) + boxes = K.concatenate([ + box_mins[..., 0:1], # y_min + box_mins[..., 1:2], # x_min + box_maxes[..., 0:1], # y_max + box_maxes[..., 1:2] # x_max + ]) + + boxes *= K.concatenate([image_shape, image_shape]) + return boxes + +#---------------------------------------------------# +# 获取每个box和它的得分 +#---------------------------------------------------# +def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape, letterbox_image): + #-----------------------------------------------------------------# + # 将预测值调成真实值 + # box_xy : -1,13,13,3,2; + # box_wh : -1,13,13,3,2; + # box_confidence : -1,13,13,3,1; + # box_class_probs : -1,13,13,3,80; + #-----------------------------------------------------------------# + box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats, anchors, num_classes, input_shape) + #-----------------------------------------------------------------# + # 在图像传入网络预测前会进行letterbox_image给图像周围添加灰条 + # 因此生成的box_xy, box_wh是相对于有灰条的图像的 + # 我们需要对齐进行修改,去除灰条的部分。 + # 将box_xy、和box_wh调节成y_min,y_max,xmin,xmax + #-----------------------------------------------------------------# + if letterbox_image: + boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape) + else: + box_yx = box_xy[..., ::-1] + box_hw = box_wh[..., ::-1] + box_mins = box_yx - (box_hw / 2.) + box_maxes = box_yx + (box_hw / 2.) + + input_shape = K.cast(input_shape, K.dtype(box_yx)) + image_shape = K.cast(image_shape, K.dtype(box_yx)) + + boxes = K.concatenate([ + box_mins[..., 0:1] * image_shape[0], # y_min + box_mins[..., 1:2] * image_shape[1], # x_min + box_maxes[..., 0:1] * image_shape[0], # y_max + box_maxes[..., 1:2] * image_shape[1] # x_max + ]) + #-----------------------------------------------------------------# + # 获得最终得分和框的位置 + #-----------------------------------------------------------------# + boxes = K.reshape(boxes, [-1, 4]) + box_scores = box_confidence * box_class_probs + box_scores = K.reshape(box_scores, [-1, num_classes]) + return boxes, box_scores + +#---------------------------------------------------# +# 图片预测 +#---------------------------------------------------# +def yolo_eval(yolo_outputs, + anchors, + num_classes, + image_shape, + max_boxes=20, + score_threshold=.6, + iou_threshold=.5, + letterbox_image=True): + #---------------------------------------------------# + # 获得特征层的数量,有效特征层的数量为3 + #---------------------------------------------------# + num_layers = len(yolo_outputs) + #-----------------------------------------------------------# + # 13x13的特征层对应的anchor是[81,82], [135,169], [344,319] + # 26x26的特征层对应的anchor是[23,27], [37,58], [81,82] + #-----------------------------------------------------------# + anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]] + + #-----------------------------------------------------------# + # 这里获得的是输入图片的大小,一般是416x416 + #-----------------------------------------------------------# + input_shape = K.shape(yolo_outputs[0])[1:3] * 32 + boxes = [] + box_scores = [] + #-----------------------------------------------------------# + # 对每个特征层进行处理 + #-----------------------------------------------------------# + for l in range(num_layers): + _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape, image_shape, letterbox_image) + boxes.append(_boxes) + box_scores.append(_box_scores) + #-----------------------------------------------------------# + # 将每个特征层的结果进行堆叠 + #-----------------------------------------------------------# + boxes = K.concatenate(boxes, axis=0) + box_scores = K.concatenate(box_scores, axis=0) + + #-----------------------------------------------------------# + # 判断得分是否大于score_threshold + #-----------------------------------------------------------# + mask = box_scores >= score_threshold + max_boxes_tensor = K.constant(max_boxes, dtype='int32') + boxes_ = [] + scores_ = [] + classes_ = [] + for c in range(num_classes): + #-----------------------------------------------------------# + # 取出所有box_scores >= score_threshold的框,和成绩 + #-----------------------------------------------------------# + class_boxes = tf.boolean_mask(boxes, mask[:, c]) + class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c]) + + #-----------------------------------------------------------# + # 非极大抑制 + # 保留一定区域内得分最大的框 + #-----------------------------------------------------------# + nms_index = tf.image.non_max_suppression( + class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold) + + #-----------------------------------------------------------# + # 获取非极大抑制后的结果 + # 下列三个分别是 + # 框的位置,得分与种类 + #-----------------------------------------------------------# + class_boxes = K.gather(class_boxes, nms_index) + class_box_scores = K.gather(class_box_scores, nms_index) + classes = K.ones_like(class_box_scores, 'int32') * c + boxes_.append(class_boxes) + scores_.append(class_box_scores) + classes_.append(classes) + boxes_ = K.concatenate(boxes_, axis=0) + scores_ = K.concatenate(scores_, axis=0) + classes_ = K.concatenate(classes_, axis=0) + + return boxes_, scores_, classes_ + + diff --git a/predict.py b/predict.py index cbb247c0f97fd07bfce647d2ab63261bb27d3af8..4c58413edd70b7535db7878fca708807098d6f24 100644 --- a/predict.py +++ b/predict.py @@ -8,7 +8,6 @@ predict.py有几个注意点 from keras.layers import Input from PIL import Image -from nets.yolo4 import yolo_body from yolo import YOLO yolo = YOLO() diff --git a/test.py b/test.py index f8737413805c83e5af5ab65f2710fe8bb943dc68..f2a7565a99c49ddf80b6116d3c9ea8402329c5d2 100644 --- a/test.py +++ b/test.py @@ -5,7 +5,7 @@ #--------------------------------------------# from keras.layers import Input -from nets.yolo4 import yolo_body +from nets.yolo4_tiny import yolo_body if __name__ == "__main__": inputs = Input([416, 416, 3]) diff --git a/train.py b/train.py index 573beba8e39bfba9fdf275a6a2a82d73d7272cf6..f0d2c65ecc770bc05f146d49db88ea7ab9b075f6 100644 --- a/train.py +++ b/train.py @@ -9,7 +9,7 @@ from keras.models import Model from keras.optimizers import Adam from nets.loss import yolo_loss -from nets.yolo4 import yolo_body +from nets.yolo4_tiny import yolo_body from utils.utils import (WarmUpCosineDecayScheduler, get_random_data, get_random_data_with_Mosaic, rand) @@ -67,20 +67,19 @@ def data_generator(annotation_lines, batch_size, input_shape, anchors, num_class #---------------------------------------------------# def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes): assert (true_boxes[..., 4] [1,9,2] + # [6,2] -> [1,6,2] #-----------------------------------------------------------# anchors = np.expand_dims(anchors, 0) anchor_maxes = anchors / 2. @@ -127,10 +126,10 @@ def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes): #-----------------------------------------------------------# # 计算所有真实框和先验框的交并比 - # intersect_area [n,9] + # intersect_area [n,6] # box_area [n,1] - # anchor_area [1,9] - # iou [n,9] + # anchor_area [1,6] + # iou [n,6] #-----------------------------------------------------------# intersect_mins = np.maximum(box_mins, anchor_mins) intersect_maxes = np.minimum(box_maxes, anchor_maxes) @@ -200,7 +199,7 @@ if __name__ == "__main__": # 训练自己的数据集时提示维度不匹配正常 # 预测的东西都不一样了自然维度不匹配 #------------------------------------------------------# - weights_path = 'model_data/yolo4_weight.h5' + weights_path = 'model_data/yolov4_tiny_weights_coco.h5' #------------------------------------------------------# # 训练用图片大小 # 一般在416x416和608x608选择 @@ -239,8 +238,8 @@ if __name__ == "__main__": #------------------------------------------------------# image_input = Input(shape=(None, None, 3)) h, w = input_shape - print('Create YOLOv4 model with {} anchors and {} classes.'.format(num_anchors, num_classes)) - model_body = yolo_body(image_input, num_anchors//3, num_classes) + print('Create YOLOv4-Tiny model with {} anchors and {} classes.'.format(num_anchors, num_classes)) + model_body = yolo_body(image_input, num_anchors//2, num_classes) #------------------------------------------------------# # 载入预训练权重 @@ -252,12 +251,10 @@ if __name__ == "__main__": # 在这个地方设置损失,将网络的输出结果传入loss函数 # 把整个模型的输出作为loss #------------------------------------------------------# - y_true = [Input(shape=(h//{0:32, 1:16, 2:8}[l], w//{0:32, 1:16, 2:8}[l], \ - num_anchors//3, num_classes+5)) for l in range(3)] + y_true = [Input(shape=(h//{0:32, 1:16}[l], w//{0:32, 1:16}[l], num_anchors//2, num_classes+5)) for l in range(2)] loss_input = [*model_body.output, *y_true] model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss', - arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5, - 'label_smoothing': label_smoothing, 'normalize': normalize})(loss_input) + arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5, 'label_smoothing': label_smoothing, 'normalize':normalize})(loss_input) model = Model([model_body.input, *y_true], model_loss) @@ -287,10 +284,6 @@ if __name__ == "__main__": num_val = int(len(lines)*val_split) num_train = len(lines) - num_val - freeze_layers = 249 - for i in range(freeze_layers): model_body.layers[i].trainable = False - print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model_body.layers))) - #------------------------------------------------------# # 主干特征提取网络特征通用,冻结训练可以加快训练速度 # 也可以在训练初期防止权值被破坏。 @@ -299,10 +292,15 @@ if __name__ == "__main__": # Epoch总训练世代 # 提示OOM或者显存不足请调小Batch_size #------------------------------------------------------# + freeze_layers = 60 + for i in range(freeze_layers): model_body.layers[i].trainable = False + print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model_body.layers))) + + # 调整非主干模型first if True: Init_epoch = 0 Freeze_epoch = 50 - batch_size = 8 + batch_size = 32 learning_rate_base = 1e-3 if Cosine_scheduler: @@ -326,9 +324,9 @@ if __name__ == "__main__": model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size)) - model.fit_generator(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic, random=True), + model.fit_generator(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic), steps_per_epoch=max(1, num_train//batch_size), - validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False, random=False), + validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False), validation_steps=max(1, num_val//batch_size), epochs=Freeze_epoch, initial_epoch=Init_epoch, @@ -337,10 +335,11 @@ if __name__ == "__main__": for i in range(freeze_layers): model_body.layers[i].trainable = True + # 解冻后训练 if True: Freeze_epoch = 50 Epoch = 100 - batch_size = 2 + batch_size = 16 learning_rate_base = 1e-4 if Cosine_scheduler: @@ -364,9 +363,9 @@ if __name__ == "__main__": model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred}) print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size)) - model.fit_generator(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic, random=True), + model.fit_generator(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic), steps_per_epoch=max(1, num_train//batch_size), - validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False, random=False), + validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False), validation_steps=max(1, num_val//batch_size), epochs=Epoch, initial_epoch=Freeze_epoch, diff --git a/yolo.py b/yolo.py index f6885473f9f41a749465d92f1414fd63e2764a3d..834c8bef1b29d471513ae981b803578bbfdcaea1 100644 --- a/yolo.py +++ b/yolo.py @@ -1,3 +1,4 @@ +import collections import colorsys import copy import os @@ -9,7 +10,7 @@ from keras.layers import Input from keras.models import load_model from PIL import Image, ImageDraw, ImageFont -from nets.yolo4 import yolo_body, yolo_eval +from nets.yolo4_tiny import yolo_body, yolo_eval from utils.utils import letterbox_image @@ -21,7 +22,7 @@ from utils.utils import letterbox_image #--------------------------------------------# class YOLO(object): _defaults = { - "model_path" : 'model_data/yolo4_weight.h5', + "model_path" : 'model_data/yolov4_tiny_weights_coco.h5', "anchors_path" : 'model_data/yolo_anchors.txt', "classes_path" : 'model_data/coco_classes.txt', "score" : 0.5, @@ -29,7 +30,12 @@ class YOLO(object): "max_boxes" : 100, # 显存比较小可以使用416x416 # 显存比较大可以使用608x608 - "model_image_size" : (416, 416) + "model_image_size" : (416, 416), + #---------------------------------------------------------------------# + # 该变量用于控制是否使用letterbox_image对输入图像进行不失真的resize, + # 在多次测试后,发现关闭letterbox_image直接resize的效果更好 + #---------------------------------------------------------------------# + "letterbox_image" : False, } @classmethod @@ -89,7 +95,7 @@ class YOLO(object): try: self.yolo_model = load_model(model_path, compile=False) except: - self.yolo_model = yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes) + self.yolo_model = yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes) self.yolo_model.load_weights(self.model_path) else: assert self.yolo_model.layers[-1].output_shape[-1] == \ @@ -119,19 +125,22 @@ class YOLO(object): #---------------------------------------------------------# boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors, num_classes, self.input_image_shape, max_boxes = self.max_boxes, - score_threshold = self.score, iou_threshold = self.iou) + score_threshold = self.score, iou_threshold = self.iou, letterbox_image = self.letterbox_image) return boxes, scores, classes #---------------------------------------------------# # 检测图片 #---------------------------------------------------# def detect_image(self, image): - start = timer() #---------------------------------------------------------# # 给图像增加灰条,实现不失真的resize + # 也可以直接resize进行识别 #---------------------------------------------------------# - new_image_size = (self.model_image_size[1],self.model_image_size[0]) - boxed_image = letterbox_image(image, new_image_size) + if self.letterbox_image: + boxed_image = letterbox_image(image, (self.model_image_size[1],self.model_image_size[0])) + else: + boxed_image = image.convert('RGB') + boxed_image = boxed_image.resize((self.model_image_size[1],self.model_image_size[0]), Image.BICUBIC) image_data = np.array(boxed_image, dtype='float32') image_data /= 255. #---------------------------------------------------------# @@ -197,8 +206,6 @@ class YOLO(object): draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font) del draw - end = timer() - print(end - start) return image def close_session(self):