未验证 提交 b4284fd8 编写于 作者: B Bubbliiiing 提交者: GitHub

Add files via upload

上级 19307150
......@@ -17,20 +17,31 @@ video.py里面测试的FPS会低于该FPS,因为摄像头的读取频率有限
'''
class FPS_YOLO(YOLO):
def get_FPS(self, image, test_interval):
# 调整图片使其符合输入要求
new_image_size = (self.model_image_size[1],self.model_image_size[0])
boxed_image = letterbox_image(image, new_image_size)
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
# 也可以直接resize进行识别
#---------------------------------------------------------#
if self.letterbox_image:
boxed_image = letterbox_image(image, (self.model_image_size[1],self.model_image_size[0]))
else:
boxed_image = image.convert('RGB')
boxed_image = boxed_image.resize((self.model_image_size[1],self.model_image_size[0]), Image.BICUBIC)
image_data = np.array(boxed_image, dtype='float32')
image_data /= 255.
image_data = np.expand_dims(image_data, 0)
#---------------------------------------------------------#
# 添加上batch_size维度
#---------------------------------------------------------#
image_data = np.expand_dims(image_data, 0)
#---------------------------------------------------------#
# 将图像输入网络当中进行预测!
#---------------------------------------------------------#
out_boxes, out_scores, out_classes = self.sess.run(
[self.boxes, self.scores, self.classes],
feed_dict={
self.yolo_model.input: image_data,
self.input_image_shape: [image.size[1], image.size[0]],
K.learning_phase(): 0
})
K.learning_phase(): 0})
t1 = time.time()
for _ in range(test_interval):
......@@ -39,8 +50,7 @@ class FPS_YOLO(YOLO):
feed_dict={
self.yolo_model.input: image_data,
self.input_image_shape: [image.size[1], image.size[0]],
K.learning_phase(): 0
})
K.learning_phase(): 0})
t2 = time.time()
tact_time = (t2 - t1) / test_interval
return tact_time
......
import colorsys
import json
import os
import matplotlib.pyplot as plt
import numpy as np
import pylab
from keras import backend as K
from keras.applications.imagenet_utils import preprocess_input
from keras.layers import Input
from PIL import Image
from tqdm import tqdm
from nets.yolo4_tiny import yolo_body, yolo_eval
from utils.utils import letterbox_image
from yolo import YOLO
# Lookup table from class name to the integer stored as "category_id" in each
# exported result (it is indexed with the predicted class name in
# mAP_YOLO.detect_image).
# NOTE(review): the ids are not contiguous and look like COCO category ids — confirm.
# NOTE(review): the empty-string key mapped to 83 looks like a placeholder entry;
# verify it is intentional.
coco_classes = {'person': 1, 'bicycle': 2, 'car': 3, 'motorbike': 4, 'aeroplane': 5,
'bus': 6, 'train': 7, 'truck': 8, 'boat': 9, 'traffic light': 10, 'fire hydrant': 11,
'': 83, 'stop sign': 13, 'parking meter': 14, 'bench': 15, 'bird': 16, 'cat': 17,
'dog': 18, 'horse': 19, 'sheep': 20, 'cow': 21, 'elephant': 22, 'bear': 23, 'zebra': 24,
'giraffe': 25, 'backpack': 27, 'umbrella': 28, 'handbag': 31, 'tie': 32, 'suitcase': 33,
'frisbee': 34, 'skis': 35, 'snowboard': 36, 'sports ball': 37, 'kite': 38, 'baseball bat': 39,
'baseball glove': 40, 'skateboard': 41, 'surfboard': 42, 'tennis racket': 43, 'bottle': 44,
'wine glass': 46, 'cup': 47, 'fork': 48, 'knife': 49, 'spoon': 50, 'bowl': 51, 'banana': 52,
'apple': 53, 'sandwich': 54, 'orange': 55, 'broccoli': 56, 'carrot': 57, 'hot dog': 58,
'pizza': 59, 'donut': 60, 'cake': 61, 'chair': 62, 'sofa': 63, 'pottedplant': 64, 'bed': 65,
'diningtable': 67, 'toilet': 70, 'tvmonitor': 72, 'laptop': 73, 'mouse': 74, 'remote': 75,
'keyboard': 76, 'cell phone': 77, 'microwave': 78, 'oven': 79, 'toaster': 80, 'sink': 81,
'refrigerator': 82, 'book': 84, 'clock': 85, 'vase': 86, 'scissors': 87, 'teddy bear': 88,
'hair drier': 89, 'toothbrush': 90
}
class mAP_YOLO(YOLO):
    """YOLO subclass that collects detections as result dictionaries.

    ``generate`` rebuilds the prediction graph with a fixed score threshold of
    0.01 and an IoU threshold of 0.5. ``detect_image`` appends one dictionary
    per detected box (image id, category id, ``[x, y, w, h]`` box and score)
    to a caller-supplied list.
    """

    #---------------------------------------------------#
    #   Build the model and the post-processing graph
    #---------------------------------------------------#
    def generate(self):
        """Load the network and build the post-processing tensors.

        Returns:
            The ``(boxes, scores, classes)`` tensors produced by ``yolo_eval``.

        Raises:
            AssertionError: if ``self.model_path`` does not end in ``.h5``, or
                if a fully loaded model does not match the configured anchor
                and class counts.
        """
        # Imported here because this module has no file-level import of
        # load_model; without it the call below raised NameError, which the
        # former bare ``except`` silently swallowed.
        from keras.models import load_model

        # NOTE(review): presumably the score threshold is this low so that
        # low-confidence boxes are kept for mAP evaluation — confirm.
        self.score = 0.01
        self.iou = 0.5
        model_path = os.path.expanduser(self.model_path)
        assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'

        # Number of anchors and classes
        num_anchors = len(self.anchors)
        num_classes = len(self.class_names)

        # Try to load the file as a complete model first. If that fails,
        # build the architecture and load the file as weights only.
        try:
            self.yolo_model = load_model(model_path, compile=False)
        except Exception:
            self.yolo_model = yolo_body(Input(shape=(None, None, 3)), num_anchors // 2, num_classes)
            # Use the expanded path that was validated above.
            self.yolo_model.load_weights(model_path)
        else:
            assert self.yolo_model.layers[-1].output_shape[-1] == \
                num_anchors / len(self.yolo_model.output) * (num_classes + 5), \
                'Mismatch between model and given anchor and class sizes'

        print('{} model, anchors, and classes loaded.'.format(model_path))

        # One colour per class, evenly spaced in hue
        hsv_tuples = [(x / len(self.class_names), 1., 1.)
                      for x in range(len(self.class_names))]
        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
        self.colors = list(
            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
                self.colors))

        # Shuffle the colours with a fixed seed, then restore random seeding
        np.random.seed(10101)
        np.random.shuffle(self.colors)
        np.random.seed(None)

        self.input_image_shape = K.placeholder(shape=(2, ))

        #---------------------------------------------------------#
        #   yolo_eval post-processes the raw predictions:
        #   decoding, score thresholding and non-maximum suppression
        #---------------------------------------------------------#
        boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
                num_classes, self.input_image_shape, max_boxes = self.max_boxes,
                score_threshold = self.score, iou_threshold = self.iou, letterbox_image=self.letterbox_image)
        return boxes, scores, classes

    #---------------------------------------------------#
    #   Detect objects in one image
    #---------------------------------------------------#
    def detect_image(self, image_id, image, results):
        """Run detection on ``image`` and append the detections to ``results``.

        Args:
            image_id: identifier of the image; it is converted with ``int()``.
            image: image object providing ``size``, ``convert`` and ``resize``
                (a PIL image in the calling script).
            results: list that receives one dictionary per detection.

        Returns:
            The same ``results`` list, extended in place.
        """
        target_size = (self.model_image_size[1], self.model_image_size[0])
        #---------------------------------------------------------#
        #   Either pad the image with bars (letterbox) to resize
        #   without distortion, or resize it directly
        #---------------------------------------------------------#
        if self.letterbox_image:
            boxed_image = letterbox_image(image, target_size)
        else:
            boxed_image = image.convert('RGB')
            boxed_image = boxed_image.resize(target_size, Image.BICUBIC)
        image_data = np.array(boxed_image, dtype='float32')
        image_data /= 255.
        #---------------------------------------------------------#
        #   Add the batch dimension
        #---------------------------------------------------------#
        image_data = np.expand_dims(image_data, 0)

        #---------------------------------------------------------#
        #   Feed the image through the network
        #---------------------------------------------------------#
        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
                K.learning_phase(): 0})

        for i, c in enumerate(out_classes):
            result = {}
            predicted_class = self.class_names[c]
            top, left, bottom, right = out_boxes[i]

            # Round to the nearest pixel and clamp to the image bounds
            top = max(0, np.floor(top + 0.5).astype('int32'))
            left = max(0, np.floor(left + 0.5).astype('int32'))
            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))

            result["image_id"] = int(image_id)
            result["category_id"] = coco_classes[predicted_class]
            # Box is stored as [left, top, width, height]
            result["bbox"] = [float(left),float(top),float(right-left),float(bottom-top)]
            result["score"] = float(out_scores[i])
            results.append(result)

        return results
# Run the detector over every JPEG in the validation folder and write all
# detections to a single JSON file.
yolo = mAP_YOLO()
jpg_names = os.listdir("./coco_dataset/val2017")

with open("./coco_dataset/eval_results.json","w") as f:
    results = []
    for jpg_name in tqdm(jpg_names):
        # Ignore directory entries that are not JPEG files
        if not jpg_name.endswith("jpg"):
            continue
        image_path = "./coco_dataset/val2017/" + jpg_name
        image = Image.open(image_path)
        # The file name up to the first dot is used as the image id;
        # detect_image appends to and returns the running results list.
        results = yolo.detect_image(jpg_name.split(".")[0],image,results)
    json.dump(results,f)
......@@ -13,7 +13,7 @@ from keras.layers import Input
from PIL import Image
from tqdm import tqdm
from nets.yolo4 import yolo_body, yolo_eval
from nets.yolo4_tiny import yolo_body, yolo_eval
from utils.utils import letterbox_image
from yolo import YOLO
......@@ -41,7 +41,7 @@ class mAP_YOLO(YOLO):
try:
self.yolo_model = load_model(model_path, compile=False)
except:
self.yolo_model = yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes)
self.yolo_model = yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes)
self.yolo_model.load_weights(self.model_path)
else:
assert self.yolo_model.layers[-1].output_shape[-1] == \
......@@ -71,7 +71,7 @@ class mAP_YOLO(YOLO):
#---------------------------------------------------------#
boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
num_classes, self.input_image_shape, max_boxes = self.max_boxes,
score_threshold = self.score, iou_threshold = self.iou)
score_threshold = self.score, iou_threshold = self.iou, letterbox_image=self.letterbox_image)
return boxes, scores, classes
#---------------------------------------------------#
......@@ -81,9 +81,13 @@ class mAP_YOLO(YOLO):
f = open("./input/detection-results/"+image_id+".txt","w")
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
# 也可以直接resize进行识别
#---------------------------------------------------------#
new_image_size = (self.model_image_size[1],self.model_image_size[0])
boxed_image = letterbox_image(image, new_image_size)
if self.letterbox_image:
boxed_image = letterbox_image(image, (self.model_image_size[1],self.model_image_size[0]))
else:
boxed_image = image.convert('RGB')
boxed_image = boxed_image.resize((self.model_image_size[1],self.model_image_size[0]), Image.BICUBIC)
image_data = np.array(boxed_image, dtype='float32')
image_data /= 255.
#---------------------------------------------------------#
......
......@@ -86,7 +86,7 @@ if __name__ == '__main__':
# 运行该程序会计算'./VOCdevkit/VOC2007/Annotations'的xml
# 会生成yolo_anchors.txt
SIZE = 416
anchors_num = 9
anchors_num = 6
# 载入数据集,可以使用VOC的xml
path = r'./VOCdevkit/VOC2007/Annotations'
......
from functools import wraps
import tensorflow as tf
from keras import backend as K
from keras.layers import (Add, Concatenate, Conv2D, Lambda, Layer,
MaxPooling2D, UpSampling2D, ZeroPadding2D)
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.normalization import BatchNormalization
from keras.regularizers import l2
from utils.utils import compose
def route_group(input_layer, groups, group_id):
    """Split ``input_layer`` along its last axis and return one of the parts.

    Args:
        input_layer: tensor to split.
        groups: passed to ``tf.split`` as ``num_or_size_splits``.
        group_id: index of the part to return.
    """
    return tf.split(input_layer, num_or_size_splits=groups, axis=-1)[group_id]
#--------------------------------------------------#
# 单次卷积DarknetConv2D
# 如果步长为2则自己设定padding方式。
# 测试中发现没有l2正则化效果更好,所以去掉了l2正则化
#--------------------------------------------------#
@wraps(Conv2D)
def DarknetConv2D(*args, **kwargs):
    # Conv2D factory with a Darknet-style default for ``padding``:
    # 'valid' when strides is exactly (2, 2), 'same' otherwise.
    # Keywords supplied by the caller (``padding`` included) take precedence.
    # No kernel regulariser is set; the file notes that L2 regularisation
    # was removed because results were better without it.
    default_padding = 'valid' if kwargs.get('strides')==(2,2) else 'same'
    conv_kwargs = {'padding': default_padding}
    conv_kwargs.update(kwargs)
    return Conv2D(*args, **conv_kwargs)
#---------------------------------------------------#
# 卷积块
# DarknetConv2D + BatchNormalization + LeakyReLU
#---------------------------------------------------#
def DarknetConv2D_BN_Leaky(*args, **kwargs):
    """Combine DarknetConv2D, BatchNormalization and LeakyReLU(alpha=0.1) via ``compose``.

    The convolution is created with ``use_bias=False`` unless the caller
    passes ``use_bias`` explicitly.
    """
    conv_kwargs = {'use_bias': False}
    conv_kwargs.update(kwargs)
    # Layers are instantiated in this order: convolution, normalisation, activation.
    conv = DarknetConv2D(*args, **conv_kwargs)
    norm = BatchNormalization()
    activation = LeakyReLU(alpha=0.1)
    return compose(conv, norm, activation)
'''
input
|
DarknetConv2D_BN_Leaky
-----------------------
| |
route_group route
| |
DarknetConv2D_BN_Leaky |
| |
------------------- |
| | |
route_1 DarknetConv2D_BN_Leaky |
| | |
-------------Concatenate |
| |
----DarknetConv2D_BN_Leaky |
| | |
feat Concatenate-----------------
|
MaxPooling2D
'''
#---------------------------------------------------#
# CSPdarknet_tiny的结构块
# 存在一个大残差边
# 这个大残差边绕过了很多的残差结构
#---------------------------------------------------#
def resblock_body(x, num_filters):
    """CSPDarknet-tiny block with one large and one small shortcut branch.

    Args:
        x: input feature tensor.
        num_filters: filter count of the first 3x3 and the final 1x1
            convolution; the two inner 3x3 convolutions use half of it.

    Returns:
        ``(pooled, feat)``: ``feat`` is the output of the 1x1 convolution and
        ``pooled`` is the 2x2 max-pooled concatenation of the large shortcut
        with ``feat``.
    """
    half_filters = int(num_filters/2)

    # 3x3 convolution; its output is also kept as the large shortcut branch
    stem = DarknetConv2D_BN_Leaky(num_filters, (3,3))(x)
    route = stem

    # Split the channels into two groups and continue with the second one
    trunk = Lambda(route_group,arguments={'groups':2, 'group_id':1})(stem)

    # First inner 3x3 convolution; its output is the small shortcut branch
    trunk = DarknetConv2D_BN_Leaky(half_filters, (3,3))(trunk)
    route_1 = trunk

    # Second inner 3x3 convolution, then join with the small shortcut
    trunk = DarknetConv2D_BN_Leaky(half_filters, (3,3))(trunk)
    trunk = Concatenate()([trunk, route_1])

    # 1x1 convolution on the joined tensor; this is the returned feature map
    feat = DarknetConv2D_BN_Leaky(num_filters, (1,1))(trunk)

    # Join with the large shortcut and downsample with 2x2 max pooling
    merged = Concatenate()([route, feat])
    pooled = MaxPooling2D(pool_size=[2,2],)(merged)

    return pooled, feat
#---------------------------------------------------#
# CSPdarknet_tiny的主体部分
#---------------------------------------------------#
def darknet_body(x):
    """Build the CSPDarknet-tiny backbone and return two feature maps.

    The shapes in the comments below are the ones given in the original
    notes for a 416x416x3 input.

    Returns:
        ``(feat1, feat2)``: ``feat1`` is the feature output of the third
        residual block and ``feat2`` is the output of the final 3x3
        convolution.
    """
    # Two stride-2 3x3 convolutions, each preceded by top/left zero padding
    # 416,416,3 -> 208,208,32 -> 104,104,64
    net = ZeroPadding2D(((1,0),(1,0)))(x)
    net = DarknetConv2D_BN_Leaky(32, (3,3), strides=(2,2))(net)
    net = ZeroPadding2D(((1,0),(1,0)))(net)
    net = DarknetConv2D_BN_Leaky(64, (3,3), strides=(2,2))(net)

    # 104,104,64 -> 52,52,128
    net, _ = resblock_body(net,num_filters = 64)
    # 52,52,128 -> 26,26,256
    net, _ = resblock_body(net,num_filters = 128)
    # 26,26,256 -> 13,13,512 for the main path, 26,26,256 for feat1
    net, feat1 = resblock_body(net,num_filters = 256)

    # 13,13,512 -> 13,13,512
    feat2 = DarknetConv2D_BN_Leaky(512, (3,3))(net)
    return feat1, feat2
from keras import backend as K
import tensorflow as tf
import math
import tensorflow as tf
from keras import backend as K
def box_ciou(b1, b2):
"""
输入为:
......
......@@ -99,21 +99,20 @@ def box_iou(b1, b2):
# loss值计算
#---------------------------------------------------#
def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, label_smoothing=0.1, print_loss=False, normalize=True):
# 一共有
# 一共有
num_layers = len(anchors)//3
#---------------------------------------------------------------------------------------------------#
# 将预测结果和实际ground truth分开,args是[*model_body.output, *y_true]
# y_true是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
# yolo_outputs是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
# y_true是一个列表,包含两个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85)
# yolo_outputs是一个列表,包含两个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85)
#---------------------------------------------------------------------------------------------------#
y_true = args[num_layers:]
yolo_outputs = args[:num_layers]
#-----------------------------------------------------------#
# 13x13的特征层对应的anchor是[142, 110], [192, 243], [459, 401]
# 26x26的特征层对应的anchor是[36, 75], [76, 55], [72, 146]
# 52x52的特征层对应的anchor是[12, 16], [19, 36], [40, 28]
# 13x13的特征层对应的anchor是[81,82], [135,169], [344,319]
# 26x26的特征层对应的anchor是[23,27], [37,58], [81,82]
#-----------------------------------------------------------#
anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
......@@ -130,8 +129,8 @@ def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, label_smoothing=0.1,
mf = K.cast(m, K.dtype(yolo_outputs[0]))
#---------------------------------------------------------------------------------------------------#
# y_true是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
# yolo_outputs是一个列表,包含三个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
# y_true是一个列表,包含两个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85)
# yolo_outputs是一个列表,包含两个特征层,shape分别为(m,13,13,3,85),(m,26,26,3,85)
#---------------------------------------------------------------------------------------------------#
for l in range(num_layers):
#-----------------------------------------------------------#
......
from functools import wraps
import numpy as np
import tensorflow as tf
from keras import backend as K
from keras.layers import (Add, Concatenate, Conv2D, MaxPooling2D, UpSampling2D,
ZeroPadding2D)
from keras.layers.advanced_activations import LeakyReLU
from keras.layers.normalization import BatchNormalization
from keras.models import Model
from keras.regularizers import l2
from utils.utils import compose
from nets.CSPdarknet53_tiny import darknet_body
#--------------------------------------------------#
# 单次卷积DarknetConv2D
# 如果步长为2则自己设定padding方式。
# 测试中发现没有l2正则化效果更好,所以去掉了l2正则化
#--------------------------------------------------#
@wraps(Conv2D)
def DarknetConv2D(*args, **kwargs):
    # Conv2D factory whose ``padding`` defaults to 'valid' when strides is
    # exactly (2, 2) and to 'same' otherwise. Caller-supplied keywords
    # override this default.
    # No kernel regulariser is set; the file notes that L2 regularisation
    # was removed because results were better without it.
    if kwargs.get('strides')==(2,2):
        layer_kwargs = {'padding': 'valid'}
    else:
        layer_kwargs = {'padding': 'same'}
    layer_kwargs.update(kwargs)
    return Conv2D(*args, **layer_kwargs)
#---------------------------------------------------#
# 卷积块
# DarknetConv2D + BatchNormalization + LeakyReLU
#---------------------------------------------------#
def DarknetConv2D_BN_Leaky(*args, **kwargs):
    """Combine DarknetConv2D, BatchNormalization and LeakyReLU(alpha=0.1) via ``compose``.

    ``use_bias`` defaults to False for the convolution; a caller-supplied
    value overrides it.
    """
    layer_kwargs = {'use_bias': False}
    layer_kwargs.update(kwargs)
    # Instantiation order is convolution, batch normalisation, activation.
    stages = (
        DarknetConv2D(*args, **layer_kwargs),
        BatchNormalization(),
        LeakyReLU(alpha=0.1),
    )
    return compose(*stages)
#---------------------------------------------------#
# 特征层->最后的输出
#---------------------------------------------------#
def yolo_body(inputs, num_anchors, num_classes):
    """Build the YOLOv4-tiny model on top of the CSPDarknet-tiny backbone.

    Args:
        inputs: Keras input tensor of the network.
        num_anchors: number of anchors predicted by each output layer.
        num_classes: number of object classes.

    Returns:
        A Keras ``Model`` with two outputs, ``[P5_output, P4_output]``, each
        with ``num_anchors * (num_classes + 5)`` channels. The shapes in the
        comments below are the ones given in the original notes for a
        416x416 input.
    """
    out_channels = num_anchors*(num_classes+5)

    # Backbone features: feat1 is 26,26,256 and feat2 is 13,13,512
    feat1, feat2 = darknet_body(inputs)

    # 13,13,512 -> 13,13,256
    p5 = DarknetConv2D_BN_Leaky(256, (1,1))(feat2)

    # Coarse head: 13,13,256 -> 13,13,512 -> 13,13,out_channels
    p5_head = DarknetConv2D_BN_Leaky(512, (3,3))(p5)
    p5_head = DarknetConv2D(out_channels, (1,1))(p5_head)

    # 13,13,256 -> 13,13,128 -> 26,26,128
    upsample_branch = compose(DarknetConv2D_BN_Leaky(128, (1,1)), UpSampling2D(2))
    p5_up = upsample_branch(p5)

    # 26,26,128 + 26,26,256 -> 26,26,384
    p4 = Concatenate()([p5_up, feat1])

    # Fine head: 26,26,384 -> 26,26,256 -> 26,26,out_channels
    p4_head = DarknetConv2D_BN_Leaky(256, (3,3))(p4)
    p4_head = DarknetConv2D(out_channels, (1,1))(p4_head)

    return Model(inputs, [p5_head, p4_head])
#---------------------------------------------------#
# 将预测值的每个特征层调成真实值
#---------------------------------------------------#
def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
    """Decode the raw output of one feature layer into box parameters.

    Args:
        feats: raw network output of one feature layer.
        anchors: anchors assigned to this layer; ``len(anchors)`` gives the
            anchor count.
        num_classes: number of object classes.
        input_shape: network input size; it is reversed before dividing the
            box sizes.
        calc_loss: when equal to ``True`` the loss-time tuple is returned.

    Returns:
        ``(grid, feats, box_xy, box_wh)`` when ``calc_loss == True``,
        otherwise ``(box_xy, box_wh, box_confidence, box_class_probs)``.
    """
    num_anchors = len(anchors)
    feat_dtype = K.dtype(feats)

    # Anchors reshaped to [1, 1, 1, num_anchors, 2]
    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])

    # Cell index grid of shape (grid_h, grid_w, 1, 2), stored as (x, y)
    grid_shape = K.shape(feats)[1:3]
    grid_h = grid_shape[0]
    grid_w = grid_shape[1]
    rows = K.reshape(K.arange(0, stop=grid_h), [-1, 1, 1, 1])
    grid_y = K.tile(rows, [1, grid_w, 1, 1])
    cols = K.reshape(K.arange(0, stop=grid_w), [1, -1, 1, 1])
    grid_x = K.tile(cols, [grid_h, 1, 1, 1])
    grid = K.cast(K.concatenate([grid_x, grid_y]), feat_dtype)

    # Reshape predictions to (batch, grid_h, grid_w, num_anchors, num_classes + 5).
    # The last axis holds 4 box parameters, 1 objectness value and the class scores.
    feats = K.reshape(feats, [-1, grid_h, grid_w, num_anchors, num_classes + 5])

    # Box centres: sigmoid offset plus cell index, divided by the grid size (w, h)
    centre_offsets = K.sigmoid(feats[..., :2])
    box_xy = (centre_offsets + grid) / K.cast(grid_shape[::-1], feat_dtype)
    # Box sizes: exponential scaling of the anchors, divided by the reversed input size
    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], feat_dtype)
    box_confidence = K.sigmoid(feats[..., 4:5])
    box_class_probs = K.sigmoid(feats[..., 5:])

    # The equality comparison against True is kept exactly as written
    if calc_loss == True:
        return grid, feats, box_xy, box_wh
    return box_xy, box_wh, box_confidence, box_class_probs
#---------------------------------------------------#
# 对box进行调整,使其符合真实图片的样子
#---------------------------------------------------#
def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
    """Map boxes from the letterboxed network input back onto the original image.

    Args:
        box_xy: box centres; the last axis is reversed to (y, x) internally.
        box_wh: box sizes; the last axis is reversed to (h, w) internally.
        input_shape: network input size.
        image_shape: original image size.

    Returns:
        Boxes as ``[y_min, x_min, y_max, x_max]`` along the last axis, scaled
        by ``image_shape``.
    """
    # Work in (y, x) / (h, w) order so the values line up with the shapes
    centre_yx = box_xy[..., ::-1]
    size_hw = box_wh[..., ::-1]

    input_shape = K.cast(input_shape, K.dtype(centre_yx))
    image_shape = K.cast(image_shape, K.dtype(centre_yx))

    # Size of the image content after the aspect-preserving resize
    new_shape = K.round(image_shape * K.min(input_shape/image_shape))

    # offset: relative position of the content inside the padded input
    # scale:  ratio between the padded input and the content
    offset = (input_shape-new_shape)/2./input_shape
    scale = input_shape/new_shape

    centre_yx = (centre_yx - offset) * scale
    size_hw = size_hw * scale

    half_hw = size_hw / 2.
    box_mins = centre_yx - half_hw
    box_maxes = centre_yx + half_hw
    corners = K.concatenate([
        box_mins[..., 0:1],   # y_min
        box_mins[..., 1:2],   # x_min
        box_maxes[..., 0:1],  # y_max
        box_maxes[..., 1:2]   # x_max
    ])

    # Scale the relative coordinates by the original image size
    corners = corners * K.concatenate([image_shape, image_shape])
    return corners
#---------------------------------------------------#
# 获取每个box和它的得分
#---------------------------------------------------#
def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape, letterbox_image):
    """Decode one feature layer into flat box coordinates and per-class scores.

    Args:
        feats: raw network output of one feature layer.
        anchors: anchors assigned to this layer.
        num_classes: number of object classes.
        input_shape: network input size.
        image_shape: original image size.
        letterbox_image: when truthy, boxes are corrected with
            ``yolo_correct_boxes``; otherwise they are scaled directly by
            ``image_shape``.

    Returns:
        ``(boxes, box_scores)`` reshaped to ``[-1, 4]`` and
        ``[-1, num_classes]`` respectively.
    """
    # Decode the raw predictions into centres, sizes, objectness and class scores
    box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats, anchors, num_classes, input_shape)

    if letterbox_image:
        # Boxes are relative to the padded input, so the padding is removed
        boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)
    else:
        # Plain resize: convert centre/size to corners in (y, x) order
        centre_yx = box_xy[..., ::-1]
        size_hw = box_wh[..., ::-1]
        box_mins = centre_yx - (size_hw / 2.)
        box_maxes = centre_yx + (size_hw / 2.)

        # input_shape is cast here too, although this branch does not use it afterwards
        input_shape = K.cast(input_shape, K.dtype(centre_yx))
        image_shape = K.cast(image_shape, K.dtype(centre_yx))

        img_h = image_shape[0]
        img_w = image_shape[1]
        boxes = K.concatenate([
            box_mins[..., 0:1] * img_h,   # y_min
            box_mins[..., 1:2] * img_w,   # x_min
            box_maxes[..., 0:1] * img_h,  # y_max
            box_maxes[..., 1:2] * img_w   # x_max
        ])

    # Flatten the boxes and combine objectness with the class scores
    boxes = K.reshape(boxes, [-1, 4])
    box_scores = K.reshape(box_confidence * box_class_probs, [-1, num_classes])
    return boxes, box_scores
#---------------------------------------------------#
# 图片预测
#---------------------------------------------------#
def yolo_eval(yolo_outputs,
              anchors,
              num_classes,
              image_shape,
              max_boxes=20,
              score_threshold=.6,
              iou_threshold=.5,
              letterbox_image=True):
    """Turn the raw network outputs into final detections.

    Every feature layer is decoded, boxes below ``score_threshold`` are
    dropped, and non-maximum suppression is applied separately per class.

    Args:
        yolo_outputs: list of raw output tensors, one per feature layer.
        anchors: all anchors; it is indexed with a list of indices per layer.
        num_classes: number of object classes.
        image_shape: original image size.
        max_boxes: maximum number of boxes kept per class by NMS.
        score_threshold: minimum class score for a box to be kept.
        iou_threshold: IoU threshold passed to NMS.
        letterbox_image: forwarded to ``yolo_boxes_and_scores``.

    Returns:
        ``(boxes, scores, classes)`` tensors concatenated over all classes.
    """
    # Number of feature layers is taken from the outputs themselves
    num_layers = len(yolo_outputs)

    # Anchor indices per layer; the two-layer masks are [3,4,5] and [1,2,3].
    # Per the original notes: 13x13 uses [81,82], [135,169], [344,319] and
    # 26x26 uses [23,27], [37,58], [81,82].
    if num_layers==3:
        anchor_mask = [[6,7,8], [3,4,5], [0,1,2]]
    else:
        anchor_mask = [[3,4,5], [1,2,3]]

    # Network input size: spatial size of the first output times 32
    input_shape = K.shape(yolo_outputs[0])[1:3] * 32

    # Decode each feature layer
    layer_boxes = []
    layer_scores = []
    for l, layer_output in enumerate(yolo_outputs):
        _boxes, _box_scores = yolo_boxes_and_scores(layer_output, anchors[anchor_mask[l]], num_classes, input_shape, image_shape, letterbox_image)
        layer_boxes.append(_boxes)
        layer_scores.append(_box_scores)

    # Stack the results of all feature layers
    boxes = K.concatenate(layer_boxes, axis=0)
    box_scores = K.concatenate(layer_scores, axis=0)

    # Boolean mask of scores that reach the threshold
    keep_mask = box_scores >= score_threshold
    max_boxes_tensor = K.constant(max_boxes, dtype='int32')

    kept_boxes = []
    kept_scores = []
    kept_classes = []
    for c in range(num_classes):
        class_keep = keep_mask[:, c]

        # Boxes and scores of this class that passed the threshold
        class_boxes = tf.boolean_mask(boxes, class_keep)
        class_box_scores = tf.boolean_mask(box_scores[:, c], class_keep)

        # Non-maximum suppression among the boxes of this class
        nms_index = tf.image.non_max_suppression(
            class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)

        # Gather the surviving boxes and scores, and label them with the class index
        class_boxes = K.gather(class_boxes, nms_index)
        class_box_scores = K.gather(class_box_scores, nms_index)
        class_ids = K.ones_like(class_box_scores, 'int32') * c

        kept_boxes.append(class_boxes)
        kept_scores.append(class_box_scores)
        kept_classes.append(class_ids)

    boxes_ = K.concatenate(kept_boxes, axis=0)
    scores_ = K.concatenate(kept_scores, axis=0)
    classes_ = K.concatenate(kept_classes, axis=0)

    return boxes_, scores_, classes_
......@@ -8,7 +8,6 @@ predict.py有几个注意点
from keras.layers import Input
from PIL import Image
from nets.yolo4 import yolo_body
from yolo import YOLO
yolo = YOLO()
......
......@@ -5,7 +5,7 @@
#--------------------------------------------#
from keras.layers import Input
from nets.yolo4 import yolo_body
from nets.yolo4_tiny import yolo_body
if __name__ == "__main__":
inputs = Input([416, 416, 3])
......
......@@ -9,7 +9,7 @@ from keras.models import Model
from keras.optimizers import Adam
from nets.loss import yolo_loss
from nets.yolo4 import yolo_body
from nets.yolo4_tiny import yolo_body
from utils.utils import (WarmUpCosineDecayScheduler, get_random_data,
get_random_data_with_Mosaic, rand)
......@@ -67,20 +67,19 @@ def data_generator(annotation_lines, batch_size, input_shape, anchors, num_class
#---------------------------------------------------#
def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
assert (true_boxes[..., 4]<num_classes).all(), 'class id must be less than num_classes'
# 一共有个特征层数
# 一共有个特征层数
num_layers = len(anchors)//3
#-----------------------------------------------------------#
# 13x13的特征层对应的anchor是[142, 110], [192, 243], [459, 401]
# 26x26的特征层对应的anchor是[36, 75], [76, 55], [72, 146]
# 52x52的特征层对应的anchor是[12, 16], [19, 36], [40, 28]
# 13x13的特征层对应的anchor是[81,82], [135,169], [344,319]
# 26x26的特征层对应的anchor是[23,27], [37,58], [81,82]
#-----------------------------------------------------------#
anchor_mask = [[6,7,8], [3,4,5], [0,1,2]]
anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
#-----------------------------------------------------------#
# 获得框的坐标和图片的大小
#-----------------------------------------------------------#
true_boxes = np.array(true_boxes, dtype='float32')
input_shape = np.array(input_shape, dtype='int32')
input_shape = np.array(input_shape, dtype='int32')
#-----------------------------------------------------------#
# 通过计算获得真实框的中心和宽高
# 中心点(m,n,2) 宽高(m,n,2)
......@@ -103,7 +102,7 @@ def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
dtype='float32') for l in range(num_layers)]
#-----------------------------------------------------------#
# [9,2] -> [1,9,2]
# [6,2] -> [1,6,2]
#-----------------------------------------------------------#
anchors = np.expand_dims(anchors, 0)
anchor_maxes = anchors / 2.
......@@ -127,10 +126,10 @@ def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
#-----------------------------------------------------------#
# 计算所有真实框和先验框的交并比
# intersect_area [n,9]
# intersect_area [n,6]
# box_area [n,1]
# anchor_area [1,9]
# iou [n,9]
# anchor_area [1,6]
# iou [n,6]
#-----------------------------------------------------------#
intersect_mins = np.maximum(box_mins, anchor_mins)
intersect_maxes = np.minimum(box_maxes, anchor_maxes)
......@@ -200,7 +199,7 @@ if __name__ == "__main__":
# 训练自己的数据集时提示维度不匹配正常
# 预测的东西都不一样了自然维度不匹配
#------------------------------------------------------#
weights_path = 'model_data/yolo4_weight.h5'
weights_path = 'model_data/yolov4_tiny_weights_coco.h5'
#------------------------------------------------------#
# 训练用图片大小
# 一般在416x416和608x608选择
......@@ -239,8 +238,8 @@ if __name__ == "__main__":
#------------------------------------------------------#
image_input = Input(shape=(None, None, 3))
h, w = input_shape
print('Create YOLOv4 model with {} anchors and {} classes.'.format(num_anchors, num_classes))
model_body = yolo_body(image_input, num_anchors//3, num_classes)
print('Create YOLOv4-Tiny model with {} anchors and {} classes.'.format(num_anchors, num_classes))
model_body = yolo_body(image_input, num_anchors//2, num_classes)
#------------------------------------------------------#
# 载入预训练权重
......@@ -252,12 +251,10 @@ if __name__ == "__main__":
# 在这个地方设置损失,将网络的输出结果传入loss函数
# 把整个模型的输出作为loss
#------------------------------------------------------#
y_true = [Input(shape=(h//{0:32, 1:16, 2:8}[l], w//{0:32, 1:16, 2:8}[l], \
num_anchors//3, num_classes+5)) for l in range(3)]
y_true = [Input(shape=(h//{0:32, 1:16}[l], w//{0:32, 1:16}[l], num_anchors//2, num_classes+5)) for l in range(2)]
loss_input = [*model_body.output, *y_true]
model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5,
'label_smoothing': label_smoothing, 'normalize': normalize})(loss_input)
arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5, 'label_smoothing': label_smoothing, 'normalize':normalize})(loss_input)
model = Model([model_body.input, *y_true], model_loss)
......@@ -287,10 +284,6 @@ if __name__ == "__main__":
num_val = int(len(lines)*val_split)
num_train = len(lines) - num_val
freeze_layers = 249
for i in range(freeze_layers): model_body.layers[i].trainable = False
print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model_body.layers)))
#------------------------------------------------------#
# 主干特征提取网络特征通用,冻结训练可以加快训练速度
# 也可以在训练初期防止权值被破坏。
......@@ -299,10 +292,15 @@ if __name__ == "__main__":
# Epoch总训练世代
# 提示OOM或者显存不足请调小Batch_size
#------------------------------------------------------#
freeze_layers = 60
for i in range(freeze_layers): model_body.layers[i].trainable = False
print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model_body.layers)))
# 调整非主干模型first
if True:
Init_epoch = 0
Freeze_epoch = 50
batch_size = 8
batch_size = 32
learning_rate_base = 1e-3
if Cosine_scheduler:
......@@ -326,9 +324,9 @@ if __name__ == "__main__":
model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
model.fit_generator(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic, random=True),
model.fit_generator(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
steps_per_epoch=max(1, num_train//batch_size),
validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False, random=False),
validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False),
validation_steps=max(1, num_val//batch_size),
epochs=Freeze_epoch,
initial_epoch=Init_epoch,
......@@ -337,10 +335,11 @@ if __name__ == "__main__":
for i in range(freeze_layers): model_body.layers[i].trainable = True
# 解冻后训练
if True:
Freeze_epoch = 50
Epoch = 100
batch_size = 2
batch_size = 16
learning_rate_base = 1e-4
if Cosine_scheduler:
......@@ -364,9 +363,9 @@ if __name__ == "__main__":
model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})
print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
model.fit_generator(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic, random=True),
model.fit_generator(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
steps_per_epoch=max(1, num_train//batch_size),
validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False, random=False),
validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False),
validation_steps=max(1, num_val//batch_size),
epochs=Epoch,
initial_epoch=Freeze_epoch,
......
import collections
import colorsys
import copy
import os
......@@ -9,7 +10,7 @@ from keras.layers import Input
from keras.models import load_model
from PIL import Image, ImageDraw, ImageFont
from nets.yolo4 import yolo_body, yolo_eval
from nets.yolo4_tiny import yolo_body, yolo_eval
from utils.utils import letterbox_image
......@@ -21,7 +22,7 @@ from utils.utils import letterbox_image
#--------------------------------------------#
class YOLO(object):
_defaults = {
"model_path" : 'model_data/yolo4_weight.h5',
"model_path" : 'model_data/yolov4_tiny_weights_coco.h5',
"anchors_path" : 'model_data/yolo_anchors.txt',
"classes_path" : 'model_data/coco_classes.txt',
"score" : 0.5,
......@@ -29,7 +30,12 @@ class YOLO(object):
"max_boxes" : 100,
# 显存比较小可以使用416x416
# 显存比较大可以使用608x608
"model_image_size" : (416, 416)
"model_image_size" : (416, 416),
#---------------------------------------------------------------------#
# 该变量用于控制是否使用letterbox_image对输入图像进行不失真的resize,
# 在多次测试后,发现关闭letterbox_image直接resize的效果更好
#---------------------------------------------------------------------#
"letterbox_image" : False,
}
@classmethod
......@@ -89,7 +95,7 @@ class YOLO(object):
try:
self.yolo_model = load_model(model_path, compile=False)
except:
self.yolo_model = yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes)
self.yolo_model = yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes)
self.yolo_model.load_weights(self.model_path)
else:
assert self.yolo_model.layers[-1].output_shape[-1] == \
......@@ -119,19 +125,22 @@ class YOLO(object):
#---------------------------------------------------------#
boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
num_classes, self.input_image_shape, max_boxes = self.max_boxes,
score_threshold = self.score, iou_threshold = self.iou)
score_threshold = self.score, iou_threshold = self.iou, letterbox_image = self.letterbox_image)
return boxes, scores, classes
#---------------------------------------------------#
# 检测图片
#---------------------------------------------------#
def detect_image(self, image):
start = timer()
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
# 也可以直接resize进行识别
#---------------------------------------------------------#
new_image_size = (self.model_image_size[1],self.model_image_size[0])
boxed_image = letterbox_image(image, new_image_size)
if self.letterbox_image:
boxed_image = letterbox_image(image, (self.model_image_size[1],self.model_image_size[0]))
else:
boxed_image = image.convert('RGB')
boxed_image = boxed_image.resize((self.model_image_size[1],self.model_image_size[0]), Image.BICUBIC)
image_data = np.array(boxed_image, dtype='float32')
image_data /= 255.
#---------------------------------------------------------#
......@@ -197,8 +206,6 @@ class YOLO(object):
draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
del draw
end = timer()
print(end - start)
return image
def close_session(self):
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册