Add files via upload

b4284fd8 · Bubbliiiing · GitHub · 19307150 · b4284fd8 · b4284fd8
12 changed files
--- a/FPS_test.py
+++ b/FPS_test.py
@@ -17,20 +17,31 @@ video.py里面测试的FPS会低于该FPS，因为摄像头的读取频率有限
 '''
 class FPS_YOLO(YOLO):
    def get_FPS(self, image, test_interval):
-        # 调整图片使其符合输入要求
-        new_image_size = (self.model_image_size[1],self.model_image_size[0])
-        boxed_image = letterbox_image(image, new_image_size)
+        #---------------------------------------------------------#
+        #   给图像增加灰条，实现不失真的resize
+        #   也可以直接resize进行识别
+        #---------------------------------------------------------#
+        if self.letterbox_image:
+            boxed_image = letterbox_image(image, (self.model_image_size[1],self.model_image_size[0]))
+        else:
+            boxed_image = image.convert('RGB')
+            boxed_image = boxed_image.resize((self.model_image_size[1],self.model_image_size[0]), Image.BICUBIC)
        image_data = np.array(boxed_image, dtype='float32')
        image_data /= 255.
-        image_data = np.expand_dims(image_data, 0)  
+        #---------------------------------------------------------#
+        #   添加上batch_size维度
+        #---------------------------------------------------------#
+        image_data = np.expand_dims(image_data, 0)

+        #---------------------------------------------------------#
+        #   将图像输入网络当中进行预测！
+        #---------------------------------------------------------#
        out_boxes, out_scores, out_classes = self.sess.run(
            [self.boxes, self.scores, self.classes],
            feed_dict={
                self.yolo_model.input: image_data,
                self.input_image_shape: [image.size[1], image.size[0]],
-                K.learning_phase(): 0
-            })
+                K.learning_phase(): 0})

        t1 = time.time()
        for _ in range(test_interval):
@@ -39,8 +50,7 @@ class FPS_YOLO(YOLO):
                feed_dict={
                    self.yolo_model.input: image_data,
                    self.input_image_shape: [image.size[1], image.size[0]],
-                    K.learning_phase(): 0
-                })
+                    K.learning_phase(): 0})
        t2 = time.time()
        tact_time = (t2 - t1) / test_interval
        return tact_time

--- a/eval_coco.py
+++ b/eval_coco.py
+import colorsys
+import json
+import os
+
+import matplotlib.pyplot as plt
+import numpy as np
+import pylab
+from keras import backend as K
+from keras.applications.imagenet_utils import preprocess_input
+from keras.layers import Input
+from PIL import Image
+from tqdm import tqdm
+
+from nets.yolo4_tiny import yolo_body, yolo_eval
+from utils.utils import letterbox_image
+from yolo import YOLO
+
+# Maps a detector class name to the integer written as "category_id" in the
+# results JSON (looked up in mAP_YOLO.detect_image). Keys must match the
+# entries of self.class_names exactly, otherwise the lookup raises KeyError.
+# NOTE(review): the ids are not contiguous and the empty-string key '' -> 83
+# looks like a placeholder — confirm against the COCO annotation file in use.
+coco_classes = {'person': 1, 'bicycle': 2, 'car': 3, 'motorbike': 4, 'aeroplane': 5, 
+    'bus': 6, 'train': 7, 'truck': 8, 'boat': 9, 'traffic light': 10, 'fire hydrant': 11, 
+    '': 83, 'stop sign': 13, 'parking meter': 14, 'bench': 15, 'bird': 16, 'cat': 17, 
+    'dog': 18, 'horse': 19, 'sheep': 20, 'cow': 21, 'elephant': 22, 'bear': 23, 'zebra': 24, 
+    'giraffe': 25, 'backpack': 27, 'umbrella': 28, 'handbag': 31, 'tie': 32, 'suitcase': 33, 
+    'frisbee': 34, 'skis': 35, 'snowboard': 36, 'sports ball': 37, 'kite': 38, 'baseball bat': 39, 
+    'baseball glove': 40, 'skateboard': 41, 'surfboard': 42, 'tennis racket': 43, 'bottle': 44, 
+    'wine glass': 46, 'cup': 47, 'fork': 48, 'knife': 49, 'spoon': 50, 'bowl': 51, 'banana': 52, 
+    'apple': 53, 'sandwich': 54, 'orange': 55, 'broccoli': 56, 'carrot': 57, 'hot dog': 58, 
+    'pizza': 59, 'donut': 60, 'cake': 61, 'chair': 62, 'sofa': 63, 'pottedplant': 64, 'bed': 65, 
+    'diningtable': 67, 'toilet': 70, 'tvmonitor': 72, 'laptop': 73, 'mouse': 74, 'remote': 75, 
+    'keyboard': 76, 'cell phone': 77, 'microwave': 78, 'oven': 79, 'toaster': 80, 'sink': 81, 
+    'refrigerator': 82, 'book': 84, 'clock': 85, 'vase': 86, 'scissors': 87, 'teddy bear': 88, 
+    'hair drier': 89, 'toothbrush': 90
+}
+
+class mAP_YOLO(YOLO):
+    """YOLO subclass that collects detections as COCO-style result dicts.
+
+    ``generate`` builds the prediction tensors with a low score threshold
+    (0.01) and ``detect_image`` appends one record per predicted box to a
+    caller-supplied list.
+    """
+
+    #---------------------------------------------------#
+    #   Load the model and build the prediction tensors
+    #---------------------------------------------------#
+    def generate(self):
+        """Load the network and return the post-processed output tensors.
+
+        Side effects: sets ``self.score``, ``self.iou``, ``self.yolo_model``,
+        ``self.colors`` and ``self.input_image_shape``.
+
+        Returns:
+            The ``(boxes, scores, classes)`` tensors produced by ``yolo_eval``.
+
+        Raises:
+            AssertionError: if the model path does not end in ``.h5`` or a
+                fully loaded model does not match the anchor/class counts.
+        """
+        # Imported here because this module's import block does not provide
+        # load_model; without it the call below raised NameError, which the
+        # former bare ``except`` silently turned into the weights-only path.
+        from keras.models import load_model
+
+        self.score = 0.01
+        self.iou = 0.5
+        model_path = os.path.expanduser(self.model_path)
+        assert model_path.endswith('.h5'), 'Keras model or weights must be a .h5 file.'
+
+        # Number of anchors and classes
+        num_anchors = len(self.anchors)
+        num_classes = len(self.class_names)
+
+        # If the file already contains the model structure, load it directly;
+        # otherwise build the network first and load the weights into it.
+        try:
+            self.yolo_model = load_model(model_path, compile=False)
+        except Exception:
+            self.yolo_model = yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes)
+            # Use the expanded path so a '~' in self.model_path also works here.
+            self.yolo_model.load_weights(model_path)
+        else:
+            assert self.yolo_model.layers[-1].output_shape[-1] == \
+                num_anchors/len(self.yolo_model.output) * (num_classes + 5), \
+                'Mismatch between model and given anchor and class sizes'
+
+        print('{} model, anchors, and classes loaded.'.format(model_path))
+
+        # One distinct colour per class, evenly spaced in hue
+        hsv_tuples = [(x / len(self.class_names), 1., 1.)
+                      for x in range(len(self.class_names))]
+        self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
+        self.colors = list(
+            map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
+                self.colors))
+
+        # Shuffle the colours with a fixed seed, then restore random seeding
+        np.random.seed(10101)
+        np.random.shuffle(self.colors)
+        np.random.seed(None)
+
+        self.input_image_shape = K.placeholder(shape=(2, ))
+
+        #---------------------------------------------------------#
+        #   yolo_eval post-processes the raw predictions:
+        #   decoding, non-maximum suppression, score thresholding
+        #---------------------------------------------------------#
+        boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
+                num_classes, self.input_image_shape, max_boxes = self.max_boxes,
+                score_threshold = self.score, iou_threshold = self.iou, letterbox_image=self.letterbox_image)
+        return boxes, scores, classes
+
+    #---------------------------------------------------#
+    #   Detect objects in one image
+    #---------------------------------------------------#
+    def detect_image(self, image_id, image, results):
+        """Run detection on ``image`` and append one dict per box to ``results``.
+
+        Args:
+            image_id: value convertible with ``int()``; stored as "image_id".
+            image: PIL image (``.size``, ``.convert`` and ``.resize`` are used).
+            results: list that is extended in place.
+
+        Returns:
+            The same ``results`` list.
+        """
+        #---------------------------------------------------------#
+        #   Either pad with grey bars for an undistorted resize,
+        #   or resize directly to the model input size
+        #---------------------------------------------------------#
+        if self.letterbox_image:
+            boxed_image = letterbox_image(image, (self.model_image_size[1],self.model_image_size[0]))
+        else:
+            boxed_image = image.convert('RGB')
+            boxed_image = boxed_image.resize((self.model_image_size[1],self.model_image_size[0]), Image.BICUBIC)
+        image_data = np.array(boxed_image, dtype='float32')
+        image_data /= 255.
+        #---------------------------------------------------------#
+        #   Add the batch dimension
+        #---------------------------------------------------------#
+        image_data = np.expand_dims(image_data, 0)
+
+        #---------------------------------------------------------#
+        #   Feed the image through the network
+        #---------------------------------------------------------#
+        out_boxes, out_scores, out_classes = self.sess.run(
+            [self.boxes, self.scores, self.classes],
+            feed_dict={
+                self.yolo_model.input: image_data,
+                self.input_image_shape: [image.size[1], image.size[0]],
+                K.learning_phase(): 0})
+
+        for i, c in enumerate(out_classes):
+            result = {}
+            predicted_class = self.class_names[c]
+            top, left, bottom, right = out_boxes[i]
+
+            # Round to the nearest pixel and clamp to the image bounds
+            top = max(0, np.floor(top + 0.5).astype('int32'))
+            left = max(0, np.floor(left + 0.5).astype('int32'))
+            bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
+            right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
+
+            # bbox is stored as [x, y, width, height]
+            result["image_id"] = int(image_id)
+            result["category_id"] = coco_classes[predicted_class]
+            result["bbox"] = [float(left),float(top),float(right-left),float(bottom-top)]
+            result["score"] = float(out_scores[i])
+            results.append(result)
+
+        return results
+
+# Run the detector over every jpg in the validation folder and dump all
+# detections into a single JSON file.
+yolo = mAP_YOLO()
+
+jpg_names = os.listdir("./coco_dataset/val2017")
+
+with open("./coco_dataset/eval_results.json","w") as f:
+    results = []
+    for jpg_name in tqdm(jpg_names):
+        # Non-jpg entries are skipped (the progress bar still counts them).
+        if not jpg_name.endswith("jpg"):
+            continue
+        image_path = "./coco_dataset/val2017/" + jpg_name
+        image = Image.open(image_path)
+        # The part of the file name before the first dot is used as the image
+        # id; detect_image converts it with int().
+        image_id = jpg_name.split(".")[0]
+        results = yolo.detect_image(image_id, image, results)
+    json.dump(results,f)
--- a/get_dr_txt.py
+++ b/get_dr_txt.py
@@ -13,7 +13,7 @@ from keras.layers import Input
 from PIL import Image
 from tqdm import tqdm

-from nets.yolo4 import yolo_body, yolo_eval
+from nets.yolo4_tiny import yolo_body, yolo_eval
 from utils.utils import letterbox_image
 from yolo import YOLO

@@ -41,7 +41,7 @@ class mAP_YOLO(YOLO):
        try:
            self.yolo_model = load_model(model_path, compile=False)
        except:
-            self.yolo_model = yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes)
+            self.yolo_model = yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes)
            self.yolo_model.load_weights(self.model_path)
        else:
            assert self.yolo_model.layers[-1].output_shape[-1] == \
@@ -71,7 +71,7 @@ class mAP_YOLO(YOLO):
        #---------------------------------------------------------#
        boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
                num_classes, self.input_image_shape, max_boxes = self.max_boxes,
-                score_threshold = self.score, iou_threshold = self.iou)
+                score_threshold = self.score, iou_threshold = self.iou, letterbox_image=self.letterbox_image)
        return boxes, scores, classes

    #---------------------------------------------------#
@@ -81,9 +81,13 @@ class mAP_YOLO(YOLO):
        f = open("./input/detection-results/"+image_id+".txt","w") 
        #---------------------------------------------------------#
        #   给图像增加灰条，实现不失真的resize
+        #   也可以直接resize进行识别
        #---------------------------------------------------------#
-        new_image_size = (self.model_image_size[1],self.model_image_size[0])
-        boxed_image = letterbox_image(image, new_image_size)
+        if self.letterbox_image:
+            boxed_image = letterbox_image(image, (self.model_image_size[1],self.model_image_size[0]))
+        else:
+            boxed_image = image.convert('RGB')
+            boxed_image = boxed_image.resize((self.model_image_size[1],self.model_image_size[0]), Image.BICUBIC)
        image_data = np.array(boxed_image, dtype='float32')
        image_data /= 255.
        #---------------------------------------------------------#

--- a/kmeans_for_anchors.py
+++ b/kmeans_for_anchors.py
@@ -86,7 +86,7 @@ if __name__ == '__main__':
    # 运行该程序会计算'./VOCdevkit/VOC2007/Annotations'的xml
    # 会生成yolo_anchors.txt
    SIZE = 416
-    anchors_num = 9
+    anchors_num = 6
    # 载入数据集，可以使用VOC的xml
    path = r'./VOCdevkit/VOC2007/Annotations'
    

--- a/nets/CSPdarknet53_tiny.py
+++ b/nets/CSPdarknet53_tiny.py
+from functools import wraps
+
+import tensorflow as tf
+from keras import backend as K
+from keras.layers import (Add, Concatenate, Conv2D, Lambda, Layer,
+                          MaxPooling2D, UpSampling2D, ZeroPadding2D)
+from keras.layers.advanced_activations import LeakyReLU
+from keras.layers.normalization import BatchNormalization
+from keras.regularizers import l2
+from utils.utils import compose
+
+
+def route_group(input_layer, groups, group_id):
+    """Split ``input_layer`` into ``groups`` parts along the last axis and return part ``group_id``."""
+    return tf.split(input_layer, num_or_size_splits=groups, axis=-1)[group_id]
+
+#--------------------------------------------------#
+#   Single convolution: DarknetConv2D
+#   Padding defaults to 'valid' when strides == (2,2) and to 'same'
+#   otherwise; an explicit padding= keyword overrides the default.
+#   l2 regularisation was removed because tests gave better results without it.
+#--------------------------------------------------#
+@wraps(Conv2D)
+def DarknetConv2D(*args, **kwargs):
+    downsampling = kwargs.get('strides')==(2,2)
+    conv_kwargs = {'padding': 'valid' if downsampling else 'same'}
+    # Caller-supplied keywords take precedence over the default above.
+    conv_kwargs.update(kwargs)
+    return Conv2D(*args, **conv_kwargs)
+
+#---------------------------------------------------#
+#   Convolution block
+#   DarknetConv2D (no bias by default) + BatchNormalization + LeakyReLU
+#---------------------------------------------------#
+def DarknetConv2D_BN_Leaky(*args, **kwargs):
+    conv_kwargs = dict(use_bias=False)
+    conv_kwargs.update(kwargs)
+    # Layers are created in the same order they are applied.
+    conv = DarknetConv2D(*args, **conv_kwargs)
+    norm = BatchNormalization()
+    activation = LeakyReLU(alpha=0.1)
+    return compose(conv, norm, activation)
+
+'''
+                    input
+                      |
+            DarknetConv2D_BN_Leaky
+                      -----------------------
+                      |                     |
+                 route_group              route
+                      |                     |
+            DarknetConv2D_BN_Leaky          |
+                      |                     |
+    -------------------                     |
+    |                 |                     |
+ route_1    DarknetConv2D_BN_Leaky          |
+    |                 |                     |
+    -------------Concatenate                |
+                      |                     |
+        ----DarknetConv2D_BN_Leaky          |
+        |             |                     |
+      feat       Concatenate-----------------
+                      |
+                 MaxPooling2D
+'''
+#---------------------------------------------------#
+#   Building block of CSPdarknet_tiny.
+#   It carries one large shortcut edge ("route") that bypasses the
+#   inner convolutions and is concatenated back at the end.
+#   Returns (x, feat): x is the max-pooled concatenation, feat is the
+#   output of the 1x1 convolution before that concatenation.
+#---------------------------------------------------#
+def resblock_body(x, num_filters):
+    # Fuse features with a 3x3 convolution
+    x = DarknetConv2D_BN_Leaky(num_filters, (3,3))(x)
+    # Keep this output as the large shortcut edge "route"
+    route = x
+
+    # Split the channels into two groups and keep the second one (group_id=1) as the trunk
+    x = Lambda(route_group,arguments={'groups':2, 'group_id':1})(x) 
+    # 3x3 convolution on the trunk
+    x = DarknetConv2D_BN_Leaky(int(num_filters/2), (3,3))(x)
+    # Keep this output as the small shortcut edge "route_1"
+    route_1 = x
+    # Second 3x3 convolution on the trunk
+    x = DarknetConv2D_BN_Leaky(int(num_filters/2), (3,3))(x)
+    # Concatenate the trunk with the small shortcut edge
+    x = Concatenate()([x, route_1])
+
+    # 1x1 convolution on the concatenated result
+    x = DarknetConv2D_BN_Leaky(num_filters, (1,1))(x)
+    feat = x
+    x = Concatenate()([route, x])
+
+    # Max pooling to reduce height and width
+    x = MaxPooling2D(pool_size=[2,2],)(x)
+
+    return x, feat
+
+#---------------------------------------------------#
+#   Backbone of CSPdarknet_tiny.
+#   Returns (feat1, feat2); the shape comments below are for a
+#   416x416x3 input.
+#---------------------------------------------------#
+def darknet_body(x):
+    # Two 3x3 convolutions with stride 2x2 reduce height and width first
+    # 416,416,3 -> 208,208,32 -> 104,104,64
+    x = ZeroPadding2D(((1,0),(1,0)))(x)
+    x = DarknetConv2D_BN_Leaky(32, (3,3), strides=(2,2))(x)
+    x = ZeroPadding2D(((1,0),(1,0)))(x)
+    x = DarknetConv2D_BN_Leaky(64, (3,3), strides=(2,2))(x)
+    
+    # 104,104,64 -> 52,52,128
+    x, _ = resblock_body(x,num_filters = 64)
+    # 52,52,128 -> 26,26,256
+    x, _ = resblock_body(x,num_filters = 128)
+    # 26,26,256 -> x is 13,13,512
+    #           -> feat1 is 26,26,256
+    x, feat1 = resblock_body(x,num_filters = 256)
+    # 13,13,512 -> 13,13,512
+    x = DarknetConv2D_BN_Leaky(512, (3,3))(x)
+
+    feat2 = x
+    return feat1, feat2
+
--- a/nets/ious.py
+++ b/nets/ious.py
-from keras import backend as K
-import tensorflow as tf
 import math
+
+import tensorflow as tf
+from keras import backend as K
+
+
 def box_ciou(b1, b2):
    """
    输入为：

--- a/nets/loss.py
+++ b/nets/loss.py
@@ -99,21 +99,20 @@ def box_iou(b1, b2):
 #   loss值计算
 #---------------------------------------------------#
 def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, label_smoothing=0.1, print_loss=False, normalize=True):
-    # 一共有三层
+    # 一共有两层
    num_layers = len(anchors)//3 

    #---------------------------------------------------------------------------------------------------#
    #   将预测结果和实际ground truth分开，args是[*model_body.output, *y_true]
-    #   y_true是一个列表，包含三个特征层，shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
-    #   yolo_outputs是一个列表，包含三个特征层，shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
+    #   y_true是一个列表，包含两个特征层，shape分别为(m,13,13,3,85),(m,26,26,3,85)
+    #   yolo_outputs是一个列表，包含两个特征层，shape分别为(m,13,13,3,85),(m,26,26,3,85)
    #---------------------------------------------------------------------------------------------------#
    y_true = args[num_layers:]
    yolo_outputs = args[:num_layers]

    #-----------------------------------------------------------#
-    #   13x13的特征层对应的anchor是[142, 110], [192, 243], [459, 401]
-    #   26x26的特征层对应的anchor是[36, 75], [76, 55], [72, 146]
-    #   52x52的特征层对应的anchor是[12, 16], [19, 36], [40, 28]
+    #   13x13的特征层对应的anchor是[81,82], [135,169], [344,319]
+    #   26x26的特征层对应的anchor是[23,27], [37,58], [81,82]
    #-----------------------------------------------------------#
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]

@@ -130,8 +129,8 @@ def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, label_smoothing=0.1,
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    #---------------------------------------------------------------------------------------------------#
-    #   y_true是一个列表，包含三个特征层，shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
-    #   yolo_outputs是一个列表，包含三个特征层，shape分别为(m,13,13,3,85),(m,26,26,3,85),(m,52,52,3,85)。
+    #   y_true是一个列表，包含两个特征层，shape分别为(m,13,13,3,85),(m,26,26,3,85)
+    #   yolo_outputs是一个列表，包含两个特征层，shape分别为(m,13,13,3,85),(m,26,26,3,85)
    #---------------------------------------------------------------------------------------------------#
    for l in range(num_layers):
        #-----------------------------------------------------------#

--- a/nets/yolo4_tiny.py
+++ b/nets/yolo4_tiny.py
+from functools import wraps
+
+import numpy as np
+import tensorflow as tf
+from keras import backend as K
+from keras.layers import (Add, Concatenate, Conv2D, MaxPooling2D, UpSampling2D,
+                          ZeroPadding2D)
+from keras.layers.advanced_activations import LeakyReLU
+from keras.layers.normalization import BatchNormalization
+from keras.models import Model
+from keras.regularizers import l2
+from utils.utils import compose
+
+from nets.CSPdarknet53_tiny import darknet_body
+
+
+#--------------------------------------------------#
+#   Single convolution: DarknetConv2D
+#   Padding defaults to 'valid' when strides == (2,2) and to 'same'
+#   otherwise; an explicit padding= keyword overrides the default.
+#   l2 regularisation was removed because tests gave better results without it.
+#--------------------------------------------------#
+@wraps(Conv2D)
+def DarknetConv2D(*args, **kwargs):
+    downsampling = kwargs.get('strides')==(2,2)
+    conv_kwargs = {'padding': 'valid' if downsampling else 'same'}
+    # Caller-supplied keywords take precedence over the default above.
+    conv_kwargs.update(kwargs)
+    return Conv2D(*args, **conv_kwargs)
+
+
+#---------------------------------------------------#
+#   Convolution block
+#   DarknetConv2D (no bias by default) + BatchNormalization + LeakyReLU
+#---------------------------------------------------#
+def DarknetConv2D_BN_Leaky(*args, **kwargs):
+    conv_kwargs = dict(use_bias=False)
+    conv_kwargs.update(kwargs)
+    # Layers are created in the same order they are applied.
+    conv = DarknetConv2D(*args, **conv_kwargs)
+    norm = BatchNormalization()
+    activation = LeakyReLU(alpha=0.1)
+    return compose(conv, norm, activation)
+
+#---------------------------------------------------#
+#   Backbone features -> the two prediction outputs.
+#   Returns a Model mapping inputs to [P5_output, P4_output]; each
+#   output has num_anchors*(num_classes+5) channels.
+#   Shape comments below are for a 416x416 input; 255 is the channel
+#   count for 3 anchors and 80 classes.
+#---------------------------------------------------#
+def yolo_body(inputs, num_anchors, num_classes):
+    #---------------------------------------------------#
+    #   Build the CSPdarknet53_tiny backbone
+    #   feat1 has shape 26,26,256
+    #   feat2 has shape 13,13,512
+    #---------------------------------------------------#
+    feat1, feat2 = darknet_body(inputs)
+
+    # 13,13,512 -> 13,13,256
+    P5 = DarknetConv2D_BN_Leaky(256, (1,1))(feat2)
+    # 13,13,256 -> 13,13,512 -> 13,13,255
+    P5_output = DarknetConv2D_BN_Leaky(512, (3,3))(P5)
+    P5_output = DarknetConv2D(num_anchors*(num_classes+5), (1,1))(P5_output)
+    
+    # 13,13,256 -> 13,13,128 -> 26,26,128
+    P5_upsample = compose(DarknetConv2D_BN_Leaky(128, (1,1)), UpSampling2D(2))(P5)
+    
+    # 26,26,256 + 26,26,128 -> 26,26,384
+    P4 = Concatenate()([P5_upsample, feat1])
+    
+    # 26,26,384 -> 26,26,256 -> 26,26,255
+    P4_output = DarknetConv2D_BN_Leaky(256, (3,3))(P4)
+    P4_output = DarknetConv2D(num_anchors*(num_classes+5), (1,1))(P4_output)
+    
+    return Model(inputs, [P5_output, P4_output])
+
+#---------------------------------------------------#
+#   Decode the raw predictions of one feature layer.
+#   Returns (grid, feats, box_xy, box_wh) when calc_loss is True,
+#   otherwise (box_xy, box_wh, box_confidence, box_class_probs).
+#---------------------------------------------------#
+def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False):
+    num_anchors = len(anchors)
+    #---------------------------------------------------#
+    #   Anchors reshaped to [1, 1, 1, num_anchors, 2]
+    #---------------------------------------------------#
+    anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2])
+
+    #---------------------------------------------------#
+    #   Build the grid of cell (x, y) indices,
+    #   e.g. (13, 13, 1, 2) for a 13x13 feature layer
+    #---------------------------------------------------#
+    grid_shape = K.shape(feats)[1:3]
+    grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]),
+        [1, grid_shape[1], 1, 1])
+    grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]),
+        [grid_shape[0], 1, 1, 1])
+    grid = K.concatenate([grid_x, grid_y])
+    grid = K.cast(grid, K.dtype(feats))
+
+    #---------------------------------------------------#
+    #   Reshape the predictions to
+    #   (batch_size, grid_h, grid_w, num_anchors, num_classes + 5),
+    #   e.g. (batch_size,13,13,3,85) where 85 = 4 + 1 + 80:
+    #   4  box centre / size adjustment parameters
+    #   1  box confidence
+    #   80 class confidences
+    #---------------------------------------------------#
+    feats = K.reshape(feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5])
+
+    #---------------------------------------------------#
+    #   Convert the raw predictions into box values
+    #   box_xy: box centre, divided by the grid size
+    #   box_wh: box width/height, divided by the network input size
+    #---------------------------------------------------#
+    box_xy = (K.sigmoid(feats[..., :2]) + grid) / K.cast(grid_shape[::-1], K.dtype(feats))
+    box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / K.cast(input_shape[::-1], K.dtype(feats))
+    box_confidence = K.sigmoid(feats[..., 4:5])
+    box_class_probs = K.sigmoid(feats[..., 5:])
+
+    #---------------------------------------------------------------------#
+    #   For loss computation return grid, feats, box_xy, box_wh
+    #   For prediction return box_xy, box_wh, box_confidence, box_class_probs
+    #---------------------------------------------------------------------#
+    if calc_loss == True:
+        return grid, feats, box_xy, box_wh
+    return box_xy, box_wh, box_confidence, box_class_probs
+
+#---------------------------------------------------#
+#   Map boxes predicted on the letterboxed input back onto the
+#   original image. Returns boxes as (y_min, x_min, y_max, x_max)
+#   scaled by image_shape.
+#---------------------------------------------------#
+def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape):
+    # Work in (y, x) / (h, w) order so values line up with the (h, w) shapes.
+    centers_yx = box_xy[..., ::-1]
+    sizes_hw = box_wh[..., ::-1]
+
+    float_dtype = K.dtype(centers_yx)
+    input_shape = K.cast(input_shape, float_dtype)
+    image_shape = K.cast(image_shape, float_dtype)
+
+    # Size of the image content inside the letterboxed input.
+    content_shape = K.round(image_shape * K.min(input_shape/image_shape))
+    # offset: fraction of the input taken up by padding before the content;
+    # scale: ratio between the full input and the content area.
+    offset = (input_shape-content_shape)/2./input_shape
+    scale = input_shape/content_shape
+
+    centers_yx = (centers_yx - offset) * scale
+    sizes_hw = sizes_hw * scale
+
+    half_hw = sizes_hw / 2.
+    mins_yx = centers_yx - half_hw
+    maxes_yx = centers_yx + half_hw
+    corners = K.concatenate([
+        mins_yx[..., 0:1],   # y_min
+        mins_yx[..., 1:2],   # x_min
+        maxes_yx[..., 0:1],  # y_max
+        maxes_yx[..., 1:2]   # x_max
+    ])
+
+    # Scale the corner values by the original image height/width.
+    return corners * K.concatenate([image_shape, image_shape])
+
+#---------------------------------------------------#
+#   Boxes and per-class scores for one feature layer.
+#   Returns (boxes reshaped to [-1, 4], scores reshaped to [-1, num_classes]).
+#---------------------------------------------------#
+def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape, letterbox_image):
+    # Decode the raw layer output into centres, sizes, confidence and class scores.
+    box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats, anchors, num_classes, input_shape)
+
+    if letterbox_image:
+        # The input was padded with grey bars, so the boxes are relative to
+        # the padded image and must be corrected to remove the padding.
+        boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)
+    else:
+        # Plain resize: only convert centre/size to corners and scale to the image.
+        centers_yx = box_xy[..., ::-1]
+        half_hw = box_wh[..., ::-1] / 2.
+        mins_yx = centers_yx - half_hw
+        maxes_yx = centers_yx + half_hw
+
+        # The input_shape cast is kept from the original; it is not used
+        # further in this branch.
+        input_shape = K.cast(input_shape, K.dtype(centers_yx))
+        image_shape = K.cast(image_shape, K.dtype(centers_yx))
+        image_h, image_w = image_shape[0], image_shape[1]
+
+        boxes =  K.concatenate([
+            mins_yx[..., 0:1] * image_h,   # y_min
+            mins_yx[..., 1:2] * image_w,   # x_min
+            maxes_yx[..., 0:1] * image_h,  # y_max
+            maxes_yx[..., 1:2] * image_w   # x_max
+        ])
+
+    # Flatten boxes and combine confidence with class probabilities.
+    flat_boxes = K.reshape(boxes, [-1, 4])
+    flat_scores = K.reshape(box_confidence * box_class_probs, [-1, num_classes])
+    return flat_boxes, flat_scores
+
+#---------------------------------------------------#
+#   Turn the raw network outputs into final detections.
+#   Returns (boxes_, scores_, classes_) after score thresholding and
+#   per-class non-maximum suppression.
+#---------------------------------------------------#
+def yolo_eval(yolo_outputs,
+              anchors,
+              num_classes,
+              image_shape,
+              max_boxes=20,
+              score_threshold=.6,
+              iou_threshold=.5,
+              letterbox_image=True):
+    #---------------------------------------------------#
+    #   Number of output feature layers
+    #   (2 for the model built by yolo_body in this file)
+    #---------------------------------------------------#
+    num_layers = len(yolo_outputs)
+    #-----------------------------------------------------------#
+    #   Anchors for the 13x13 layer: [81,82], [135,169], [344,319]
+    #   Anchors for the 26x26 layer: [23,27], [37,58], [81,82]
+    #   NOTE(review): the two-layer mask reuses index 3 in both layers
+    #   and never selects index 0 — confirm this is intended.
+    #-----------------------------------------------------------#
+    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
+    
+    #-----------------------------------------------------------#
+    #   Network input size, derived as the first output's grid size
+    #   times 32; typically 416x416
+    #-----------------------------------------------------------#
+    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
+    boxes = []
+    box_scores = []
+    #-----------------------------------------------------------#
+    #   Process every feature layer
+    #-----------------------------------------------------------#
+    for l in range(num_layers):
+        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l], anchors[anchor_mask[l]], num_classes, input_shape, image_shape, letterbox_image)
+        boxes.append(_boxes)
+        box_scores.append(_box_scores)
+    #-----------------------------------------------------------#
+    #   Stack the results of all feature layers
+    #-----------------------------------------------------------#
+    boxes = K.concatenate(boxes, axis=0)
+    box_scores = K.concatenate(box_scores, axis=0)
+
+    #-----------------------------------------------------------#
+    #   Mark the scores that reach score_threshold
+    #-----------------------------------------------------------#
+    mask = box_scores >= score_threshold
+    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
+    boxes_ = []
+    scores_ = []
+    classes_ = []
+    for c in range(num_classes):
+        #-----------------------------------------------------------#
+        #   Select the boxes and scores of class c with
+        #   box_scores >= score_threshold
+        #-----------------------------------------------------------#
+        class_boxes = tf.boolean_mask(boxes, mask[:, c])
+        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
+
+        #-----------------------------------------------------------#
+        #   Non-maximum suppression:
+        #   keep the highest-scoring box within each overlapping region
+        #-----------------------------------------------------------#
+        nms_index = tf.image.non_max_suppression(
+            class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
+
+        #-----------------------------------------------------------#
+        #   Gather the results kept by non-maximum suppression:
+        #   box positions, scores and class indices
+        #-----------------------------------------------------------#
+        class_boxes = K.gather(class_boxes, nms_index)
+        class_box_scores = K.gather(class_box_scores, nms_index)
+        classes = K.ones_like(class_box_scores, 'int32') * c
+        boxes_.append(class_boxes)
+        scores_.append(class_box_scores)
+        classes_.append(classes)
+    boxes_ = K.concatenate(boxes_, axis=0)
+    scores_ = K.concatenate(scores_, axis=0)
+    classes_ = K.concatenate(classes_, axis=0)
+
+    return boxes_, scores_, classes_
+
+
--- a/predict.py
+++ b/predict.py
@@ -8,7 +8,6 @@ predict.py有几个注意点
 from keras.layers import Input
 from PIL import Image

-from nets.yolo4 import yolo_body
 from yolo import YOLO

 yolo = YOLO()

--- a/test.py
+++ b/test.py
@@ -5,7 +5,7 @@
 #--------------------------------------------#
 from keras.layers import Input

-from nets.yolo4 import yolo_body
+from nets.yolo4_tiny import yolo_body

 if __name__ == "__main__":
    inputs = Input([416, 416, 3])

--- a/train.py
+++ b/train.py
@@ -9,7 +9,7 @@ from keras.models import Model
 from keras.optimizers import Adam

 from nets.loss import yolo_loss
-from nets.yolo4 import yolo_body
+from nets.yolo4_tiny import yolo_body
 from utils.utils import (WarmUpCosineDecayScheduler, get_random_data,
                         get_random_data_with_Mosaic, rand)

@@ -67,20 +67,19 @@ def data_generator(annotation_lines, batch_size, input_shape, anchors, num_class
 #---------------------------------------------------#
 def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
    assert (true_boxes[..., 4]<num_classes).all(), 'class id must be less than num_classes'
-    # 一共有三个特征层数
+    # 一共有两个特征层数
    num_layers = len(anchors)//3
    #-----------------------------------------------------------#
-    #   13x13的特征层对应的anchor是[142, 110], [192, 243], [459, 401]
-    #   26x26的特征层对应的anchor是[36, 75], [76, 55], [72, 146]
-    #   52x52的特征层对应的anchor是[12, 16], [19, 36], [40, 28]
+    #   13x13的特征层对应的anchor是[81,82], [135,169], [344,319]
+    #   26x26的特征层对应的anchor是[23,27], [37,58], [81,82]
    #-----------------------------------------------------------#
-    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]]
+    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]

    #-----------------------------------------------------------#
    #   获得框的坐标和图片的大小
    #-----------------------------------------------------------#
    true_boxes = np.array(true_boxes, dtype='float32')
-    input_shape = np.array(input_shape, dtype='int32')
+    input_shape = np.array(input_shape, dtype='int32') 
    #-----------------------------------------------------------#
    #   通过计算获得真实框的中心和宽高
    #   中心点(m,n,2) 宽高(m,n,2)
@@ -103,7 +102,7 @@ def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
        dtype='float32') for l in range(num_layers)]

    #-----------------------------------------------------------#
-    #   [9,2] -> [1,9,2]
+    #   [6,2] -> [1,6,2]
    #-----------------------------------------------------------#
    anchors = np.expand_dims(anchors, 0)
    anchor_maxes = anchors / 2.
@@ -127,10 +126,10 @@ def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):

        #-----------------------------------------------------------#
        #   计算所有真实框和先验框的交并比
-        #   intersect_area  [n,9]
+        #   intersect_area  [n,6]
        #   box_area        [n,1]
-        #   anchor_area     [1,9]
-        #   iou             [n,9]
+        #   anchor_area     [1,6]
+        #   iou             [n,6]
        #-----------------------------------------------------------#
        intersect_mins = np.maximum(box_mins, anchor_mins)
        intersect_maxes = np.minimum(box_maxes, anchor_maxes)
@@ -200,7 +199,7 @@ if __name__ == "__main__":
    #   训练自己的数据集时提示维度不匹配正常
    #   预测的东西都不一样了自然维度不匹配
    #------------------------------------------------------#
-    weights_path = 'model_data/yolo4_weight.h5'
+    weights_path = 'model_data/yolov4_tiny_weights_coco.h5'
    #------------------------------------------------------#
    #   训练用图片大小
    #   一般在416x416和608x608选择
@@ -239,8 +238,8 @@ if __name__ == "__main__":
    #------------------------------------------------------#
    image_input = Input(shape=(None, None, 3))
    h, w = input_shape
-    print('Create YOLOv4 model with {} anchors and {} classes.'.format(num_anchors, num_classes))
-    model_body = yolo_body(image_input, num_anchors//3, num_classes)
+    print('Create YOLOv4-Tiny model with {} anchors and {} classes.'.format(num_anchors, num_classes))
+    model_body = yolo_body(image_input, num_anchors//2, num_classes)
    
    #------------------------------------------------------#
    #   载入预训练权重
@@ -252,12 +251,10 @@ if __name__ == "__main__":
    #   在这个地方设置损失，将网络的输出结果传入loss函数
    #   把整个模型的输出作为loss
    #------------------------------------------------------#
-    y_true = [Input(shape=(h//{0:32, 1:16, 2:8}[l], w//{0:32, 1:16, 2:8}[l], \
-        num_anchors//3, num_classes+5)) for l in range(3)]
+    y_true = [Input(shape=(h//{0:32, 1:16}[l], w//{0:32, 1:16}[l], num_anchors//2, num_classes+5)) for l in range(2)]
    loss_input = [*model_body.output, *y_true]
    model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss',
-        arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5, 
-            'label_smoothing': label_smoothing, 'normalize': normalize})(loss_input)
+        arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5, 'label_smoothing': label_smoothing, 'normalize':normalize})(loss_input)

    model = Model([model_body.input, *y_true], model_loss)

@@ -287,10 +284,6 @@ if __name__ == "__main__":
    num_val = int(len(lines)*val_split)
    num_train = len(lines) - num_val
    
-    freeze_layers = 249
-    for i in range(freeze_layers): model_body.layers[i].trainable = False
-    print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model_body.layers)))
-
    #------------------------------------------------------#
    #   主干特征提取网络特征通用，冻结训练可以加快训练速度
    #   也可以在训练初期防止权值被破坏。
@@ -299,10 +292,15 @@ if __name__ == "__main__":
    #   Epoch总训练世代
    #   提示OOM或者显存不足请调小Batch_size
    #------------------------------------------------------#
+    freeze_layers = 60
+    for i in range(freeze_layers): model_body.layers[i].trainable = False
+    print('Freeze the first {} layers of total {} layers.'.format(freeze_layers, len(model_body.layers)))
+
+    # Stage 1: train the non-backbone layers first while the frozen layers stay fixed
    if True:
        Init_epoch = 0
        Freeze_epoch = 50
-        batch_size = 8
+        batch_size = 32
        learning_rate_base = 1e-3

        if Cosine_scheduler:
@@ -326,9 +324,9 @@ if __name__ == "__main__":
            model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})

        print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
-        model.fit_generator(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic, random=True),
+        model.fit_generator(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
                steps_per_epoch=max(1, num_train//batch_size),
-                validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False, random=False),
+                validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False),
                validation_steps=max(1, num_val//batch_size),
                epochs=Freeze_epoch,
                initial_epoch=Init_epoch,
@@ -337,10 +335,11 @@ if __name__ == "__main__":

    for i in range(freeze_layers): model_body.layers[i].trainable = True

+    # 解冻后训练
    if True:
        Freeze_epoch = 50
        Epoch = 100
-        batch_size = 2
+        batch_size = 16
        learning_rate_base = 1e-4

        if Cosine_scheduler:
@@ -364,9 +363,9 @@ if __name__ == "__main__":
            model.compile(optimizer=Adam(learning_rate_base), loss={'yolo_loss': lambda y_true, y_pred: y_pred})

        print('Train on {} samples, val on {} samples, with batch size {}.'.format(num_train, num_val, batch_size))
-        model.fit_generator(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic, random=True),
+        model.fit_generator(data_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, mosaic=mosaic),
                steps_per_epoch=max(1, num_train//batch_size),
-                validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False, random=False),
+                validation_data=data_generator(lines[num_train:], batch_size, input_shape, anchors, num_classes, mosaic=False),
                validation_steps=max(1, num_val//batch_size),
                epochs=Epoch,
                initial_epoch=Freeze_epoch,

--- a/yolo.py
+++ b/yolo.py
+import collections
 import colorsys
 import copy
 import os
@@ -9,7 +10,7 @@ from keras.layers import Input
 from keras.models import load_model
 from PIL import Image, ImageDraw, ImageFont

-from nets.yolo4 import yolo_body, yolo_eval
+from nets.yolo4_tiny import yolo_body, yolo_eval
 from utils.utils import letterbox_image


@@ -21,7 +22,7 @@ from utils.utils import letterbox_image
 #--------------------------------------------#
 class YOLO(object):
    _defaults = {
-        "model_path"        : 'model_data/yolo4_weight.h5',
+        "model_path"        : 'model_data/yolov4_tiny_weights_coco.h5',
        "anchors_path"      : 'model_data/yolo_anchors.txt',
        "classes_path"      : 'model_data/coco_classes.txt',
        "score"             : 0.5,
@@ -29,7 +30,12 @@ class YOLO(object):
        "max_boxes"         : 100,
        # 显存比较小可以使用416x416
        # 显存比较大可以使用608x608
-        "model_image_size"  : (416, 416)
+        "model_image_size"  : (416, 416),
+        #---------------------------------------------------------------------#
+        #   该变量用于控制是否使用letterbox_image对输入图像进行不失真的resize，
+        #   在多次测试后，发现关闭letterbox_image直接resize的效果更好
+        #---------------------------------------------------------------------#
+        "letterbox_image"   : False,
    }

    @classmethod
@@ -89,7 +95,7 @@ class YOLO(object):
        try:
            self.yolo_model = load_model(model_path, compile=False)
        except:
-            self.yolo_model = yolo_body(Input(shape=(None,None,3)), num_anchors//3, num_classes)
+            self.yolo_model = yolo_body(Input(shape=(None,None,3)), num_anchors//2, num_classes)
            self.yolo_model.load_weights(self.model_path)
        else:
            assert self.yolo_model.layers[-1].output_shape[-1] == \
@@ -119,19 +125,22 @@ class YOLO(object):
        #---------------------------------------------------------#
        boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors,
                num_classes, self.input_image_shape, max_boxes = self.max_boxes,
-                score_threshold = self.score, iou_threshold = self.iou)
+                score_threshold = self.score, iou_threshold = self.iou, letterbox_image = self.letterbox_image)
        return boxes, scores, classes

    #---------------------------------------------------#
    #   检测图片
    #---------------------------------------------------#
    def detect_image(self, image):
-        start = timer()
        #---------------------------------------------------------#
        #   给图像增加灰条，实现不失真的resize
+        #   也可以直接resize进行识别
        #---------------------------------------------------------#
-        new_image_size = (self.model_image_size[1],self.model_image_size[0])
-        boxed_image = letterbox_image(image, new_image_size)
+        if self.letterbox_image:
+            boxed_image = letterbox_image(image, (self.model_image_size[1],self.model_image_size[0]))
+        else:
+            boxed_image = image.convert('RGB')
+            boxed_image = boxed_image.resize((self.model_image_size[1],self.model_image_size[0]), Image.BICUBIC)
        image_data = np.array(boxed_image, dtype='float32')
        image_data /= 255.
        #---------------------------------------------------------#
@@ -197,8 +206,6 @@ class YOLO(object):
            draw.text(text_origin, str(label,'UTF-8'), fill=(0, 0, 0), font=font)
            del draw

-        end = timer()
-        print(end - start)
        return image

    def close_session(self):