Add files via upload

30094fa5 · Bubbliiiing · GitHub · 742158d5 · 30094fa5 · 30094fa5
15 changed files
--- a/FPS_test.py
+++ b/FPS_test.py
@@ -25,20 +25,20 @@ video.py里面测试的FPS会低于该FPS，因为摄像头的读取频率有限
 '''
 class FPS_YOLO(YOLO):
    def get_FPS(self, image, test_interval):
-        # 调整图片使其符合输入要求
        image_shape = np.array(np.shape(image)[0:2])
-
+        #---------------------------------------------------------#
+        #   给图像增加灰条，实现不失真的resize
+        #---------------------------------------------------------#
        crop_img = np.array(letterbox_image(image, (self.model_image_size[1],self.model_image_size[0])))
-        photo = np.array(crop_img,dtype = np.float32)
-        photo /= 255.0
+        photo = np.array(crop_img,dtype = np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
-        photo = photo.astype(np.float32)
-        images = []
-        images.append(photo)
-        images = np.asarray(images)
+        #---------------------------------------------------------#
+        #   添加上batch_size维度
+        #---------------------------------------------------------#
+        images = [photo]

        with torch.no_grad():
-            images = torch.from_numpy(images)
+            images = torch.from_numpy(np.asarray(images))
            if self.cuda:
                images = images.cuda()
            outputs = self.net(images)

--- a/VOCdevkit/VOC2007/voc2yolo3.py
+++ b/VOCdevkit/VOC2007/voc2yolo3.py
+
+#----------------------------------------------------------------------#
+#   验证集的划分在train.py代码里面进行
+#   test.txt和val.txt里面没有内容是正常的。训练不会使用到。
+#----------------------------------------------------------------------#
 import os
 import random 
-random.seed(0)
-
+ 
 xmlfilepath=r'./VOCdevkit/VOC2007/Annotations'
 saveBasePath=r"./VOCdevkit/VOC2007/ImageSets/Main/"
 
+#----------------------------------------------------------------------#
+#   想要增加测试集修改trainval_percent
+#   train_percent不需要修改
+#----------------------------------------------------------------------#
 trainval_percent=1
 train_percent=1


--- a/get_dr_txt.py
+++ b/get_dr_txt.py
-#-------------------------------------#
-#       mAP所需文件计算代码
-#       具体教程请查看Bilibili
-#       Bubbliiiing
-#-------------------------------------#
-import cv2
-import numpy as np
+#----------------------------------------------------#
+#   获取测试集的detection-result和images-optional
+#   具体视频教程可查看
+#   https://www.bilibili.com/video/BV1zE411u7Vw
+#----------------------------------------------------#
 import colorsys
 import os
+
+import cv2
+import numpy as np
 import torch
-import torch.nn as nn
 import torch.backends.cudnn as cudnn
+import torch.nn as nn
+from PIL import Image, ImageDraw, ImageFont
 from torch.autograd import Variable
-from yolo import YOLO
+from tqdm import tqdm
+
 from nets.yolo3 import YoloBody
-from PIL import Image,ImageFont, ImageDraw
 from utils.config import Config
-from utils.utils import non_max_suppression, bbox_iou, DecodeBox,letterbox_image,yolo_correct_boxes
-from tqdm import tqdm
+from utils.utils import (DecodeBox, bbox_iou, letterbox_image,
+                         non_max_suppression, yolo_correct_boxes)
+from yolo import YOLO
+

 class mAP_Yolo(YOLO):
    #---------------------------------------------------#
@@ -28,40 +32,61 @@ class mAP_Yolo(YOLO):
        f = open("./input/detection-results/"+image_id+".txt","w") 
        image_shape = np.array(np.shape(image)[0:2])

+        #---------------------------------------------------------#
+        #   给图像增加灰条，实现不失真的resize
+        #---------------------------------------------------------#
        crop_img = np.array(letterbox_image(image, (self.model_image_size[1],self.model_image_size[0])))
-        photo = np.array(crop_img,dtype = np.float32)
-        photo /= 255.0
+        photo = np.array(crop_img,dtype = np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
-        photo = photo.astype(np.float32)
-        images = []
-        images.append(photo)
-
-        images = np.asarray(images)
-        images = torch.from_numpy(images)
-        if self.cuda:
-            images = images.cuda()
-        
+        #---------------------------------------------------------#
+        #   添加上batch_size维度
+        #---------------------------------------------------------#
+        images = [photo]
+
        with torch.no_grad():
+            images = torch.from_numpy(np.asarray(images))
+            if self.cuda:
+                images = images.cuda()
+
+            #---------------------------------------------------------#
+            #   将图像输入网络当中进行预测！
+            #---------------------------------------------------------#
            outputs = self.net(images)
            output_list = []
            for i in range(3):
                output_list.append(self.yolo_decodes[i](outputs[i]))
+                
+            #---------------------------------------------------------#
+            #   将预测框进行堆叠，然后进行非极大抑制
+            #---------------------------------------------------------#
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(output, self.config["yolo"]["classes"],
                                                    conf_thres=self.confidence,
                                                    nms_thres=self.iou)
-        try :
-            batch_detections = batch_detections[0].cpu().numpy()
-        except:
-            return image
-        top_index = batch_detections[:,4]*batch_detections[:,5] > self.confidence
-        top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
-        top_label = np.array(batch_detections[top_index,-1],np.int32)
-        top_bboxes = np.array(batch_detections[top_index,:4])
-        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
-
-        # 去掉灰条
-        boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)
+                                                    
+            #---------------------------------------------------------#
+            #   如果没有检测出物体，返回原图
+            #---------------------------------------------------------#
+            try :
+                batch_detections = batch_detections[0].cpu().numpy()
+            except:
+                return image
+
+            #---------------------------------------------------------#
+            #   对预测框进行得分筛选
+            #---------------------------------------------------------#
+            top_index = batch_detections[:,4] * batch_detections[:,5] > self.confidence
+            top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
+            top_label = np.array(batch_detections[top_index,-1],np.int32)
+            top_bboxes = np.array(batch_detections[top_index,:4])
+            top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
+
+            #-----------------------------------------------------------------#
+            #   在图像传入网络预测前会进行letterbox_image给图像周围添加灰条
+            #   因此生成的top_bboxes是相对于有灰条的图像的
+            #   我们需要对其进行修改，去除灰条的部分。
+            #-----------------------------------------------------------------#
+            boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]

--- a/nets/darknet.py
+++ b/nets/darknet.py
-import torch
-import torch.nn as nn
 import math
 from collections import OrderedDict

-# 基本的darknet块
+import torch
+import torch.nn as nn
+
+
+#---------------------------------------------------------------------#
+#   残差结构
+#   利用一个1x1卷积下降通道数，然后利用一个3x3卷积提取特征并且上升通道数
+#   最后接上一个残差边
+#---------------------------------------------------------------------#
 class BasicBlock(nn.Module):
    def __init__(self, inplanes, planes):
        super(BasicBlock, self).__init__()
@@ -36,14 +42,20 @@ class DarkNet(nn.Module):
    def __init__(self, layers):
        super(DarkNet, self).__init__()
        self.inplanes = 32
+        # 416,416,3 -> 416,416,32
        self.conv1 = nn.Conv2d(3, self.inplanes, kernel_size=3, stride=1, padding=1, bias=False)
        self.bn1 = nn.BatchNorm2d(self.inplanes)
        self.relu1 = nn.LeakyReLU(0.1)

+        # 416,416,32 -> 208,208,64
        self.layer1 = self._make_layer([32, 64], layers[0])
+        # 208,208,64 -> 104,104,128
        self.layer2 = self._make_layer([64, 128], layers[1])
+        # 104,104,128 -> 52,52,256
        self.layer3 = self._make_layer([128, 256], layers[2])
+        # 52,52,256 -> 26,26,512
        self.layer4 = self._make_layer([256, 512], layers[3])
+        # 26,26,512 -> 13,13,1024
        self.layer5 = self._make_layer([512, 1024], layers[4])

        self.layers_out_filters = [64, 128, 256, 512, 1024]
@@ -57,6 +69,10 @@ class DarkNet(nn.Module):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

+    #---------------------------------------------------------------------#
+    #   在每一个layer里面，首先利用一个步长为2的3x3卷积进行下采样
+    #   然后进行残差结构的堆叠
+    #---------------------------------------------------------------------#
    def _make_layer(self, planes, blocks):
        layers = []
        # 下采样，步长为2，卷积核大小为3
@@ -64,7 +80,7 @@ class DarkNet(nn.Module):
                                stride=2, padding=1, bias=False)))
        layers.append(("ds_bn", nn.BatchNorm2d(planes[1])))
        layers.append(("ds_relu", nn.LeakyReLU(0.1)))
-        # 加入darknet模块   
+        # 加入残差结构
        self.inplanes = planes[1]
        for i in range(0, blocks):
            layers.append(("residual_{}".format(i), BasicBlock(self.inplanes, planes)))

--- a/nets/yolo3.py
+++ b/nets/yolo3.py
+from collections import OrderedDict
+
 import torch
 import torch.nn as nn
-from collections import OrderedDict
+
 from nets.darknet import darknet53

+
 def conv2d(filter_in, filter_out, kernel_size):
    pad = (kernel_size - 1) // 2 if kernel_size else 0
    return nn.Sequential(OrderedDict([
@@ -11,6 +14,10 @@ def conv2d(filter_in, filter_out, kernel_size):
        ("relu", nn.LeakyReLU(0.1)),
    ]))

+#------------------------------------------------------------------------#
+#   make_last_layers里面一共有七个卷积，前五个用于提取特征。
+#   后两个用于获得yolo网络的预测结果
+#------------------------------------------------------------------------#
 def make_last_layers(filters_list, in_filters, out_filter):
    m = nn.ModuleList([
        conv2d(in_filters, filters_list[0], 1),
@@ -28,21 +35,30 @@ class YoloBody(nn.Module):
    def __init__(self, config):
        super(YoloBody, self).__init__()
        self.config = config
-        #  backbone
+        #---------------------------------------------------#   
+        #   生成darknet53的主干模型
+        #   获得三个有效特征层，他们的shape分别是：
+        #   52,52,256
+        #   26,26,512
+        #   13,13,1024
+        #---------------------------------------------------#
        self.backbone = darknet53(None)

+        # out_filters : [64, 128, 256, 512, 1024]
        out_filters = self.backbone.layers_out_filters
-        #  last_layer0
+
+        #------------------------------------------------------------------------#
+        #   计算yolo_head的输出通道数，对于voc数据集而言
+        #   final_out_filter0 = final_out_filter1 = final_out_filter2 = 75
+        #------------------------------------------------------------------------#
        final_out_filter0 = len(config["yolo"]["anchors"][0]) * (5 + config["yolo"]["classes"])
        self.last_layer0 = make_last_layers([512, 1024], out_filters[-1], final_out_filter0)

-        #  embedding1
        final_out_filter1 = len(config["yolo"]["anchors"][1]) * (5 + config["yolo"]["classes"])
        self.last_layer1_conv = conv2d(512, 256, 1)
        self.last_layer1_upsample = nn.Upsample(scale_factor=2, mode='nearest')
        self.last_layer1 = make_last_layers([256, 512], out_filters[-2] + 256, final_out_filter1)

-        #  embedding2
        final_out_filter2 = len(config["yolo"]["anchors"][2]) * (5 + config["yolo"]["classes"])
        self.last_layer2_conv = conv2d(256, 128, 1)
        self.last_layer2_upsample = nn.Upsample(scale_factor=2, mode='nearest')
@@ -56,21 +72,43 @@ class YoloBody(nn.Module):
                if i == 4:
                    out_branch = layer_in
            return layer_in, out_branch
-        #  backbone
+        #---------------------------------------------------#   
+        #   获得三个有效特征层，他们的shape分别是：
+        #   52,52,256；26,26,512；13,13,1024
+        #---------------------------------------------------#
        x2, x1, x0 = self.backbone(x)
-        #  yolo branch 0
+
+        #---------------------------------------------------#
+        #   第一个特征层
+        #   out0 = (batch_size,255,13,13)
+        #---------------------------------------------------#
+        # 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512
        out0, out0_branch = _branch(self.last_layer0, x0)

-        #  yolo branch 1
+        # 13,13,512 -> 13,13,256 -> 26,26,256
        x1_in = self.last_layer1_conv(out0_branch)
        x1_in = self.last_layer1_upsample(x1_in)
+
+        # 26,26,256 + 26,26,512 -> 26,26,768
        x1_in = torch.cat([x1_in, x1], 1)
+        #---------------------------------------------------#
+        #   第二个特征层
+        #   out1 = (batch_size,255,26,26)
+        #---------------------------------------------------#
+        # 26,26,768 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256
        out1, out1_branch = _branch(self.last_layer1, x1_in)

-        #  yolo branch 2
+        # 26,26,256 -> 26,26,128 -> 52,52,128
        x2_in = self.last_layer2_conv(out1_branch)
        x2_in = self.last_layer2_upsample(x2_in)
+
+        # 52,52,128 + 52,52,256 -> 52,52,384
        x2_in = torch.cat([x2_in, x2], 1)
+        #---------------------------------------------------#
+        #   第三个特征层
+        #   out2 = (batch_size,255,52,52)
+        #---------------------------------------------------#
+        # 52,52,384 -> 52,52,128 -> 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128
        out2, _ = _branch(self.last_layer2, x2_in)
        return out0, out1, out2

--- a/nets/yolo_training.py
+++ b/nets/yolo_training.py
--- a/predict.py
+++ b/predict.py
-#-------------------------------------#
-#       对单张图片进行预测
-#-------------------------------------#
-from yolo import YOLO
+'''
+predict.py有几个注意点
+1、无法进行批量预测，如果想要批量预测，可以利用os.listdir()遍历文件夹，利用Image.open打开图片文件进行预测。
+2、如果想要保存，利用r_image.save("img.jpg")即可保存。
+3、如果想要获得框的坐标，可以进入detect_image函数，读取top,left,bottom,right这四个值。
+4、如果想要截取下目标，可以利用获取到的top,left,bottom,right这四个值在原图上利用矩阵的方式进行截取。
+'''
 from PIL import Image

+from yolo import YOLO
+
 yolo = YOLO()

 while True:

--- a/test.py
+++ b/test.py
@@ -5,6 +5,7 @@
 #--------------------------------------------#
 import torch
 from torchsummary import summary
+
 from nets.yolo3 import YoloBody
 from utils.config import Config


--- a/train.py
+++ b/train.py
@@ -2,21 +2,24 @@
 #       对数据集进行训练
 #-------------------------------------#
 import os
-import numpy as np
 import time
+
+import numpy as np
 import torch
-from torch.autograd import Variable
+import torch.backends.cudnn as cudnn
 import torch.nn as nn
-import torch.optim as optim
 import torch.nn.functional as F
-import torch.backends.cudnn as cudnn
-from utils.config import Config
+import torch.optim as optim
+from torch.autograd import Variable
 from torch.utils.data import DataLoader
-from utils.dataloader import yolo_dataset_collate, YoloDataset
-from nets.yolo_training import YOLOLoss,Generator
-from nets.yolo3 import YoloBody
 from tqdm import tqdm

+from nets.yolo3 import YoloBody
+from nets.yolo_training import Generator, YOLOLoss
+from utils.config import Config
+from utils.dataloader import YoloDataset, yolo_dataset_collate
+
+
 def get_lr(optimizer):
    for param_group in optimizer.param_groups:
        return param_group['lr']
@@ -24,7 +27,8 @@ def get_lr(optimizer):
 def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epoch,cuda):
    total_loss = 0
    val_loss = 0
-    start_time = time.time()
+
+    net.train()
    with tqdm(total=epoch_size,desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
        for iteration, batch in enumerate(gen):
            if iteration >= epoch_size:
@@ -37,25 +41,38 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
                else:
                    images = Variable(torch.from_numpy(images).type(torch.FloatTensor))
                    targets = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets]
+
+            #----------------------#
+            #   清零梯度
+            #----------------------#
            optimizer.zero_grad()
+            #----------------------#
+            #   前向传播
+            #----------------------#
            outputs = net(images)
            losses = []
+            num_pos_all = 0
+            #----------------------#
+            #   Compute the loss
+            #----------------------#
             for i in range(3):
-                loss_item = yolo_losses[i](outputs[i], targets)
-                losses.append(loss_item[0])
-            loss = sum(losses)
+                loss_item, num_pos = yolo_losses[i](outputs[i], targets)
+                losses.append(loss_item)
+                num_pos_all += num_pos
+
+            loss = sum(losses) / num_pos_all  # divide by the count summed over all 3 heads, not the last head's
+            #----------------------#
+            #   反向传播
+            #----------------------#
            loss.backward()
            optimizer.step()

-            total_loss += loss
-            waste_time = time.time() - start_time
+            total_loss += loss.item()
            
-            pbar.set_postfix(**{'total_loss': total_loss.item() / (iteration + 1), 
-                                'lr'        : get_lr(optimizer),
-                                'step/s'    : waste_time})
+            pbar.set_postfix(**{'total_loss': total_loss / (iteration + 1), 
+                                'lr'        : get_lr(optimizer)})
            pbar.update(1)

-            start_time = time.time()
    net.eval()
    print('Start Validation')
    with tqdm(total=epoch_size_val, desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
@@ -74,14 +91,15 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
                optimizer.zero_grad()
                outputs = net(images_val)
                losses = []
+                num_pos_all = 0
                 for i in range(3):
-                    loss_item = yolo_losses[i](outputs[i], targets_val)
-                    losses.append(loss_item[0])
-                loss = sum(losses)
-                val_loss += loss
-            pbar.set_postfix(**{'total_loss': val_loss.item() / (iteration + 1)})
+                    loss_item, num_pos = yolo_losses[i](outputs[i], targets_val)
+                    losses.append(loss_item)
+                    num_pos_all += num_pos
+                loss = sum(losses) / num_pos_all  # divide by the count summed over all 3 heads, not the last head's
+                val_loss += loss.item()
+            pbar.set_postfix(**{'total_loss': val_loss / (iteration + 1)})
            pbar.update(1)
-    net.train()
    print('Finish Validation')
    print('Epoch:'+ str(epoch+1) + '/' + str(Epoch))
    print('Total Loss: %.4f || Val Loss: %.4f ' % (total_loss/(epoch_size+1),val_loss/(epoch_size_val+1)))
@@ -94,22 +112,33 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
 #   https://www.bilibili.com/video/BV1zE411u7Vw
 #----------------------------------------------------#
 if __name__ == "__main__":
-    # 参数初始化
-    annotation_path = '2007_train.txt'
-    model = YoloBody(Config)
+    #-------------------------------#
+    #   是否使用Cuda
+    #   没有GPU可以设置成False
+    #-------------------------------#
    Cuda = True
    #-------------------------------#
    #   Dataloder的使用
    #-------------------------------#
    Use_Data_Loader = True
+    #------------------------------------------------------#
+    #   是否对损失进行归一化
+    #------------------------------------------------------#
+    normalize = True
+    #------------------------------------------------------#
+    #   创建yolo模型
+    #   训练前一定要修改Config里面的classes参数
+    #------------------------------------------------------#
+    model = YoloBody(Config)

-    #-------------------------------------------#
-    #   权值文件的下载请看README
-    #-------------------------------------------#
+    #------------------------------------------------------#
+    #   权值文件请看README，百度网盘下载
+    #------------------------------------------------------#
+    model_path = "model_data/yolo_weights.pth"
    print('Loading weights into state dict...')
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_dict = model.state_dict()
-    pretrained_dict = torch.load("model_data/yolo_weights.pth", map_location=device)
+    pretrained_dict = torch.load(model_path, map_location=device)
    pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) ==  np.shape(v)}
    model_dict.update(pretrained_dict)
    model.load_state_dict(model_dict)
@@ -126,9 +155,17 @@ if __name__ == "__main__":
    yolo_losses = []
    for i in range(3):
        yolo_losses.append(YOLOLoss(np.reshape(Config["yolo"]["anchors"],[-1,2]),
-                                    Config["yolo"]["classes"], (Config["img_w"], Config["img_h"]), Cuda))
+                                    Config["yolo"]["classes"], (Config["img_w"], Config["img_h"]), Cuda, normalize))

-    # 0.1用于验证，0.9用于训练
+    #----------------------------------------------------#
+    #   获得图片路径和标签
+    #----------------------------------------------------#
+    annotation_path = '2007_train.txt'
+    #----------------------------------------------------------------------#
+    #   验证集的划分在train.py代码里面进行
+    #   2007_test.txt和2007_val.txt里面没有内容是正常的。训练不会使用到。
+    #   当前划分方式下，验证集和训练集的比例为1:9
+    #----------------------------------------------------------------------#
    val_split = 0.1
    with open(annotation_path) as f:
        lines = f.readlines()
@@ -138,17 +175,15 @@ if __name__ == "__main__":
    num_val = int(len(lines)*val_split)
    num_train = len(lines) - num_val
    
-
    #------------------------------------------------------#
    #   主干特征提取网络特征通用，冻结训练可以加快训练速度
    #   也可以在训练初期防止权值被破坏。
    #   Init_Epoch为起始世代
    #   Freeze_Epoch为冻结训练的世代
-    #   Epoch总训练世代
+    #   Unfreeze_Epoch总训练世代
    #   提示OOM或者显存不足请调小Batch_size
    #------------------------------------------------------#
    if True:
-        # 最开始使用1e-3的学习率可以收敛的更快
        lr = 1e-3
        Batch_size = 8
        Init_Epoch = 0
@@ -158,17 +193,17 @@ if __name__ == "__main__":
        lr_scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=1,gamma=0.95)

        if Use_Data_Loader:
-            train_dataset = YoloDataset(lines[:num_train], (Config["img_h"], Config["img_w"]))
-            val_dataset = YoloDataset(lines[num_train:], (Config["img_h"], Config["img_w"]))
+            train_dataset = YoloDataset(lines[:num_train], (Config["img_h"], Config["img_w"]), True)
+            val_dataset = YoloDataset(lines[num_train:], (Config["img_h"], Config["img_w"]), False)
            gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
                                    drop_last=True, collate_fn=yolo_dataset_collate)
            gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4,pin_memory=True, 
                                    drop_last=True, collate_fn=yolo_dataset_collate)
        else:
            gen = Generator(Batch_size, lines[:num_train],
-                             (Config["img_h"], Config["img_w"])).generate()
+                             (Config["img_h"], Config["img_w"])).generate(True)
            gen_val = Generator(Batch_size, lines[num_train:],
-                             (Config["img_h"], Config["img_w"])).generate()
+                             (Config["img_h"], Config["img_w"])).generate(False)
                        
        epoch_size = num_train//Batch_size
        epoch_size_val = num_val//Batch_size
@@ -190,18 +225,19 @@ if __name__ == "__main__":

        optimizer = optim.Adam(net.parameters(),lr)
        lr_scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=1,gamma=0.95)
+        
        if Use_Data_Loader:
-            train_dataset = YoloDataset(lines[:num_train], (Config["img_h"], Config["img_w"]))
-            val_dataset = YoloDataset(lines[num_train:], (Config["img_h"], Config["img_w"]))
+            train_dataset = YoloDataset(lines[:num_train], (Config["img_h"], Config["img_w"]), True)
+            val_dataset = YoloDataset(lines[num_train:], (Config["img_h"], Config["img_w"]), False)
            gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
                                    drop_last=True, collate_fn=yolo_dataset_collate)
            gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4,pin_memory=True, 
                                    drop_last=True, collate_fn=yolo_dataset_collate)
        else:
            gen = Generator(Batch_size, lines[:num_train],
-                             (Config["img_h"], Config["img_w"])).generate()
+                             (Config["img_h"], Config["img_w"])).generate(True)
            gen_val = Generator(Batch_size, lines[num_train:],
-                             (Config["img_h"], Config["img_w"])).generate()
+                             (Config["img_h"], Config["img_w"])).generate(False)
                        
        epoch_size = num_train//Batch_size
        epoch_size_val = num_val//Batch_size

--- a/utils/config.py
+++ b/utils/config.py
 Config = \
-{
+{   
+    #-------------------------------------------------------------#
+    #   训练前一定要修改classes参数
+    #   anchors可以不修改，因为anchors的通用性较大
+    #   而且大中小的设置非常符合yolo的特征层情况
+    #-------------------------------------------------------------#
    "yolo": {
        "anchors": [[[116, 90], [156, 198], [373, 326]],
                    [[30, 61], [62, 45], [59, 119]],
                    [[10, 13], [16, 30], [33, 23]]],
        "classes": 20,
    },
+    #-------------------------------------------------------------#
+    #   img_h和img_w可以修改成608x608
+    #-------------------------------------------------------------#
    "img_h": 416,
    "img_w": 416,
 }
--- a/utils/dataloader.py
+++ b/utils/dataloader.py
@@ -13,12 +13,13 @@ from nets.yolo_training import Generator
 import cv2

 class YoloDataset(Dataset):
-    def __init__(self, train_lines, image_size):
+    def __init__(self, train_lines, image_size, is_train):
        super(YoloDataset, self).__init__()

        self.train_lines = train_lines
        self.train_batches = len(train_lines)
        self.image_size = image_size
+        self.is_train = is_train

    def __len__(self):
        return self.train_batches
@@ -26,7 +27,7 @@ class YoloDataset(Dataset):
    def rand(self, a=0, b=1):
        return np.random.rand() * (b - a) + a

-    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5):
+    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
        """实时数据增强的随机预处理"""
        line = annotation_line.split()
        image = Image.open(line[0])
@@ -34,6 +35,35 @@ class YoloDataset(Dataset):
        h, w = input_shape
        box = np.array([np.array(list(map(int, box.split(',')))) for box in line[1:]])

+        if not random:
+            scale = min(w/iw, h/ih)
+            nw = int(iw*scale)
+            nh = int(ih*scale)
+            dx = (w-nw)//2
+            dy = (h-nh)//2
+
+            image = image.resize((nw,nh), Image.BICUBIC)
+            new_image = Image.new('RGB', (w,h), (128,128,128))
+            new_image.paste(image, (dx, dy))
+            image_data = np.array(new_image, np.float32)
+
+            # 调整目标框坐标
+            box_data = np.zeros((len(box), 5))
+            if len(box) > 0:
+                np.random.shuffle(box)
+                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
+                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
+                box[:, 0:2][box[:, 0:2] < 0] = 0
+                box[:, 2][box[:, 2] > w] = w
+                box[:, 3][box[:, 3] > h] = h
+                box_w = box[:, 2] - box[:, 0]
+                box_h = box[:, 3] - box[:, 1]
+                box = box[np.logical_and(box_w > 1, box_h > 1)]  # 保留有效框
+                box_data = np.zeros((len(box), 5))
+                box_data[:len(box)] = box
+
+            return image_data, box_data
+            
        # 调整图片大小
        new_ar = w / h * self.rand(1 - jitter, 1 + jitter) / self.rand(1 - jitter, 1 + jitter)
        scale = self.rand(.25, 2)
@@ -48,8 +78,7 @@ class YoloDataset(Dataset):
        # 放置图片
        dx = int(self.rand(0, w - nw))
        dy = int(self.rand(0, h - nh))
-        new_image = Image.new('RGB', (w, h),
-                              (np.random.randint(0, 255), np.random.randint(0, 255), np.random.randint(0, 255)))
+        new_image = Image.new('RGB', (w, h), (128, 128, 128))
        new_image.paste(image, (dx, dy))
        image = new_image

@@ -89,19 +118,18 @@ class YoloDataset(Dataset):
            box = box[np.logical_and(box_w > 1, box_h > 1)]  # 保留有效框
            box_data = np.zeros((len(box), 5))
            box_data[:len(box)] = box
-        if len(box) == 0:
-            return image_data, []
-
-        if (box_data[:, :4] > 0).any():
-            return image_data, box_data
-        else:
-            return image_data, []
+            
+        return image_data, box_data

    def __getitem__(self, index):
        lines = self.train_lines
        n = self.train_batches
        index = index % n
-        img, y = self.get_random_data(lines[index], self.image_size[0:2])
+        if self.is_train:
+            img, y = self.get_random_data(lines[index], self.image_size[0:2])
+        else:
+            img, y = self.get_random_data(lines[index], self.image_size[0:2], False)
+
        if len(y) != 0:
            # 从坐标转换成0~1的百分比
            boxes = np.array(y[:, :4], dtype=np.float32)

--- a/utils/utils.py
+++ b/utils/utils.py
 from __future__ import division
-import os
+
 import math
+import os
 import time
+
+import numpy as np
 import torch
 import torch.nn as nn
 import torch.nn.functional as F
-import numpy as np
+from PIL import Image, ImageDraw, ImageFont
 from torch.autograd import Variable
 from torchvision.ops import nms
-from PIL import Image, ImageDraw, ImageFont
+

 class DecodeBox(nn.Module):
    def __init__(self, anchors, num_classes, img_size):
        super(DecodeBox, self).__init__()
+        #-----------------------------------------------------------#
+        #   13x13的特征层对应的anchor是[116,90],[156,198],[373,326]
+        #   26x26的特征层对应的anchor是[30,61],[62,45],[59,119]
+        #   52x52的特征层对应的anchor是[10,13],[16,30],[33,23]
+        #-----------------------------------------------------------#
        self.anchors = anchors
        self.num_anchors = len(anchors)
        self.num_classes = num_classes
@@ -20,17 +28,33 @@ class DecodeBox(nn.Module):
        self.img_size = img_size

    def forward(self, input):
+        #-----------------------------------------------#
+        #   输入的input一共有三个，他们的shape分别是
+        #   batch_size, 255, 13, 13
+        #   batch_size, 255, 26, 26
+        #   batch_size, 255, 52, 52
+        #-----------------------------------------------#
        batch_size = input.size(0)
        input_height = input.size(2)
        input_width = input.size(3)

-        # 计算步长
+        #-----------------------------------------------#
+        #   输入为416x416时
+        #   stride_h = stride_w = 32、16、8
+        #-----------------------------------------------#
        stride_h = self.img_size[1] / input_height
        stride_w = self.img_size[0] / input_width
-        # 归一到特征层上
+        #-------------------------------------------------#
+        #   此时获得的scaled_anchors大小是相对于特征层的
+        #-------------------------------------------------#
        scaled_anchors = [(anchor_width / stride_w, anchor_height / stride_h) for anchor_width, anchor_height in self.anchors]

-        # 对预测结果进行resize
+        #-----------------------------------------------#
+        #   view + permute -> (batch, anchors, h, w, attrs):
+        #   batch_size, 3, 13, 13, 85
+        #   batch_size, 3, 26, 26, 85
+        #   batch_size, 3, 52, 52, 85
+        #-----------------------------------------------#
        prediction = input.view(batch_size, self.num_anchors,
                                self.bbox_attrs, input_height, input_width).permute(0, 1, 3, 4, 2).contiguous()

@@ -38,37 +62,48 @@ class DecodeBox(nn.Module):
        x = torch.sigmoid(prediction[..., 0])  
        y = torch.sigmoid(prediction[..., 1])
        # 先验框的宽高调整参数
-        w = prediction[..., 2]  # Width
-        h = prediction[..., 3]  # Height
-
+        w = prediction[..., 2]
+        h = prediction[..., 3]
        # 获得置信度，是否有物体
        conf = torch.sigmoid(prediction[..., 4])
        # 种类置信度
-        pred_cls = torch.sigmoid(prediction[..., 5:])  # Cls pred.
+        pred_cls = torch.sigmoid(prediction[..., 5:])

        FloatTensor = torch.cuda.FloatTensor if x.is_cuda else torch.FloatTensor
        LongTensor = torch.cuda.LongTensor if x.is_cuda else torch.LongTensor

-        # 生成网格，先验框中心，网格左上角 batch_size,3,13,13
+        #----------------------------------------------------------#
+        #   生成网格，先验框中心，网格左上角 
+        #   batch_size,3,13,13
+        #----------------------------------------------------------#
        grid_x = torch.linspace(0, input_width - 1, input_width).repeat(input_height, 1).repeat(
            batch_size * self.num_anchors, 1, 1).view(x.shape).type(FloatTensor)
        grid_y = torch.linspace(0, input_height - 1, input_height).repeat(input_width, 1).t().repeat(
            batch_size * self.num_anchors, 1, 1).view(y.shape).type(FloatTensor)

-        # 生成先验框的宽高
+        #----------------------------------------------------------#
+        #   按照网格格式生成先验框的宽高
+        #   batch_size,3,13,13
+        #----------------------------------------------------------#
        anchor_w = FloatTensor(scaled_anchors).index_select(1, LongTensor([0]))
        anchor_h = FloatTensor(scaled_anchors).index_select(1, LongTensor([1]))
        anchor_w = anchor_w.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(w.shape)
        anchor_h = anchor_h.repeat(batch_size, 1).repeat(1, 1, input_height * input_width).view(h.shape)

-        # 计算调整后的先验框中心与宽高
+        #----------------------------------------------------------#
+        #   利用预测结果对先验框进行调整
+        #   首先调整先验框的中心，从先验框中心向右下角偏移
+        #   再调整先验框的宽高。
+        #----------------------------------------------------------#
        pred_boxes = FloatTensor(prediction[..., :4].shape)
        pred_boxes[..., 0] = x.data + grid_x
        pred_boxes[..., 1] = y.data + grid_y
        pred_boxes[..., 2] = torch.exp(w.data) * anchor_w
        pred_boxes[..., 3] = torch.exp(h.data) * anchor_h

-        # 用于将输出调整为相对于416x416的大小
+        #----------------------------------------------------------#
+        #   将输出结果调整成相对于输入图像大小
+        #----------------------------------------------------------#
        _scale = torch.Tensor([stride_w, stride_h] * 2).type(FloatTensor)
        output = torch.cat((pred_boxes.view(batch_size, -1, 4) * _scale,
                            conf.view(batch_size, -1, 1), pred_cls.view(batch_size, -1, self.num_classes)), -1)
@@ -139,7 +174,10 @@ def bbox_iou(box1, box2, x1y1x2y2=True):


 def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
-    # 求左上角和右下角
+    #----------------------------------------------------------#
+    #   将预测结果的格式转换成左上角右下角的格式。
+    #   prediction  [batch_size, num_anchors, 85]
+    #----------------------------------------------------------#
    box_corner = prediction.new(prediction.shape)
    box_corner[:, :, 0] = prediction[:, :, 0] - prediction[:, :, 2] / 2
    box_corner[:, :, 1] = prediction[:, :, 1] - prediction[:, :, 3] / 2
@@ -149,21 +187,35 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):

    output = [None for _ in range(len(prediction))]
    for image_i, image_pred in enumerate(prediction):
-        # 获得种类及其置信度
+        #----------------------------------------------------------#
+        #   Take the max over the class scores of one image.
+        #   class_conf  [num_anchors, 1]    best class score
+        #   class_pred  [num_anchors, 1]    index of that class
+        #----------------------------------------------------------#
        class_conf, class_pred = torch.max(image_pred[:, 5:5 + num_classes], 1, keepdim=True)

-        # 利用置信度进行第一轮筛选
-        conf_mask = (image_pred[:, 4]*class_conf[:, 0] >= conf_thres).squeeze()
+        #----------------------------------------------------------#
+        #   First filter: keep obj_conf * class_conf >= conf_thres
+        #----------------------------------------------------------#
+        conf_mask = (image_pred[:, 4] * class_conf[:, 0] >= conf_thres).squeeze()

+        #----------------------------------------------------------#
+        #   根据置信度进行预测结果的筛选
+        #----------------------------------------------------------#
        image_pred = image_pred[conf_mask]
        class_conf = class_conf[conf_mask]
        class_pred = class_pred[conf_mask]
        if not image_pred.size(0):
            continue
-        # 获得的内容为(x1, y1, x2, y2, obj_conf, class_conf, class_pred)
+        #-------------------------------------------------------------------------#
+        #   detections  [num_kept, 7]  (rows of this image that passed conf_mask)
+        #   7 columns: x1, y1, x2, y2, obj_conf, class_conf, class_pred
+        #-------------------------------------------------------------------------#
        detections = torch.cat((image_pred[:, :5], class_conf.float(), class_pred.float()), 1)

-        # 获得种类
+        #------------------------------------------#
+        #   获得预测结果中包含的所有种类
+        #------------------------------------------#
        unique_labels = detections[:, -1].cpu().unique()

        if prediction.is_cuda:
@@ -171,7 +223,9 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
            detections = detections.cuda()

        for c in unique_labels:
-            # 获得某一类初步筛选后全部的预测结果
+            #------------------------------------------#
+            #   获得某一类得分筛选后全部的预测结果
+            #------------------------------------------#
            detections_class = detections[detections[:, -1] == c]

            #------------------------------------------#
@@ -179,7 +233,7 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
            #------------------------------------------#
            keep = nms(
                detections_class[:, :4],
-                detections_class[:, 4]*detections_class[:, 5],
+                detections_class[:, 4] * detections_class[:, 5],
                nms_thres
            )
            max_detections = detections_class[keep]

--- a/video.py
+++ b/video.py
 #-------------------------------------#
-#       调用摄像头检测
+#   调用摄像头或者视频进行检测
+#   调用摄像头直接运行即可
+#   调用视频可以将cv2.VideoCapture()指定路径
+#   视频的保存并不难，可以百度一下看看
 #-------------------------------------#
-from yolo import YOLO
-from PIL import Image
-import numpy as np
-import cv2
 import time
-yolo = YOLO()
-# 调用摄像头
-capture=cv2.VideoCapture(0) # capture=cv2.VideoCapture("1.mp4")

+import cv2
+import numpy as np
+from PIL import Image
+
+from yolo import YOLO
+
+yolo = YOLO()
+#-------------------------------------#
+#   调用摄像头
+#   capture=cv2.VideoCapture("1.mp4")
+#-------------------------------------#
+capture=cv2.VideoCapture(0)
 fps = 0.0
 while(True):
    t1 = time.time()
@@ -19,10 +27,8 @@ while(True):
    frame = cv2.cvtColor(frame,cv2.COLOR_BGR2RGB)
    # 转变成Image
    frame = Image.fromarray(np.uint8(frame))
-
    # 进行检测
    frame = np.array(yolo.detect_image(frame))
-
    # RGBtoBGR满足opencv显示格式
    frame = cv2.cvtColor(frame,cv2.COLOR_RGB2BGR)

@@ -32,7 +38,6 @@ while(True):

    cv2.imshow("video",frame)

-
    c= cv2.waitKey(1) & 0xff 
    if c==27:
        capture.release()

--- a/voc_annotation.py
+++ b/voc_annotation.py
+#---------------------------------------------#
+#   运行前一定要修改classes
+#   如果生成的2007_train.txt里面没有目标信息
+#   那么就是因为classes没有设定正确
+#---------------------------------------------#
 import xml.etree.ElementTree as ET
 from os import getcwd


--- a/yolo.py
+++ b/yolo.py
 #-------------------------------------#
 #       创建YOLO类
 #-------------------------------------#
-import cv2
-import numpy as np
 import colorsys
 import os
+
+import cv2
+import numpy as np
 import torch
-import torch.nn as nn
-from nets.yolo3 import YoloBody
 import torch.backends.cudnn as cudnn
-from PIL import Image,ImageFont, ImageDraw
+import torch.nn as nn
+from PIL import Image, ImageDraw, ImageFont
 from torch.autograd import Variable
+
+from nets.yolo3 import YoloBody
 from utils.config import Config
-from utils.utils import non_max_suppression, bbox_iou, DecodeBox,letterbox_image,yolo_correct_boxes
+from utils.utils import (DecodeBox, bbox_iou, letterbox_image,
+                         non_max_suppression, yolo_correct_boxes)
+

 #--------------------------------------------#
 #   使用自己训练好的模型预测需要修改2个参数
 #   model_path和classes_path都需要修改！
+#   如果出现shape不匹配，一定要注意
+#   训练时的model_path和classes_path参数的修改
 #--------------------------------------------#
 class YOLO(object):
    _defaults = {
@@ -52,14 +58,20 @@ class YOLO(object):
            class_names = f.readlines()
        class_names = [c.strip() for c in class_names]
        return class_names
+
    #---------------------------------------------------#
-    #   获得所有的分类
+    #   生成模型
    #---------------------------------------------------#
    def generate(self):
        self.config["yolo"]["classes"] = len(self.class_names)
+        #---------------------------------------------------#
+        #   建立yolov3模型
+        #---------------------------------------------------#
        self.net = YoloBody(self.config)

-        # 加快模型训练的效率
+        #---------------------------------------------------#
+        #   载入yolov3模型的权重
+        #---------------------------------------------------#
        print('Loading weights into state dict...')
        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
        state_dict = torch.load(self.model_path, map_location=device)
@@ -71,10 +83,12 @@ class YOLO(object):
            self.net = nn.DataParallel(self.net)
            self.net = self.net.cuda()

+        #---------------------------------------------------#
+        #   建立三个特征层解码用的工具
+        #---------------------------------------------------#
        self.yolo_decodes = []
        for i in range(3):
-            self.yolo_decodes.append(DecodeBox(self.config["yolo"]["anchors"][i], self.config["yolo"]["classes"],  (self.model_image_size[1], self.model_image_size[0])))
-
+            self.yolo_decodes.append(DecodeBox(self.config["yolo"]["anchors"][i], self.config["yolo"]["classes"], (self.model_image_size[1], self.model_image_size[0])))

        print('{} model, anchors, and classes loaded.'.format(self.model_path))
        # 画框设置不同的颜色
@@ -91,44 +105,65 @@ class YOLO(object):
    def detect_image(self, image):
        image_shape = np.array(np.shape(image)[0:2])

+        #---------------------------------------------------------#
+        #   给图像增加灰条，实现不失真的resize
+        #---------------------------------------------------------#
        crop_img = np.array(letterbox_image(image, (self.model_image_size[1],self.model_image_size[0])))
-        photo = np.array(crop_img,dtype = np.float32)
-        photo /= 255.0
+        photo = np.array(crop_img,dtype = np.float32) / 255.0
        photo = np.transpose(photo, (2, 0, 1))
-        photo = photo.astype(np.float32)
-        images = []
-        images.append(photo)
+        #---------------------------------------------------------#
+        #   添加上batch_size维度
+        #---------------------------------------------------------#
+        images = [photo]

-        images = np.asarray(images)
-        images = torch.from_numpy(images)
-        if self.cuda:
-            images = images.cuda()
-        
        with torch.no_grad():
+            images = torch.from_numpy(np.asarray(images))
+            if self.cuda:
+                images = images.cuda()
+
+            #---------------------------------------------------------#
+            #   将图像输入网络当中进行预测！
+            #---------------------------------------------------------#
            outputs = self.net(images)
            output_list = []
            for i in range(3):
                output_list.append(self.yolo_decodes[i](outputs[i]))
+                
+            #---------------------------------------------------------#
+            #   将预测框进行堆叠，然后进行非极大抑制
+            #---------------------------------------------------------#
            output = torch.cat(output_list, 1)
            batch_detections = non_max_suppression(output, self.config["yolo"]["classes"],
                                                    conf_thres=self.confidence,
                                                    nms_thres=self.iou)
-        try :
-            batch_detections = batch_detections[0].cpu().numpy()
-        except:
-            return image
-        top_index = batch_detections[:,4]*batch_detections[:,5] > self.confidence
-        top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
-        top_label = np.array(batch_detections[top_index,-1],np.int32)
-        top_bboxes = np.array(batch_detections[top_index,:4])
-        top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
-
-        # 去掉灰条
-        boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)
+                                                    
+            #---------------------------------------------------------#
+            #   如果没有检测出物体，返回原图
+            #---------------------------------------------------------#
+            try :
+                batch_detections = batch_detections[0].cpu().numpy()
+            except:
+                return image
+
+            #---------------------------------------------------------#
+            #   对预测框进行得分筛选
+            #---------------------------------------------------------#
+            top_index = batch_detections[:,4] * batch_detections[:,5] > self.confidence
+            top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
+            top_label = np.array(batch_detections[top_index,-1],np.int32)
+            top_bboxes = np.array(batch_detections[top_index,:4])
+            top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
+
+            #-----------------------------------------------------------------#
+            #   在图像传入网络预测前会进行letterbox_image给图像周围添加灰条
+            #   因此生成的top_bboxes是相对于有灰条的图像的
+            #   我们需要对其进行修改，去除灰条的部分。
+            #-----------------------------------------------------------------#
+            boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([self.model_image_size[0],self.model_image_size[1]]),image_shape)

        font = ImageFont.truetype(font='model_data/simhei.ttf',size=np.floor(3e-2 * np.shape(image)[1] + 0.5).astype('int32'))

-        thickness = (np.shape(image)[0] + np.shape(image)[1]) // self.model_image_size[0]
+        thickness = max((np.shape(image)[0] + np.shape(image)[1]) // self.model_image_size[0], 1)

        for i, c in enumerate(top_label):
            predicted_class = self.class_names[c]
@@ -150,7 +185,7 @@ class YOLO(object):
            draw = ImageDraw.Draw(image)
            label_size = draw.textsize(label, font)
            label = label.encode('utf-8')
-            print(label)
+            print(label, top, left, bottom, right)
            
            if top - label_size[1] >= 0:
                text_origin = np.array([left, top - label_size[1]])