Unverified commit 3e4ce1ac authored by Bubbliiiing, committed by GitHub

Add files via upload

Parent 3336bfec
-import colorsys
 import os
 import time
-import cv2
 import numpy as np
 import torch
-import torch.backends.cudnn as cudnn
-import torch.nn as nn
-from PIL import Image, ImageDraw, ImageFont
+from PIL import Image
-from torch.autograd import Variable
-from tqdm import tqdm
-from nets.yolo3 import YoloBody
-from utils.utils import (DecodeBox, bbox_iou, letterbox_image,
-                         non_max_suppression, yolo_correct_boxes)
+from utils.utils import (letterbox_image,
+                         non_max_suppression, yolo_correct_boxes)
 from yolo import YOLO
......
@@ -3,25 +3,17 @@
 #   For the full video tutorial, see
 #   https://www.bilibili.com/video/BV1zE411u7Vw
 #----------------------------------------------------#
-import colorsys
 import os
-import cv2
 import numpy as np
 import torch
-import torch.backends.cudnn as cudnn
-import torch.nn as nn
-from PIL import Image, ImageDraw, ImageFont
+from PIL import Image
-from torch.autograd import Variable
 from tqdm import tqdm
-from nets.yolo3 import YoloBody
-from utils.config import Config
-from utils.utils import (DecodeBox, bbox_iou, letterbox_image,
-                         non_max_suppression, yolo_correct_boxes)
+from utils.utils import (letterbox_image, non_max_suppression,
+                         yolo_correct_boxes)
 from yolo import YOLO

 '''
 The threshold here is set low because computing mAP requires Recall and Precision values at different confidence thresholds.
 Only if enough boxes are kept will the computed mAP be accurate; see how mAP works for the details.
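A minimal illustration of that point (my own sketch, not code from this commit; the function and variable names are made up): precision and recall are recomputed as the score cutoff sweeps over the kept detections, so any box discarded by a high confidence threshold can never raise recall, and the area under the resulting curve (the AP) is underestimated.

import numpy as np

def precision_recall_curve(scores, is_tp, num_gt):
    # Sort detections by descending score and sweep the cutoff through them.
    order = np.argsort(-np.asarray(scores, dtype=float))
    tp = np.cumsum(np.asarray(is_tp, dtype=float)[order])
    fp = np.cumsum(1.0 - np.asarray(is_tp, dtype=float)[order])
    recall = tp / max(num_gt, 1)                   # only rises if low-score boxes are kept
    precision = tp / np.maximum(tp + fp, 1e-12)
    return precision, recall

# AP is (roughly) the area under this curve, so truncating the detection list
# truncates the recall axis and lowers the computed mAP.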
@@ -79,12 +71,10 @@ class mAP_Yolo(YOLO):
             #   Stack the prediction boxes, then apply non-maximum suppression
             #---------------------------------------------------------#
             output = torch.cat(output_list, 1)
-            batch_detections = non_max_suppression(output, self.config["yolo"]["classes"],
-                                                   conf_thres=self.confidence,
-                                                   nms_thres=self.iou)
+            batch_detections = non_max_suppression(output, self.num_classes, conf_thres=self.confidence, nms_thres=self.iou)
             #---------------------------------------------------------#
-            #   If no objects are detected, return the original image
+            #   If no objects are detected, return
             #---------------------------------------------------------#
             try :
                 batch_detections = batch_detections[0].cpu().numpy()
@@ -94,10 +84,10 @@ class mAP_Yolo(YOLO):
             #---------------------------------------------------------#
             #   Filter the prediction boxes by score
             #---------------------------------------------------------#
             top_index = batch_detections[:,4] * batch_detections[:,5] > self.confidence
             top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
             top_label = np.array(batch_detections[top_index,-1],np.int32)
             top_bboxes = np.array(batch_detections[top_index,:4])
             top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
             #-----------------------------------------------------------------#
......
@@ -3,9 +3,7 @@
 #   For the full video tutorial, see
 #   https://www.bilibili.com/video/BV1zE411u7Vw
 #----------------------------------------------------#
-import sys
 import os
-import glob
 import xml.etree.ElementTree as ET

 '''
......
@@ -32,9 +32,8 @@ def make_last_layers(filters_list, in_filters, out_filter):
     return m

 class YoloBody(nn.Module):
-    def __init__(self, config):
+    def __init__(self, anchor, num_classes):
         super(YoloBody, self).__init__()
-        self.config = config
         #---------------------------------------------------#
         #   Build the darknet53 backbone
         #   It returns three effective feature layers whose shapes are:
@@ -51,15 +50,15 @@ class YoloBody(nn.Module):
         #   Compute the number of output channels of each yolo_head; for the VOC dataset,
         #   final_out_filter0 = final_out_filter1 = final_out_filter2 = 75
         #------------------------------------------------------------------------#
-        final_out_filter0 = len(config["yolo"]["anchors"][0]) * (5 + config["yolo"]["classes"])
+        final_out_filter0 = len(anchor[0]) * (5 + num_classes)
         self.last_layer0 = make_last_layers([512, 1024], out_filters[-1], final_out_filter0)
-        final_out_filter1 = len(config["yolo"]["anchors"][1]) * (5 + config["yolo"]["classes"])
+        final_out_filter1 = len(anchor[1]) * (5 + num_classes)
         self.last_layer1_conv = conv2d(512, 256, 1)
         self.last_layer1_upsample = nn.Upsample(scale_factor=2, mode='nearest')
         self.last_layer1 = make_last_layers([256, 512], out_filters[-2] + 256, final_out_filter1)
-        final_out_filter2 = len(config["yolo"]["anchors"][2]) * (5 + config["yolo"]["classes"])
+        final_out_filter2 = len(anchor[2]) * (5 + num_classes)
         self.last_layer2_conv = conv2d(256, 128, 1)
         self.last_layer2_upsample = nn.Upsample(scale_factor=2, mode='nearest')
         self.last_layer2 = make_last_layers([128, 256], out_filters[-3] + 128, final_out_filter2)
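As a quick sanity check of the 75-channel figure mentioned above (an illustrative snippet of mine, not part of the commit): each anchor predicts 4 box offsets, 1 objectness score, and one score per class, and there are 3 anchors per scale.

num_anchors_per_scale = 3                                      # len(anchor[i]) for each of the three heads
num_classes = 20                                               # VOC
final_out_filter = num_anchors_per_scale * (5 + num_classes)   # 4 offsets + 1 objectness + 20 class scores
print(final_out_filter)                                        # 75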
......
-import math
-from random import shuffle
-import cv2
+import os
+import math
 import numpy as np
+import scipy.signal
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
-from matplotlib.colors import hsv_to_rgb, rgb_to_hsv
-from PIL import Image
-from utils.utils import bbox_iou
+from matplotlib import pyplot as plt

 def jaccard(_box_a, _box_b):
     #   Compute the top-left and bottom-right corners of the ground-truth boxes
@@ -397,149 +393,51 @@ class YOLOLoss(nn.Module):
         anch_ious_max = anch_ious_max.view(pred_boxes[i].size()[:3])
         noobj_mask[i][anch_ious_max>self.ignore_threshold] = 0
     return noobj_mask
-def rand(a=0, b=1):
-    return np.random.rand()*(b-a) + a
-
-class Generator(object):
-    def __init__(self,batch_size,
-                 train_lines, image_size,
-                 ):
-        self.batch_size = batch_size
-        self.train_lines = train_lines
-        self.train_batches = len(train_lines)
-        self.image_size = image_size
-
-    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
-        '''Random preprocessing for real-time data augmentation'''
-        line = annotation_line.split()
-        image = Image.open(line[0])
-        iw, ih = image.size
-        h, w = input_shape
-        box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
-
-        if not random:
-            scale = min(w/iw, h/ih)
-            nw = int(iw*scale)
-            nh = int(ih*scale)
-            dx = (w-nw)//2
-            dy = (h-nh)//2
-
-            image = image.resize((nw,nh), Image.BICUBIC)
-            new_image = Image.new('RGB', (w,h), (128,128,128))
-            new_image.paste(image, (dx, dy))
-            image_data = np.array(new_image, np.float32)
-
-            #   Adjust the ground-truth box coordinates
-            box_data = np.zeros((len(box), 5))
-            if len(box) > 0:
-                np.random.shuffle(box)
-                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
-                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
-                box[:, 0:2][box[:, 0:2] < 0] = 0
-                box[:, 2][box[:, 2] > w] = w
-                box[:, 3][box[:, 3] > h] = h
-                box_w = box[:, 2] - box[:, 0]
-                box_h = box[:, 3] - box[:, 1]
-                box = box[np.logical_and(box_w > 1, box_h > 1)]  # keep only valid boxes
-                box_data = np.zeros((len(box), 5))
-                box_data[:len(box)] = box
-
-            return image_data, box_data
-
-        # resize image
-        new_ar = w/h * rand(1-jitter,1+jitter)/rand(1-jitter,1+jitter)
-        scale = rand(.25, 2)
-        if new_ar < 1:
-            nh = int(scale*h)
-            nw = int(nh*new_ar)
-        else:
-            nw = int(scale*w)
-            nh = int(nw/new_ar)
-        image = image.resize((nw,nh), Image.BICUBIC)
-
-        # place image
-        dx = int(rand(0, w-nw))
-        dy = int(rand(0, h-nh))
-        new_image = Image.new('RGB', (w,h), (128,128,128))
-        new_image.paste(image, (dx, dy))
-        image = new_image
-
-        # flip image or not
-        flip = rand()<.5
-        if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
-
-        # distort image
-        hue = rand(-hue, hue)
-        sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat)
-        val = rand(1, val) if rand()<.5 else 1/rand(1, val)
-        x = cv2.cvtColor(np.array(image,np.float32)/255, cv2.COLOR_RGB2HSV)
-        x[..., 0] += hue*360
-        x[..., 0][x[..., 0]>1] -= 1
-        x[..., 0][x[..., 0]<0] += 1
-        x[..., 1] *= sat
-        x[..., 2] *= val
-        x[x[:,:, 0]>360, 0] = 360
-        x[:, :, 1:][x[:, :, 1:]>1] = 1
-        x[x<0] = 0
-        image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)*255
-
-        # correct boxes
-        box_data = np.zeros((len(box),5))
-        if len(box)>0:
-            np.random.shuffle(box)
-            box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
-            box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
-            if flip: box[:, [0,2]] = w - box[:, [2,0]]
-            box[:, 0:2][box[:, 0:2]<0] = 0
-            box[:, 2][box[:, 2]>w] = w
-            box[:, 3][box[:, 3]>h] = h
-            box_w = box[:, 2] - box[:, 0]
-            box_h = box[:, 3] - box[:, 1]
-            box = box[np.logical_and(box_w>1, box_h>1)]  # discard invalid box
-            box_data = np.zeros((len(box),5))
-            box_data[:len(box)] = box
-
-        return image_data, box_data
-
-    def generate(self, train=True):
-        while True:
-            shuffle(self.train_lines)
-            lines = self.train_lines
-            inputs = []
-            targets = []
-            for annotation_line in lines:
-                if train:
-                    img,y=self.get_random_data(annotation_line, self.image_size[0:2])
-                else:
-                    img,y=self.get_random_data(annotation_line, self.image_size[0:2], random=False)
-                if len(y)!=0:
-                    boxes = np.array(y[:,:4],dtype=np.float32)
-                    boxes[:,0] = boxes[:,0]/self.image_size[1]
-                    boxes[:,1] = boxes[:,1]/self.image_size[0]
-                    boxes[:,2] = boxes[:,2]/self.image_size[1]
-                    boxes[:,3] = boxes[:,3]/self.image_size[0]
-                    boxes = np.maximum(np.minimum(boxes,1),0)
-                    boxes[:,2] = boxes[:,2] - boxes[:,0]
-                    boxes[:,3] = boxes[:,3] - boxes[:,1]
-                    boxes[:,0] = boxes[:,0] + boxes[:,2]/2
-                    boxes[:,1] = boxes[:,1] + boxes[:,3]/2
-                    y = np.concatenate([boxes,y[:,-1:]],axis=-1)
-                img = np.array(img,dtype = np.float32)
-                inputs.append(np.transpose(img/255.0,(2,0,1)))
-                targets.append(np.array(y,dtype = np.float32))
-                if len(targets) == self.batch_size:
-                    tmp_inp = np.array(inputs)
-                    tmp_targets = targets
-                    inputs = []
-                    targets = []
-                    yield tmp_inp, tmp_targets
+class LossHistory():
+    def __init__(self, log_dir):
+        import datetime
+        curr_time = datetime.datetime.now()
+        time_str = datetime.datetime.strftime(curr_time,'%Y_%m_%d_%H_%M_%S')
+        self.log_dir = log_dir
+        self.time_str = time_str
+        self.save_path = os.path.join(self.log_dir, "loss_" + str(self.time_str))
+        self.losses = []
+        self.val_loss = []
+
+        os.makedirs(self.save_path)
+
+    def append_loss(self, loss, val_loss):
+        self.losses.append(loss)
+        self.val_loss.append(val_loss)
+        with open(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".txt"), 'a') as f:
+            f.write(str(loss))
+            f.write("\n")
+        with open(os.path.join(self.save_path, "epoch_val_loss_" + str(self.time_str) + ".txt"), 'a') as f:
+            f.write(str(val_loss))
+            f.write("\n")
+        self.loss_plot()
+
+    def loss_plot(self):
+        iters = range(len(self.losses))
+
+        plt.figure()
+        plt.plot(iters, self.losses, 'red', linewidth = 2, label='train loss')
+        plt.plot(iters, self.val_loss, 'coral', linewidth = 2, label='val loss')
+        try:
+            if len(self.losses) < 25:
+                num = 5
+            else:
+                num = 15
+
+            plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle = '--', linewidth = 2, label='smooth train loss')
+            plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle = '--', linewidth = 2, label='smooth val loss')
+        except:
+            pass
+
+        plt.grid(True)
+        plt.xlabel('Epoch')
+        plt.ylabel('Loss')
+        plt.legend(loc="upper right")
+
+        plt.savefig(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".png"))
@@ -7,11 +7,9 @@ import torch
 from torchsummary import summary

 from nets.yolo3 import YoloBody
-from utils.config import Config

 if __name__ == "__main__":
     #   Use device to specify whether the network runs on the GPU or the CPU
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

-    config = {"model_params": {"backbone_name": "darknet_53"},"yolo": {"anchors": [[1,2,3],[2,3,4],[3,4,5]],"classes": 80}}
-    m = YoloBody(config).to(device)
+    m = YoloBody([[1,2,3],[2,3,4],[3,4,5]], 80).to(device)
     summary(m, input_size=(3, 416, 416))
 #-------------------------------------#
 #   Train on the dataset
 #-------------------------------------#
-import os
-import time
 import numpy as np
 import torch
 import torch.backends.cudnn as cudnn
-import torch.nn as nn
-import torch.nn.functional as F
 import torch.optim as optim
-from torch.autograd import Variable
 from torch.utils.data import DataLoader
 from tqdm import tqdm

 from nets.yolo3 import YoloBody
-from nets.yolo_training import Generator, YOLOLoss
-from utils.config import Config
+from nets.yolo_training import YOLOLoss, LossHistory
 from utils.dataloader import YoloDataset, yolo_dataset_collate

+def get_anchors(anchors_path):
+    with open(anchors_path) as f:
+        anchors = f.readline()
+    anchors = [float(x) for x in anchors.split(',')]
+    return np.array(anchors).reshape([-1, 3, 2])[::-1, :, :]
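For reference, get_anchors expects model_data/yolo_anchors.txt to hold nine width,height pairs on a single comma-separated line; it reshapes them into three groups of three and reverses the groups so the largest anchors come first. A small usage sketch (the anchor values shown are the standard YOLOv3 defaults and are only an example):

# model_data/yolo_anchors.txt (one line), e.g.:
# 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
anchors = get_anchors('model_data/yolo_anchors.txt')
print(anchors.shape)   # (3, 3, 2): three scales, three anchors each, (width, height)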
 def get_lr(optimizer):
     for param_group in optimizer.param_groups:
         return param_group['lr']

-def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epoch,cuda):
+def fit_one_epoch(net, yolo_loss, epoch, epoch_size, epoch_size_val, gen, genval, Epoch, cuda):
     total_loss = 0
     val_loss = 0

     net.train()
+    print('Start Train')
     with tqdm(total=epoch_size,desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
         for iteration, batch in enumerate(gen):
             if iteration >= epoch_size:
                 break
             images, targets = batch[0], batch[1]
             with torch.no_grad():
                 if cuda:
-                    images = Variable(torch.from_numpy(images).type(torch.FloatTensor)).cuda()
-                    targets = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets]
+                    images = torch.from_numpy(images).type(torch.FloatTensor).cuda()
+                    targets = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in targets]
                 else:
-                    images = Variable(torch.from_numpy(images).type(torch.FloatTensor))
-                    targets = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets]
+                    images = torch.from_numpy(images).type(torch.FloatTensor)
+                    targets = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in targets]
             #----------------------#
             #   Zero the gradients
             #----------------------#
@@ -49,14 +49,14 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
             #----------------------#
             #   Forward pass
             #----------------------#
             outputs = net(images)
             losses = []
             num_pos_all = 0
             #----------------------#
             #   Compute the loss
             #----------------------#
             for i in range(3):
-                loss_item, num_pos = yolo_losses[i](outputs[i], targets)
+                loss_item, num_pos = yolo_loss(outputs[i], targets)
                 losses.append(loss_item)
                 num_pos_all += num_pos
@@ -73,6 +73,8 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
                                 'lr'        : get_lr(optimizer)})
             pbar.update(1)

+    print('Finish Train')
+
     net.eval()
     print('Start Validation')
     with tqdm(total=epoch_size_val, desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
@@ -83,29 +85,36 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
             with torch.no_grad():
                 if cuda:
-                    images_val = Variable(torch.from_numpy(images_val).type(torch.FloatTensor)).cuda()
-                    targets_val = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets_val]
+                    images_val = torch.from_numpy(images_val).type(torch.FloatTensor).cuda()
+                    targets_val = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in targets_val]
                 else:
-                    images_val = Variable(torch.from_numpy(images_val).type(torch.FloatTensor))
-                    targets_val = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets_val]
+                    images_val = torch.from_numpy(images_val).type(torch.FloatTensor)
+                    targets_val = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in targets_val]
                 optimizer.zero_grad()

                 outputs = net(images_val)
                 losses = []
                 num_pos_all = 0
+                #----------------------#
+                #   Compute the loss
+                #----------------------#
                 for i in range(3):
-                    loss_item, num_pos = yolo_losses[i](outputs[i], targets_val)
+                    loss_item, num_pos = yolo_loss(outputs[i], targets_val)
                     losses.append(loss_item)
                     num_pos_all += num_pos
                 loss = sum(losses) / num_pos_all
                 val_loss += loss.item()
             pbar.set_postfix(**{'total_loss': val_loss / (iteration + 1)})
             pbar.update(1)

+    loss_history.append_loss(total_loss/(epoch_size+1), val_loss/(epoch_size_val+1))
     print('Finish Validation')
     print('Epoch:'+ str(epoch+1) + '/' + str(Epoch))
     print('Total Loss: %.4f || Val Loss: %.4f ' %(total_loss / (epoch_size + 1), val_loss / (epoch_size_val + 1)))
     print('Saving state, iter:', str(epoch+1))
     torch.save(model.state_dict(), 'logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.pth'%((epoch + 1), total_loss / (epoch_size + 1), val_loss / (epoch_size_val + 1)))

 #----------------------------------------------------#
 #   Reference video for computing detection mAP and the PR curve
@@ -117,28 +126,39 @@ if __name__ == "__main__":
     #   Set this to False if you have no GPU
     #-------------------------------#
     Cuda = True
-    #-------------------------------#
-    #   Whether to use the DataLoader
-    #-------------------------------#
-    Use_Data_Loader = True
     #------------------------------------------------------#
     #   Whether to normalize the loss, which changes its magnitude
     #   Decides whether the final loss is divided by batch_size or by the number of positive samples
     #------------------------------------------------------#
     normalize = False
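A rough illustration of what that flag means for the training loop below, where loss = sum(losses) / num_pos_all (my own conceptual sketch with made-up names, not the repository's YOLOLoss code):

def loss_divisor(normalize, num_positive_anchors, batch_size):
    # With normalize=True the summed loss is averaged over positive samples;
    # otherwise each of the three heads contributes a per-image average.
    return max(num_positive_anchors, 1) if normalize else batch_size / 3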
     #------------------------------------------------------#
+    #   Input shape
+    #------------------------------------------------------#
+    input_shape = (416, 416)
+    #------------------------------------------------------#
+    #   The Config.py shown in the video has been removed
+    #   To change the number of classes, simply edit num_classes here
+    #   If you need to detect 5 classes, set it to 5. The default is 20
+    #------------------------------------------------------#
+    num_classes = 20
+    #----------------------------------------------------#
+    #   Path of the prior-box (anchor) file
+    #----------------------------------------------------#
+    anchors_path = 'model_data/yolo_anchors.txt'
+    anchors = get_anchors(anchors_path)
     #------------------------------------------------------#
     #   Build the yolo model
     #   Be sure to modify the classes parameter in Config before training
     #------------------------------------------------------#
-    model = YoloBody(Config)
+    model = YoloBody(anchors, num_classes)
     #------------------------------------------------------#
     #   For the weight file, see the README (Baidu Netdisk download)
     #------------------------------------------------------#
     model_path = "model_data/yolo_weights.pth"
     print('Loading weights into state dict...')
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     model_dict = model.state_dict()
     pretrained_dict = torch.load(model_path, map_location=device)
     pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) == np.shape(v)}
     model_dict.update(pretrained_dict)
@@ -152,11 +172,8 @@ if __name__ == "__main__":
         cudnn.benchmark = True
         net = net.cuda()

-    # Build the loss functions
-    yolo_losses = []
-    for i in range(3):
-        yolo_losses.append(YOLOLoss(np.reshape(Config["yolo"]["anchors"],[-1,2]),
-                                    Config["yolo"]["classes"], (Config["img_w"], Config["img_h"]), Cuda, normalize))
+    yolo_loss = YOLOLoss(np.reshape(anchors,[-1,2]), num_classes, (input_shape[1], input_shape[0]), Cuda, normalize)
+    loss_history = LossHistory("logs/")

     #----------------------------------------------------#
     #   Get the image paths and labels
@@ -185,69 +202,65 @@ if __name__ == "__main__":
     #   If you get an OOM / out-of-GPU-memory error, reduce Batch_size
     #------------------------------------------------------#
     if True:
         lr = 1e-3
         Batch_size = 8
         Init_Epoch = 0
         Freeze_Epoch = 50

         optimizer = optim.Adam(net.parameters(),lr)
         lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.92)

-        if Use_Data_Loader:
-            train_dataset = YoloDataset(lines[:num_train], (Config["img_h"], Config["img_w"]), True)
-            val_dataset = YoloDataset(lines[num_train:], (Config["img_h"], Config["img_w"]), False)
-            gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
-                             drop_last=True, collate_fn=yolo_dataset_collate)
-            gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4,pin_memory=True,
-                                 drop_last=True, collate_fn=yolo_dataset_collate)
-        else:
-            gen = Generator(Batch_size, lines[:num_train],
-                            (Config["img_h"], Config["img_w"])).generate(True)
-            gen_val = Generator(Batch_size, lines[num_train:],
-                                (Config["img_h"], Config["img_w"])).generate(False)
-
-        epoch_size = num_train//Batch_size
-        epoch_size_val = num_val//Batch_size
+        train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), True)
+        val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), False)
+        gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
+                         drop_last=True, collate_fn=yolo_dataset_collate)
+        gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
+                             drop_last=True, collate_fn=yolo_dataset_collate)

         #------------------------------------#
         #   Freeze part of the network for training
         #------------------------------------#
         for param in model.backbone.parameters():
             param.requires_grad = False

+        epoch_size = num_train // Batch_size
+        epoch_size_val = num_val // Batch_size
+
+        if epoch_size == 0 or epoch_size_val == 0:
+            raise ValueError("The dataset is too small for training; please add more data.")
+
         for epoch in range(Init_Epoch,Freeze_Epoch):
-            fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,gen_val,Freeze_Epoch,Cuda)
+            fit_one_epoch(net, yolo_loss, epoch, epoch_size, epoch_size_val, gen, gen_val, Freeze_Epoch, Cuda)
             lr_scheduler.step()
     if True:
         lr = 1e-4
         Batch_size = 4
         Freeze_Epoch = 50
         Unfreeze_Epoch = 100

         optimizer = optim.Adam(net.parameters(),lr)
         lr_scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=1,gamma=0.92)

-        if Use_Data_Loader:
-            train_dataset = YoloDataset(lines[:num_train], (Config["img_h"], Config["img_w"]), True)
-            val_dataset = YoloDataset(lines[num_train:], (Config["img_h"], Config["img_w"]), False)
-            gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
-                             drop_last=True, collate_fn=yolo_dataset_collate)
-            gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4,pin_memory=True,
-                                 drop_last=True, collate_fn=yolo_dataset_collate)
-        else:
-            gen = Generator(Batch_size, lines[:num_train],
-                            (Config["img_h"], Config["img_w"])).generate(True)
-            gen_val = Generator(Batch_size, lines[num_train:],
-                                (Config["img_h"], Config["img_w"])).generate(False)
-
-        epoch_size = num_train//Batch_size
-        epoch_size_val = num_val//Batch_size
+        train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), True)
+        val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), False)
+        gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
+                         drop_last=True, collate_fn=yolo_dataset_collate)
+        gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
+                             drop_last=True, collate_fn=yolo_dataset_collate)

         #------------------------------------#
         #   Train after unfreezing the backbone
         #------------------------------------#
         for param in model.backbone.parameters():
             param.requires_grad = True

+        epoch_size = num_train//Batch_size
+        epoch_size_val = num_val//Batch_size
+
+        if epoch_size == 0 or epoch_size_val == 0:
+            raise ValueError("The dataset is too small for training; please add more data.")
+
         for epoch in range(Freeze_Epoch,Unfreeze_Epoch):
-            fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,gen_val,Unfreeze_Epoch,Cuda)
+            fit_one_epoch(net, yolo_loss, epoch, epoch_size, epoch_size_val, gen, gen_val, Unfreeze_Epoch, Cuda)
             lr_scheduler.step()
-from random import shuffle
+import cv2
 import numpy as np
-import torch
-import torch.nn as nn
-import math
-import torch.nn.functional as F
 from PIL import Image
-from torch.autograd import Variable
-from torch.utils.data import DataLoader
 from torch.utils.data.dataset import Dataset
-from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
-from nets.yolo_training import Generator
-import cv2

 class YoloDataset(Dataset):
     def __init__(self, train_lines, image_size, is_train):
@@ -152,7 +144,6 @@ class YoloDataset(Dataset):
         tmp_targets = np.array(y, dtype=np.float32)
         return tmp_inp, tmp_targets

 # collate_fn used by the DataLoader
 def yolo_dataset_collate(batch):
     images = []
......
 from __future__ import division
-import math
-import os
-import time
 import numpy as np
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
-from PIL import Image, ImageDraw, ImageFont
+from PIL import Image
-from torch.autograd import Variable
 from torchvision.ops import nms
......
@@ -7,7 +7,9 @@ import xml.etree.ElementTree as ET
 from os import getcwd

 sets=[('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
+#-----------------------------------------------------#
+#   The class order set here must match the txt in model_data
+#-----------------------------------------------------#
 classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
 def convert_annotation(year, image_id, list_file):
@@ -19,20 +21,19 @@ def convert_annotation(year, image_id, list_file):
         difficult = 0
         if obj.find('difficult')!=None:
             difficult = obj.find('difficult').text
         cls = obj.find('name').text
         if cls not in classes or int(difficult)==1:
             continue
         cls_id = classes.index(cls)
         xmlbox = obj.find('bndbox')
-        b = (int(xmlbox.find('xmin').text), int(xmlbox.find('ymin').text), int(xmlbox.find('xmax').text), int(xmlbox.find('ymax').text))
+        b = (int(float(xmlbox.find('xmin').text)), int(float(xmlbox.find('ymin').text)), int(float(xmlbox.find('xmax').text)), int(float(xmlbox.find('ymax').text)))
         list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))

 wd = getcwd()

 for year, image_set in sets:
-    image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split()
-    list_file = open('%s_%s.txt'%(year, image_set), 'w')
+    image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set), encoding='utf-8').read().strip().split()
+    list_file = open('%s_%s.txt'%(year, image_set), 'w', encoding='utf-8')
     for image_id in image_ids:
         list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg'%(wd, year, image_id))
         convert_annotation(year, image_id, list_file)
......
@@ -4,18 +4,14 @@
 import colorsys
 import os
-import cv2
 import numpy as np
 import torch
-import torch.backends.cudnn as cudnn
 import torch.nn as nn
 from PIL import Image, ImageDraw, ImageFont
-from torch.autograd import Variable

 from nets.yolo3 import YoloBody
-from utils.config import Config
-from utils.utils import (DecodeBox, bbox_iou, letterbox_image,
-                         non_max_suppression, yolo_correct_boxes)
+from utils.utils import (DecodeBox, letterbox_image, non_max_suppression,
+                         yolo_correct_boxes)

 #--------------------------------------------#
@@ -27,6 +23,7 @@ from utils.utils import (DecodeBox, bbox_iou, letterbox_image,
 class YOLO(object):
     _defaults = {
         "model_path"        : 'model_data/yolo_weights.pth',
+        "anchors_path"      : 'model_data/yolo_anchors.txt',
         "classes_path"      : 'model_data/coco_classes.txt',
         "model_image_size"  : (416, 416, 3),
         "confidence"        : 0.5,
@@ -52,8 +49,9 @@ class YOLO(object):
     def __init__(self, **kwargs):
         self.__dict__.update(self._defaults)
         self.class_names = self._get_class()
-        self.config = Config
+        self.anchors = self._get_anchors()
         self.generate()

     #---------------------------------------------------#
     #   Get all the classes
     #---------------------------------------------------#
@@ -64,15 +62,25 @@ class YOLO(object):
         class_names = [c.strip() for c in class_names]
         return class_names
+    #---------------------------------------------------#
+    #   Get all the prior (anchor) boxes
+    #---------------------------------------------------#
+    def _get_anchors(self):
+        anchors_path = os.path.expanduser(self.anchors_path)
+        with open(anchors_path) as f:
+            anchors = f.readline()
+        anchors = [float(x) for x in anchors.split(',')]
+        return np.array(anchors).reshape([-1, 3, 2])[::-1,:,:]

     #---------------------------------------------------#
     #   Build the model
     #---------------------------------------------------#
     def generate(self):
-        self.config["yolo"]["classes"] = len(self.class_names)
+        self.num_classes = len(self.class_names)
         #---------------------------------------------------#
         #   Build the yolov3 model
         #---------------------------------------------------#
-        self.net = YoloBody(self.config)
+        self.net = YoloBody(self.anchors, self.num_classes)
         #---------------------------------------------------#
         #   Load the yolov3 weights
@@ -84,7 +92,6 @@ class YOLO(object):
         self.net = self.net.eval()

         if self.cuda:
-            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
             self.net = nn.DataParallel(self.net)
             self.net = self.net.cuda()
@@ -93,7 +100,7 @@ class YOLO(object):
         #---------------------------------------------------#
         self.yolo_decodes = []
         for i in range(3):
-            self.yolo_decodes.append(DecodeBox(self.config["yolo"]["anchors"][i], self.config["yolo"]["classes"], (self.model_image_size[1], self.model_image_size[0])))
+            self.yolo_decodes.append(DecodeBox(self.anchors[i], self.num_classes, (self.model_image_size[1], self.model_image_size[0])))

         print('{} model, anchors, and classes loaded.'.format(self.model_path))
         #   Set a different color for each bounding box
@@ -108,17 +115,21 @@ class YOLO(object):
     #   Detect an image
     #---------------------------------------------------#
     def detect_image(self, image):
-        image_shape = np.array(np.shape(image)[0:2])
+        #---------------------------------------------------------#
+        #   Convert the image to RGB here so that grayscale images do not raise errors during prediction.
+        #---------------------------------------------------------#
+        image = image.convert('RGB')
+        image_shape = np.array(np.shape(image)[0:2])

         #---------------------------------------------------------#
         #   Add gray bars to the image for a distortion-free resize
         #   You can also resize directly for detection
         #---------------------------------------------------------#
         if self.letterbox_image:
             crop_img = np.array(letterbox_image(image, (self.model_image_size[1], self.model_image_size[0])))
         else:
-            crop_img = image.convert('RGB')
-            crop_img = crop_img.resize((self.model_image_size[1],self.model_image_size[0]), Image.BICUBIC)
+            crop_img = image.resize((self.model_image_size[1], self.model_image_size[0]), Image.BICUBIC)
         photo = np.array(crop_img,dtype = np.float32) / 255.0
         photo = np.transpose(photo, (2, 0, 1))
         #---------------------------------------------------------#
@@ -143,9 +154,7 @@ class YOLO(object):
             #   Stack the prediction boxes, then apply non-maximum suppression
             #---------------------------------------------------------#
             output = torch.cat(output_list, 1)
-            batch_detections = non_max_suppression(output, self.config["yolo"]["classes"],
-                                                   conf_thres=self.confidence,
-                                                   nms_thres=self.iou)
+            batch_detections = non_max_suppression(output, self.num_classes, conf_thres=self.confidence, nms_thres=self.iou)
             #---------------------------------------------------------#
             #   If no objects are detected, return the original image
@@ -158,10 +167,10 @@ class YOLO(object):
             #---------------------------------------------------------#
             #   Filter the prediction boxes by score
             #---------------------------------------------------------#
             top_index = batch_detections[:, 4] * batch_detections[:, 5] > self.confidence
             top_conf = batch_detections[top_index, 4] * batch_detections[top_index, 5]
             top_label = np.array(batch_detections[top_index, -1],np.int32)
             top_bboxes = np.array(batch_detections[top_index, :4])
             top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
             #-----------------------------------------------------------------#
@@ -170,7 +179,7 @@ class YOLO(object):
             #   We need to correct them to remove the gray-bar offset.
             #-----------------------------------------------------------------#
             if self.letterbox_image:
                 boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax, np.array([self.model_image_size[0],self.model_image_size[1]]), image_shape)
             else:
                 top_xmin = top_xmin / self.model_image_size[1] * image_shape[1]
                 top_ymin = top_ymin / self.model_image_size[0] * image_shape[0]
......