Unverified commit 3e4ce1ac authored by Bubbliiiing, committed by GitHub

Add files via upload

Parent 3336bfec
-import colorsys
 import os
 import time
-import cv2
 import numpy as np
 import torch
-import torch.backends.cudnn as cudnn
-import torch.nn as nn
-from PIL import Image, ImageDraw, ImageFont
+from PIL import Image
-from torch.autograd import Variable
-from tqdm import tqdm
-from nets.yolo3 import YoloBody
-from utils.utils import (DecodeBox, bbox_iou, letterbox_image,
-                         non_max_suppression, yolo_correct_boxes)
+from utils.utils import (letterbox_image,
+                         non_max_suppression, yolo_correct_boxes)
 from yolo import YOLO
......
@@ -3,25 +3,17 @@
 #   For the full video tutorial, see
 #   https://www.bilibili.com/video/BV1zE411u7Vw
 #----------------------------------------------------#
-import colorsys
 import os
-import cv2
 import numpy as np
 import torch
-import torch.backends.cudnn as cudnn
-import torch.nn as nn
-from PIL import Image, ImageDraw, ImageFont
+from PIL import Image
-from torch.autograd import Variable
 from tqdm import tqdm
-from nets.yolo3 import YoloBody
-from utils.config import Config
-from utils.utils import (DecodeBox, bbox_iou, letterbox_image,
-                         non_max_suppression, yolo_correct_boxes)
+from utils.utils import (letterbox_image, non_max_suppression,
+                         yolo_correct_boxes)
 from yolo import YOLO

 '''
 The threshold here is set low because computing mAP requires Recall and Precision values at different confidence thresholds.
 Only if enough boxes are kept will the computed mAP be accurate; see how mAP works for the details.
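A minimal illustration of that point (my own sketch, not code from this commit; the function and variable names are made up): precision and recall are recomputed as the score cutoff sweeps over the kept detections, so any box discarded by a high confidence threshold can never raise recall, and the area under the resulting curve (the AP) is underestimated.

import numpy as np

def precision_recall_curve(scores, is_tp, num_gt):
    # Sort detections by descending score and sweep the cutoff through them.
    order = np.argsort(-np.asarray(scores, dtype=float))
    tp = np.cumsum(np.asarray(is_tp, dtype=float)[order])
    fp = np.cumsum(1.0 - np.asarray(is_tp, dtype=float)[order])
    recall = tp / max(num_gt, 1)                   # only rises if low-score boxes are kept
    precision = tp / np.maximum(tp + fp, 1e-12)
    return precision, recall

# AP is (roughly) the area under this curve, so truncating the detection list
# truncates the recall axis and lowers the computed mAP.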
@@ -79,12 +71,10 @@ class mAP_Yolo(YOLO):
             #   Stack the prediction boxes, then apply non-maximum suppression
             #---------------------------------------------------------#
             output = torch.cat(output_list, 1)
-            batch_detections = non_max_suppression(output, self.config["yolo"]["classes"],
-                                                   conf_thres=self.confidence,
-                                                   nms_thres=self.iou)
+            batch_detections = non_max_suppression(output, self.num_classes, conf_thres=self.confidence, nms_thres=self.iou)
             #---------------------------------------------------------#
-            #   If no objects are detected, return the original image
+            #   If no objects are detected, return
             #---------------------------------------------------------#
             try :
                 batch_detections = batch_detections[0].cpu().numpy()
@@ -94,10 +84,10 @@ class mAP_Yolo(YOLO):
             #---------------------------------------------------------#
             #   Filter the prediction boxes by score
             #---------------------------------------------------------#
             top_index = batch_detections[:,4] * batch_detections[:,5] > self.confidence
             top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
             top_label = np.array(batch_detections[top_index,-1],np.int32)
             top_bboxes = np.array(batch_detections[top_index,:4])
             top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
             #-----------------------------------------------------------------#
......
@@ -3,9 +3,7 @@
 #   For the full video tutorial, see
 #   https://www.bilibili.com/video/BV1zE411u7Vw
 #----------------------------------------------------#
-import sys
 import os
-import glob
 import xml.etree.ElementTree as ET

 '''
......
@@ -32,9 +32,8 @@ def make_last_layers(filters_list, in_filters, out_filter):
     return m

 class YoloBody(nn.Module):
-    def __init__(self, config):
+    def __init__(self, anchor, num_classes):
         super(YoloBody, self).__init__()
-        self.config = config
         #---------------------------------------------------#
         #   Build the darknet53 backbone
         #   It returns three effective feature layers whose shapes are:
@@ -51,15 +50,15 @@ class YoloBody(nn.Module):
         #   Compute the number of output channels of each yolo_head; for the VOC dataset,
         #   final_out_filter0 = final_out_filter1 = final_out_filter2 = 75
         #------------------------------------------------------------------------#
-        final_out_filter0 = len(config["yolo"]["anchors"][0]) * (5 + config["yolo"]["classes"])
+        final_out_filter0 = len(anchor[0]) * (5 + num_classes)
         self.last_layer0 = make_last_layers([512, 1024], out_filters[-1], final_out_filter0)
-        final_out_filter1 = len(config["yolo"]["anchors"][1]) * (5 + config["yolo"]["classes"])
+        final_out_filter1 = len(anchor[1]) * (5 + num_classes)
         self.last_layer1_conv = conv2d(512, 256, 1)
         self.last_layer1_upsample = nn.Upsample(scale_factor=2, mode='nearest')
         self.last_layer1 = make_last_layers([256, 512], out_filters[-2] + 256, final_out_filter1)
-        final_out_filter2 = len(config["yolo"]["anchors"][2]) * (5 + config["yolo"]["classes"])
+        final_out_filter2 = len(anchor[2]) * (5 + num_classes)
         self.last_layer2_conv = conv2d(256, 128, 1)
         self.last_layer2_upsample = nn.Upsample(scale_factor=2, mode='nearest')
         self.last_layer2 = make_last_layers([128, 256], out_filters[-3] + 128, final_out_filter2)
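As a quick sanity check of the 75-channel figure mentioned above (an illustrative snippet of mine, not part of the commit): each anchor predicts 4 box offsets, 1 objectness score, and one score per class, and there are 3 anchors per scale.

num_anchors_per_scale = 3                                      # len(anchor[i]) for each of the three heads
num_classes = 20                                               # VOC
final_out_filter = num_anchors_per_scale * (5 + num_classes)   # 4 offsets + 1 objectness + 20 class scores
print(final_out_filter)                                        # 75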
......
-import math
-from random import shuffle
-import cv2
+import os
+import math
 import numpy as np
+import scipy.signal
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
-from matplotlib.colors import hsv_to_rgb, rgb_to_hsv
-from PIL import Image
-from utils.utils import bbox_iou
+from matplotlib import pyplot as plt

 def jaccard(_box_a, _box_b):
     #   Compute the top-left and bottom-right corners of the ground-truth boxes
@@ -397,149 +393,51 @@ class YOLOLoss(nn.Module):
         anch_ious_max = anch_ious_max.view(pred_boxes[i].size()[:3])
         noobj_mask[i][anch_ious_max>self.ignore_threshold] = 0
     return noobj_mask
-def rand(a=0, b=1):
-    return np.random.rand()*(b-a) + a
-
-class Generator(object):
-    def __init__(self,batch_size,
-                 train_lines, image_size,
-                 ):
-        self.batch_size = batch_size
-        self.train_lines = train_lines
-        self.train_batches = len(train_lines)
-        self.image_size = image_size
-
-    def get_random_data(self, annotation_line, input_shape, jitter=.3, hue=.1, sat=1.5, val=1.5, random=True):
-        '''Random preprocessing for real-time data augmentation'''
-        line = annotation_line.split()
-        image = Image.open(line[0])
-        iw, ih = image.size
-        h, w = input_shape
-        box = np.array([np.array(list(map(int,box.split(',')))) for box in line[1:]])
-
-        if not random:
-            scale = min(w/iw, h/ih)
-            nw = int(iw*scale)
-            nh = int(ih*scale)
-            dx = (w-nw)//2
-            dy = (h-nh)//2
-
-            image = image.resize((nw,nh), Image.BICUBIC)
-            new_image = Image.new('RGB', (w,h), (128,128,128))
-            new_image.paste(image, (dx, dy))
-            image_data = np.array(new_image, np.float32)
-
-            #   Adjust the ground-truth box coordinates
-            box_data = np.zeros((len(box), 5))
-            if len(box) > 0:
-                np.random.shuffle(box)
-                box[:, [0, 2]] = box[:, [0, 2]] * nw / iw + dx
-                box[:, [1, 3]] = box[:, [1, 3]] * nh / ih + dy
-                box[:, 0:2][box[:, 0:2] < 0] = 0
-                box[:, 2][box[:, 2] > w] = w
-                box[:, 3][box[:, 3] > h] = h
-                box_w = box[:, 2] - box[:, 0]
-                box_h = box[:, 3] - box[:, 1]
-                box = box[np.logical_and(box_w > 1, box_h > 1)]  # keep only valid boxes
-                box_data = np.zeros((len(box), 5))
-                box_data[:len(box)] = box
-
-            return image_data, box_data
-
-        # resize image
-        new_ar = w/h * rand(1-jitter,1+jitter)/rand(1-jitter,1+jitter)
-        scale = rand(.25, 2)
-        if new_ar < 1:
-            nh = int(scale*h)
-            nw = int(nh*new_ar)
-        else:
-            nw = int(scale*w)
-            nh = int(nw/new_ar)
-        image = image.resize((nw,nh), Image.BICUBIC)
-
-        # place image
-        dx = int(rand(0, w-nw))
-        dy = int(rand(0, h-nh))
-        new_image = Image.new('RGB', (w,h), (128,128,128))
-        new_image.paste(image, (dx, dy))
-        image = new_image
-
-        # flip image or not
-        flip = rand()<.5
-        if flip: image = image.transpose(Image.FLIP_LEFT_RIGHT)
-
-        # distort image
-        hue = rand(-hue, hue)
-        sat = rand(1, sat) if rand()<.5 else 1/rand(1, sat)
-        val = rand(1, val) if rand()<.5 else 1/rand(1, val)
-        x = cv2.cvtColor(np.array(image,np.float32)/255, cv2.COLOR_RGB2HSV)
-        x[..., 0] += hue*360
-        x[..., 0][x[..., 0]>1] -= 1
-        x[..., 0][x[..., 0]<0] += 1
-        x[..., 1] *= sat
-        x[..., 2] *= val
-        x[x[:,:, 0]>360, 0] = 360
-        x[:, :, 1:][x[:, :, 1:]>1] = 1
-        x[x<0] = 0
-        image_data = cv2.cvtColor(x, cv2.COLOR_HSV2RGB)*255
-
-        # correct boxes
-        box_data = np.zeros((len(box),5))
-        if len(box)>0:
-            np.random.shuffle(box)
-            box[:, [0,2]] = box[:, [0,2]]*nw/iw + dx
-            box[:, [1,3]] = box[:, [1,3]]*nh/ih + dy
-            if flip: box[:, [0,2]] = w - box[:, [2,0]]
-            box[:, 0:2][box[:, 0:2]<0] = 0
-            box[:, 2][box[:, 2]>w] = w
-            box[:, 3][box[:, 3]>h] = h
-            box_w = box[:, 2] - box[:, 0]
-            box_h = box[:, 3] - box[:, 1]
-            box = box[np.logical_and(box_w>1, box_h>1)]  # discard invalid box
-            box_data = np.zeros((len(box),5))
-            box_data[:len(box)] = box
-
-        return image_data, box_data
-
-    def generate(self, train=True):
-        while True:
-            shuffle(self.train_lines)
-            lines = self.train_lines
-            inputs = []
-            targets = []
-            for annotation_line in lines:
-                if train:
-                    img,y=self.get_random_data(annotation_line, self.image_size[0:2])
-                else:
-                    img,y=self.get_random_data(annotation_line, self.image_size[0:2], random=False)
-                if len(y)!=0:
-                    boxes = np.array(y[:,:4],dtype=np.float32)
-                    boxes[:,0] = boxes[:,0]/self.image_size[1]
-                    boxes[:,1] = boxes[:,1]/self.image_size[0]
-                    boxes[:,2] = boxes[:,2]/self.image_size[1]
-                    boxes[:,3] = boxes[:,3]/self.image_size[0]
-                    boxes = np.maximum(np.minimum(boxes,1),0)
-                    boxes[:,2] = boxes[:,2] - boxes[:,0]
-                    boxes[:,3] = boxes[:,3] - boxes[:,1]
-                    boxes[:,0] = boxes[:,0] + boxes[:,2]/2
-                    boxes[:,1] = boxes[:,1] + boxes[:,3]/2
-                    y = np.concatenate([boxes,y[:,-1:]],axis=-1)
-                img = np.array(img,dtype = np.float32)
-                inputs.append(np.transpose(img/255.0,(2,0,1)))
-                targets.append(np.array(y,dtype = np.float32))
-                if len(targets) == self.batch_size:
-                    tmp_inp = np.array(inputs)
-                    tmp_targets = targets
-                    inputs = []
-                    targets = []
-                    yield tmp_inp, tmp_targets
+class LossHistory():
+    def __init__(self, log_dir):
+        import datetime
+        curr_time = datetime.datetime.now()
+        time_str = datetime.datetime.strftime(curr_time,'%Y_%m_%d_%H_%M_%S')
+        self.log_dir = log_dir
+        self.time_str = time_str
+        self.save_path = os.path.join(self.log_dir, "loss_" + str(self.time_str))
+        self.losses = []
+        self.val_loss = []
+
+        os.makedirs(self.save_path)
+
+    def append_loss(self, loss, val_loss):
+        self.losses.append(loss)
+        self.val_loss.append(val_loss)
+        with open(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".txt"), 'a') as f:
+            f.write(str(loss))
+            f.write("\n")
+        with open(os.path.join(self.save_path, "epoch_val_loss_" + str(self.time_str) + ".txt"), 'a') as f:
+            f.write(str(val_loss))
+            f.write("\n")
+        self.loss_plot()
+
+    def loss_plot(self):
+        iters = range(len(self.losses))
+
+        plt.figure()
+        plt.plot(iters, self.losses, 'red', linewidth = 2, label='train loss')
+        plt.plot(iters, self.val_loss, 'coral', linewidth = 2, label='val loss')
+        try:
+            if len(self.losses) < 25:
+                num = 5
+            else:
+                num = 15
+
+            plt.plot(iters, scipy.signal.savgol_filter(self.losses, num, 3), 'green', linestyle = '--', linewidth = 2, label='smooth train loss')
+            plt.plot(iters, scipy.signal.savgol_filter(self.val_loss, num, 3), '#8B4513', linestyle = '--', linewidth = 2, label='smooth val loss')
+        except:
+            pass
+
+        plt.grid(True)
+        plt.xlabel('Epoch')
+        plt.ylabel('Loss')
+        plt.legend(loc="upper right")
+
+        plt.savefig(os.path.join(self.save_path, "epoch_loss_" + str(self.time_str) + ".png"))
@@ -7,11 +7,9 @@ import torch
 from torchsummary import summary

 from nets.yolo3 import YoloBody
-from utils.config import Config

 if __name__ == "__main__":
     #   Use device to specify whether the network runs on the GPU or the CPU
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

-    config = {"model_params": {"backbone_name": "darknet_53"},"yolo": {"anchors": [[1,2,3],[2,3,4],[3,4,5]],"classes": 80}}
-    m = YoloBody(config).to(device)
+    m = YoloBody([[1,2,3],[2,3,4],[3,4,5]], 80).to(device)
     summary(m, input_size=(3, 416, 416))
 #-------------------------------------#
 #   Train on the dataset
 #-------------------------------------#
-import os
-import time
 import numpy as np
 import torch
 import torch.backends.cudnn as cudnn
-import torch.nn as nn
-import torch.nn.functional as F
 import torch.optim as optim
-from torch.autograd import Variable
 from torch.utils.data import DataLoader
 from tqdm import tqdm

 from nets.yolo3 import YoloBody
-from nets.yolo_training import Generator, YOLOLoss
-from utils.config import Config
+from nets.yolo_training import YOLOLoss, LossHistory
 from utils.dataloader import YoloDataset, yolo_dataset_collate

+def get_anchors(anchors_path):
+    with open(anchors_path) as f:
+        anchors = f.readline()
+    anchors = [float(x) for x in anchors.split(',')]
+    return np.array(anchors).reshape([-1, 3, 2])[::-1, :, :]
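For reference, get_anchors expects model_data/yolo_anchors.txt to hold nine width,height pairs on a single comma-separated line; it reshapes them into three groups of three and reverses the groups so the largest anchors come first. A small usage sketch (the anchor values shown are the standard YOLOv3 defaults and are only an example):

# model_data/yolo_anchors.txt (one line), e.g.:
# 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
anchors = get_anchors('model_data/yolo_anchors.txt')
print(anchors.shape)   # (3, 3, 2): three scales, three anchors each, (width, height)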
 def get_lr(optimizer):
     for param_group in optimizer.param_groups:
         return param_group['lr']

-def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epoch,cuda):
+def fit_one_epoch(net, yolo_loss, epoch, epoch_size, epoch_size_val, gen, genval, Epoch, cuda):
     total_loss = 0
     val_loss = 0

     net.train()
+    print('Start Train')
     with tqdm(total=epoch_size,desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
         for iteration, batch in enumerate(gen):
             if iteration >= epoch_size:
                 break
             images, targets = batch[0], batch[1]
             with torch.no_grad():
                 if cuda:
-                    images = Variable(torch.from_numpy(images).type(torch.FloatTensor)).cuda()
-                    targets = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets]
+                    images = torch.from_numpy(images).type(torch.FloatTensor).cuda()
+                    targets = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in targets]
                 else:
-                    images = Variable(torch.from_numpy(images).type(torch.FloatTensor))
-                    targets = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets]
+                    images = torch.from_numpy(images).type(torch.FloatTensor)
+                    targets = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in targets]
             #----------------------#
             #   Zero the gradients
             #----------------------#
@@ -49,14 +49,14 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
             #----------------------#
             #   Forward pass
             #----------------------#
             outputs = net(images)
             losses = []
             num_pos_all = 0
             #----------------------#
             #   Compute the loss
             #----------------------#
             for i in range(3):
-                loss_item, num_pos = yolo_losses[i](outputs[i], targets)
+                loss_item, num_pos = yolo_loss(outputs[i], targets)
                 losses.append(loss_item)
                 num_pos_all += num_pos
@@ -73,6 +73,8 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
                                 'lr'        : get_lr(optimizer)})
             pbar.update(1)

+    print('Finish Train')
+
     net.eval()
     print('Start Validation')
     with tqdm(total=epoch_size_val, desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
@@ -83,29 +85,36 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
             with torch.no_grad():
                 if cuda:
-                    images_val = Variable(torch.from_numpy(images_val).type(torch.FloatTensor)).cuda()
-                    targets_val = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets_val]
+                    images_val = torch.from_numpy(images_val).type(torch.FloatTensor).cuda()
+                    targets_val = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in targets_val]
                 else:
-                    images_val = Variable(torch.from_numpy(images_val).type(torch.FloatTensor))
-                    targets_val = [Variable(torch.from_numpy(ann).type(torch.FloatTensor)) for ann in targets_val]
+                    images_val = torch.from_numpy(images_val).type(torch.FloatTensor)
+                    targets_val = [torch.from_numpy(ann).type(torch.FloatTensor) for ann in targets_val]
                 optimizer.zero_grad()

                 outputs = net(images_val)
                 losses = []
                 num_pos_all = 0
+                #----------------------#
+                #   Compute the loss
+                #----------------------#
                 for i in range(3):
-                    loss_item, num_pos = yolo_losses[i](outputs[i], targets_val)
+                    loss_item, num_pos = yolo_loss(outputs[i], targets_val)
                     losses.append(loss_item)
                     num_pos_all += num_pos
                 loss = sum(losses) / num_pos_all
                 val_loss += loss.item()
             pbar.set_postfix(**{'total_loss': val_loss / (iteration + 1)})
             pbar.update(1)

+    loss_history.append_loss(total_loss/(epoch_size+1), val_loss/(epoch_size_val+1))
     print('Finish Validation')
     print('Epoch:'+ str(epoch+1) + '/' + str(Epoch))
     print('Total Loss: %.4f || Val Loss: %.4f ' %(total_loss / (epoch_size + 1), val_loss / (epoch_size_val + 1)))
     print('Saving state, iter:', str(epoch+1))
     torch.save(model.state_dict(), 'logs/Epoch%d-Total_Loss%.4f-Val_Loss%.4f.pth'%((epoch + 1), total_loss / (epoch_size + 1), val_loss / (epoch_size_val + 1)))

 #----------------------------------------------------#
 #   Reference video for computing detection mAP and the PR curve
@@ -117,28 +126,39 @@ if __name__ == "__main__":
     #   Set this to False if you have no GPU
     #-------------------------------#
     Cuda = True
-    #-------------------------------#
-    #   Whether to use the DataLoader
-    #-------------------------------#
-    Use_Data_Loader = True
     #------------------------------------------------------#
     #   Whether to normalize the loss, which changes its magnitude
     #   Decides whether the final loss is divided by batch_size or by the number of positive samples
     #------------------------------------------------------#
     normalize = False
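A rough illustration of what that flag means for the training loop below, where loss = sum(losses) / num_pos_all (my own conceptual sketch with made-up names, not the repository's YOLOLoss code):

def loss_divisor(normalize, num_positive_anchors, batch_size):
    # With normalize=True the summed loss is averaged over positive samples;
    # otherwise each of the three heads contributes a per-image average.
    return max(num_positive_anchors, 1) if normalize else batch_size / 3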
     #------------------------------------------------------#
+    #   Input shape
+    #------------------------------------------------------#
+    input_shape = (416, 416)
+    #------------------------------------------------------#
+    #   The Config.py shown in the video has been removed
+    #   To change the number of classes, simply edit num_classes here
+    #   If you need to detect 5 classes, set it to 5. The default is 20
+    #------------------------------------------------------#
+    num_classes = 20
+    #----------------------------------------------------#
+    #   Path of the prior-box (anchor) file
+    #----------------------------------------------------#
+    anchors_path = 'model_data/yolo_anchors.txt'
+    anchors = get_anchors(anchors_path)
     #------------------------------------------------------#
     #   Build the yolo model
     #   Be sure to modify the classes parameter in Config before training
     #------------------------------------------------------#
-    model = YoloBody(Config)
+    model = YoloBody(anchors, num_classes)
     #------------------------------------------------------#
     #   For the weight file, see the README (Baidu Netdisk download)
     #------------------------------------------------------#
     model_path = "model_data/yolo_weights.pth"
     print('Loading weights into state dict...')
     device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
     model_dict = model.state_dict()
     pretrained_dict = torch.load(model_path, map_location=device)
     pretrained_dict = {k: v for k, v in pretrained_dict.items() if np.shape(model_dict[k]) == np.shape(v)}
     model_dict.update(pretrained_dict)
@@ -152,11 +172,8 @@ if __name__ == "__main__":
         cudnn.benchmark = True
         net = net.cuda()

-    # Build the loss functions
-    yolo_losses = []
-    for i in range(3):
-        yolo_losses.append(YOLOLoss(np.reshape(Config["yolo"]["anchors"],[-1,2]),
-                                    Config["yolo"]["classes"], (Config["img_w"], Config["img_h"]), Cuda, normalize))
+    yolo_loss = YOLOLoss(np.reshape(anchors,[-1,2]), num_classes, (input_shape[1], input_shape[0]), Cuda, normalize)
+    loss_history = LossHistory("logs/")

     #----------------------------------------------------#
     #   Get the image paths and labels
@@ -185,69 +202,65 @@ if __name__ == "__main__":
     #   If you get an OOM / out-of-GPU-memory error, reduce Batch_size
     #------------------------------------------------------#
     if True:
         lr = 1e-3
         Batch_size = 8
         Init_Epoch = 0
         Freeze_Epoch = 50

         optimizer = optim.Adam(net.parameters(),lr)
         lr_scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.92)

-        if Use_Data_Loader:
-            train_dataset = YoloDataset(lines[:num_train], (Config["img_h"], Config["img_w"]), True)
-            val_dataset = YoloDataset(lines[num_train:], (Config["img_h"], Config["img_w"]), False)
-            gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
-                             drop_last=True, collate_fn=yolo_dataset_collate)
-            gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4,pin_memory=True,
-                                 drop_last=True, collate_fn=yolo_dataset_collate)
-        else:
-            gen = Generator(Batch_size, lines[:num_train],
-                            (Config["img_h"], Config["img_w"])).generate(True)
-            gen_val = Generator(Batch_size, lines[num_train:],
-                                (Config["img_h"], Config["img_w"])).generate(False)
-
-        epoch_size = num_train//Batch_size
-        epoch_size_val = num_val//Batch_size
+        train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), True)
+        val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), False)
+        gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
+                         drop_last=True, collate_fn=yolo_dataset_collate)
+        gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
+                             drop_last=True, collate_fn=yolo_dataset_collate)

         #------------------------------------#
         #   Freeze part of the network for training
         #------------------------------------#
         for param in model.backbone.parameters():
             param.requires_grad = False

+        epoch_size = num_train // Batch_size
+        epoch_size_val = num_val // Batch_size
+
+        if epoch_size == 0 or epoch_size_val == 0:
+            raise ValueError("The dataset is too small for training; please add more data.")
+
         for epoch in range(Init_Epoch,Freeze_Epoch):
-            fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,gen_val,Freeze_Epoch,Cuda)
+            fit_one_epoch(net, yolo_loss, epoch, epoch_size, epoch_size_val, gen, gen_val, Freeze_Epoch, Cuda)
             lr_scheduler.step()
     if True:
         lr = 1e-4
         Batch_size = 4
         Freeze_Epoch = 50
         Unfreeze_Epoch = 100

         optimizer = optim.Adam(net.parameters(),lr)
         lr_scheduler = optim.lr_scheduler.StepLR(optimizer,step_size=1,gamma=0.92)

-        if Use_Data_Loader:
-            train_dataset = YoloDataset(lines[:num_train], (Config["img_h"], Config["img_w"]), True)
-            val_dataset = YoloDataset(lines[num_train:], (Config["img_h"], Config["img_w"]), False)
-            gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
-                             drop_last=True, collate_fn=yolo_dataset_collate)
-            gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4,pin_memory=True,
-                                 drop_last=True, collate_fn=yolo_dataset_collate)
-        else:
-            gen = Generator(Batch_size, lines[:num_train],
-                            (Config["img_h"], Config["img_w"])).generate(True)
-            gen_val = Generator(Batch_size, lines[num_train:],
-                                (Config["img_h"], Config["img_w"])).generate(False)
-
-        epoch_size = num_train//Batch_size
-        epoch_size_val = num_val//Batch_size
+        train_dataset = YoloDataset(lines[:num_train], (input_shape[0], input_shape[1]), True)
+        val_dataset = YoloDataset(lines[num_train:], (input_shape[0], input_shape[1]), False)
+        gen = DataLoader(train_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
+                         drop_last=True, collate_fn=yolo_dataset_collate)
+        gen_val = DataLoader(val_dataset, shuffle=True, batch_size=Batch_size, num_workers=4, pin_memory=True,
+                             drop_last=True, collate_fn=yolo_dataset_collate)

         #------------------------------------#
         #   Train after unfreezing the backbone
         #------------------------------------#
         for param in model.backbone.parameters():
             param.requires_grad = True

+        epoch_size = num_train//Batch_size
+        epoch_size_val = num_val//Batch_size
+
+        if epoch_size == 0 or epoch_size_val == 0:
+            raise ValueError("The dataset is too small for training; please add more data.")
+
         for epoch in range(Freeze_Epoch,Unfreeze_Epoch):
-            fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,gen_val,Unfreeze_Epoch,Cuda)
+            fit_one_epoch(net, yolo_loss, epoch, epoch_size, epoch_size_val, gen, gen_val, Unfreeze_Epoch, Cuda)
             lr_scheduler.step()
-from random import shuffle
+import cv2
 import numpy as np
-import torch
-import torch.nn as nn
-import math
-import torch.nn.functional as F
 from PIL import Image
-from torch.autograd import Variable
-from torch.utils.data import DataLoader
 from torch.utils.data.dataset import Dataset
-from matplotlib.colors import rgb_to_hsv, hsv_to_rgb
-from nets.yolo_training import Generator
-import cv2

 class YoloDataset(Dataset):
     def __init__(self, train_lines, image_size, is_train):
@@ -152,7 +144,6 @@ class YoloDataset(Dataset):
         tmp_targets = np.array(y, dtype=np.float32)
         return tmp_inp, tmp_targets

 # collate_fn used by the DataLoader
 def yolo_dataset_collate(batch):
     images = []
......
 from __future__ import division
-import math
-import os
-import time
 import numpy as np
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
-from PIL import Image, ImageDraw, ImageFont
+from PIL import Image
-from torch.autograd import Variable
 from torchvision.ops import nms
......
@@ -7,7 +7,9 @@ import xml.etree.ElementTree as ET
 from os import getcwd

 sets=[('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
+#-----------------------------------------------------#
+#   The class order set here must match the txt in model_data
+#-----------------------------------------------------#
 classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
 def convert_annotation(year, image_id, list_file):
@@ -19,20 +21,19 @@ def convert_annotation(year, image_id, list_file):
         difficult = 0
         if obj.find('difficult')!=None:
             difficult = obj.find('difficult').text
         cls = obj.find('name').text
         if cls not in classes or int(difficult)==1:
             continue
         cls_id = classes.index(cls)
         xmlbox = obj.find('bndbox')
-        b = (int(xmlbox.find('xmin').text), int(xmlbox.find('ymin').text), int(xmlbox.find('xmax').text), int(xmlbox.find('ymax').text))
+        b = (int(float(xmlbox.find('xmin').text)), int(float(xmlbox.find('ymin').text)), int(float(xmlbox.find('xmax').text)), int(float(xmlbox.find('ymax').text)))
         list_file.write(" " + ",".join([str(a) for a in b]) + ',' + str(cls_id))

 wd = getcwd()

 for year, image_set in sets:
-    image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split()
-    list_file = open('%s_%s.txt'%(year, image_set), 'w')
+    image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set), encoding='utf-8').read().strip().split()
+    list_file = open('%s_%s.txt'%(year, image_set), 'w', encoding='utf-8')
     for image_id in image_ids:
         list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg'%(wd, year, image_id))
         convert_annotation(year, image_id, list_file)
......
@@ -4,18 +4,14 @@
 import colorsys
 import os
-import cv2
 import numpy as np
 import torch
-import torch.backends.cudnn as cudnn
 import torch.nn as nn
 from PIL import Image, ImageDraw, ImageFont
-from torch.autograd import Variable

 from nets.yolo3 import YoloBody
-from utils.config import Config
-from utils.utils import (DecodeBox, bbox_iou, letterbox_image,
-                         non_max_suppression, yolo_correct_boxes)
+from utils.utils import (DecodeBox, letterbox_image, non_max_suppression,
+                         yolo_correct_boxes)

 #--------------------------------------------#
@@ -27,6 +23,7 @@ from utils.utils import (DecodeBox, bbox_iou, letterbox_image,
 class YOLO(object):
     _defaults = {
         "model_path"        : 'model_data/yolo_weights.pth',
+        "anchors_path"      : 'model_data/yolo_anchors.txt',
         "classes_path"      : 'model_data/coco_classes.txt',
         "model_image_size"  : (416, 416, 3),
         "confidence"        : 0.5,
@@ -52,8 +49,9 @@ class YOLO(object):
     def __init__(self, **kwargs):
         self.__dict__.update(self._defaults)
         self.class_names = self._get_class()
-        self.config = Config
+        self.anchors = self._get_anchors()
         self.generate()

     #---------------------------------------------------#
     #   Get all the classes
     #---------------------------------------------------#
@@ -64,15 +62,25 @@ class YOLO(object):
         class_names = [c.strip() for c in class_names]
         return class_names
+    #---------------------------------------------------#
+    #   Get all the prior (anchor) boxes
+    #---------------------------------------------------#
+    def _get_anchors(self):
+        anchors_path = os.path.expanduser(self.anchors_path)
+        with open(anchors_path) as f:
+            anchors = f.readline()
+        anchors = [float(x) for x in anchors.split(',')]
+        return np.array(anchors).reshape([-1, 3, 2])[::-1,:,:]

     #---------------------------------------------------#
     #   Build the model
     #---------------------------------------------------#
     def generate(self):
-        self.config["yolo"]["classes"] = len(self.class_names)
+        self.num_classes = len(self.class_names)
         #---------------------------------------------------#
         #   Build the yolov3 model
         #---------------------------------------------------#
-        self.net = YoloBody(self.config)
+        self.net = YoloBody(self.anchors, self.num_classes)
         #---------------------------------------------------#
         #   Load the yolov3 weights
@@ -84,7 +92,6 @@ class YOLO(object):
         self.net = self.net.eval()

         if self.cuda:
-            os.environ["CUDA_VISIBLE_DEVICES"] = '0'
             self.net = nn.DataParallel(self.net)
             self.net = self.net.cuda()
@@ -93,7 +100,7 @@ class YOLO(object):
         #---------------------------------------------------#
         self.yolo_decodes = []
         for i in range(3):
-            self.yolo_decodes.append(DecodeBox(self.config["yolo"]["anchors"][i], self.config["yolo"]["classes"], (self.model_image_size[1], self.model_image_size[0])))
+            self.yolo_decodes.append(DecodeBox(self.anchors[i], self.num_classes, (self.model_image_size[1], self.model_image_size[0])))

         print('{} model, anchors, and classes loaded.'.format(self.model_path))
         #   Set a different color for each bounding box
@@ -108,17 +115,21 @@ class YOLO(object):
     #   Detect an image
     #---------------------------------------------------#
     def detect_image(self, image):
-        image_shape = np.array(np.shape(image)[0:2])
+        #---------------------------------------------------------#
+        #   Convert the image to RGB here so that grayscale images do not raise errors during prediction.
+        #---------------------------------------------------------#
+        image = image.convert('RGB')
+        image_shape = np.array(np.shape(image)[0:2])

         #---------------------------------------------------------#
         #   Add gray bars to the image for a distortion-free resize
         #   You can also resize directly for detection
         #---------------------------------------------------------#
         if self.letterbox_image:
             crop_img = np.array(letterbox_image(image, (self.model_image_size[1], self.model_image_size[0])))
         else:
-            crop_img = image.convert('RGB')
-            crop_img = crop_img.resize((self.model_image_size[1],self.model_image_size[0]), Image.BICUBIC)
+            crop_img = image.resize((self.model_image_size[1], self.model_image_size[0]), Image.BICUBIC)
         photo = np.array(crop_img,dtype = np.float32) / 255.0
         photo = np.transpose(photo, (2, 0, 1))
         #---------------------------------------------------------#
@@ -143,9 +154,7 @@ class YOLO(object):
             #   Stack the prediction boxes, then apply non-maximum suppression
             #---------------------------------------------------------#
             output = torch.cat(output_list, 1)
-            batch_detections = non_max_suppression(output, self.config["yolo"]["classes"],
-                                                   conf_thres=self.confidence,
-                                                   nms_thres=self.iou)
+            batch_detections = non_max_suppression(output, self.num_classes, conf_thres=self.confidence, nms_thres=self.iou)
             #---------------------------------------------------------#
             #   If no objects are detected, return the original image
@@ -158,10 +167,10 @@ class YOLO(object):
             #---------------------------------------------------------#
             #   Filter the prediction boxes by score
             #---------------------------------------------------------#
             top_index = batch_detections[:, 4] * batch_detections[:, 5] > self.confidence
             top_conf = batch_detections[top_index, 4] * batch_detections[top_index, 5]
             top_label = np.array(batch_detections[top_index, -1],np.int32)
             top_bboxes = np.array(batch_detections[top_index, :4])
             top_xmin, top_ymin, top_xmax, top_ymax = np.expand_dims(top_bboxes[:,0],-1),np.expand_dims(top_bboxes[:,1],-1),np.expand_dims(top_bboxes[:,2],-1),np.expand_dims(top_bboxes[:,3],-1)
             #-----------------------------------------------------------------#
@@ -170,7 +179,7 @@ class YOLO(object):
             #   We need to correct them to remove the gray-bar offset.
             #-----------------------------------------------------------------#
             if self.letterbox_image:
                 boxes = yolo_correct_boxes(top_ymin, top_xmin, top_ymax, top_xmax, np.array([self.model_image_size[0],self.model_image_size[1]]), image_shape)
             else:
                 top_xmin = top_xmin / self.model_image_size[1] * image_shape[1]
                 top_ymin = top_ymin / self.model_image_size[0] * image_shape[0]
......