Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
弱水三千2333
yolo3-pytorch
提交
30094fa5
Y
yolo3-pytorch
项目概览
弱水三千2333
/
yolo3-pytorch
与 Fork 源项目一致
从无法访问的项目Fork
通知
2
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Y
yolo3-pytorch
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
前往新版Gitcode,体验更适合开发者的 AI 搜索 >>
未验证
提交
30094fa5
编写于
1月 13, 2021
作者:
B
Bubbliiiing
提交者:
GitHub
1月 13, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add files via upload
上级
742158d5
变更
15
展开全部
隐藏空白更改
内联
并排
Showing
15 changed file
with
666 addition
and
243 deletion
+666
-243
FPS_test.py
FPS_test.py
+9
-9
VOCdevkit/VOC2007/voc2yolo3.py
VOCdevkit/VOC2007/voc2yolo3.py
+10
-2
get_dr_txt.py
get_dr_txt.py
+60
-35
nets/darknet.py
nets/darknet.py
+20
-4
nets/yolo3.py
nets/yolo3.py
+47
-9
nets/yolo_training.py
nets/yolo_training.py
+215
-56
predict.py
predict.py
+9
-4
test.py
test.py
+1
-0
train.py
train.py
+80
-44
utils/config.py
utils/config.py
+9
-1
utils/dataloader.py
utils/dataloader.py
+40
-12
utils/utils.py
utils/utils.py
+76
-22
video.py
video.py
+16
-11
voc_annotation.py
voc_annotation.py
+5
-0
yolo.py
yolo.py
+69
-34
未找到文件。
FPS_test.py
浏览文件 @
30094fa5
...
...
@@ -25,20 +25,20 @@ video.py里面测试的FPS会低于该FPS,因为摄像头的读取频率有限
'''
class
FPS_YOLO
(
YOLO
):
def
get_FPS
(
self
,
image
,
test_interval
):
# 调整图片使其符合输入要求
image_shape
=
np
.
array
(
np
.
shape
(
image
)[
0
:
2
])
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
#---------------------------------------------------------#
crop_img
=
np
.
array
(
letterbox_image
(
image
,
(
self
.
model_image_size
[
1
],
self
.
model_image_size
[
0
])))
photo
=
np
.
array
(
crop_img
,
dtype
=
np
.
float32
)
photo
/=
255.0
photo
=
np
.
array
(
crop_img
,
dtype
=
np
.
float32
)
/
255.0
photo
=
np
.
transpose
(
photo
,
(
2
,
0
,
1
))
photo
=
photo
.
astype
(
np
.
float32
)
images
=
[]
images
.
append
(
photo
)
images
=
np
.
asarray
(
images
)
#---------------------------------------------------------#
# 添加上batch_size维度
#---------------------------------------------------------#
images
=
[
photo
]
with
torch
.
no_grad
():
images
=
torch
.
from_numpy
(
images
)
images
=
torch
.
from_numpy
(
np
.
asarray
(
images
)
)
if
self
.
cuda
:
images
=
images
.
cuda
()
outputs
=
self
.
net
(
images
)
...
...
VOCdevkit/VOC2007/voc2yolo3.py
浏览文件 @
30094fa5
#----------------------------------------------------------------------#
# 验证集的划分在train.py代码里面进行
# test.txt和val.txt里面没有内容是正常的。训练不会使用到。
#----------------------------------------------------------------------#
import
os
import
random
random
.
seed
(
0
)
xmlfilepath
=
r
'./VOCdevkit/VOC2007/Annotations'
saveBasePath
=
r
"./VOCdevkit/VOC2007/ImageSets/Main/"
#----------------------------------------------------------------------#
# 想要增加测试集修改trainval_percent
# train_percent不需要修改
#----------------------------------------------------------------------#
trainval_percent
=
1
train_percent
=
1
...
...
get_dr_txt.py
浏览文件 @
30094fa5
#-------------------------------------#
# mAP所需文件计算代码
# 具体教程请查看Bilibili
# Bubbliiiing
#-------------------------------------#
import
cv2
import
numpy
as
np
#----------------------------------------------------#
# 获取测试集的detection-result和images-optional
# 具体视频教程可查看
# https://www.bilibili.com/video/BV1zE411u7Vw
#----------------------------------------------------#
import
colorsys
import
os
import
cv2
import
numpy
as
np
import
torch
import
torch.nn
as
nn
import
torch.backends.cudnn
as
cudnn
import
torch.nn
as
nn
from
PIL
import
Image
,
ImageDraw
,
ImageFont
from
torch.autograd
import
Variable
from
yolo
import
YOLO
from
tqdm
import
tqdm
from
nets.yolo3
import
YoloBody
from
PIL
import
Image
,
ImageFont
,
ImageDraw
from
utils.config
import
Config
from
utils.utils
import
non_max_suppression
,
bbox_iou
,
DecodeBox
,
letterbox_image
,
yolo_correct_boxes
from
tqdm
import
tqdm
from
utils.utils
import
(
DecodeBox
,
bbox_iou
,
letterbox_image
,
non_max_suppression
,
yolo_correct_boxes
)
from
yolo
import
YOLO
class
mAP_Yolo
(
YOLO
):
#---------------------------------------------------#
...
...
@@ -28,40 +32,61 @@ class mAP_Yolo(YOLO):
f
=
open
(
"./input/detection-results/"
+
image_id
+
".txt"
,
"w"
)
image_shape
=
np
.
array
(
np
.
shape
(
image
)[
0
:
2
])
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
#---------------------------------------------------------#
crop_img
=
np
.
array
(
letterbox_image
(
image
,
(
self
.
model_image_size
[
1
],
self
.
model_image_size
[
0
])))
photo
=
np
.
array
(
crop_img
,
dtype
=
np
.
float32
)
photo
/=
255.0
photo
=
np
.
array
(
crop_img
,
dtype
=
np
.
float32
)
/
255.0
photo
=
np
.
transpose
(
photo
,
(
2
,
0
,
1
))
photo
=
photo
.
astype
(
np
.
float32
)
images
=
[]
images
.
append
(
photo
)
images
=
np
.
asarray
(
images
)
images
=
torch
.
from_numpy
(
images
)
if
self
.
cuda
:
images
=
images
.
cuda
()
#---------------------------------------------------------#
# 添加上batch_size维度
#---------------------------------------------------------#
images
=
[
photo
]
with
torch
.
no_grad
():
images
=
torch
.
from_numpy
(
np
.
asarray
(
images
))
if
self
.
cuda
:
images
=
images
.
cuda
()
#---------------------------------------------------------#
# 将图像输入网络当中进行预测!
#---------------------------------------------------------#
outputs
=
self
.
net
(
images
)
output_list
=
[]
for
i
in
range
(
3
):
output_list
.
append
(
self
.
yolo_decodes
[
i
](
outputs
[
i
]))
#---------------------------------------------------------#
# 将预测框进行堆叠,然后进行非极大抑制
#---------------------------------------------------------#
output
=
torch
.
cat
(
output_list
,
1
)
batch_detections
=
non_max_suppression
(
output
,
self
.
config
[
"yolo"
][
"classes"
],
conf_thres
=
self
.
confidence
,
nms_thres
=
self
.
iou
)
try
:
batch_detections
=
batch_detections
[
0
].
cpu
().
numpy
()
except
:
return
image
top_index
=
batch_detections
[:,
4
]
*
batch_detections
[:,
5
]
>
self
.
confidence
top_conf
=
batch_detections
[
top_index
,
4
]
*
batch_detections
[
top_index
,
5
]
top_label
=
np
.
array
(
batch_detections
[
top_index
,
-
1
],
np
.
int32
)
top_bboxes
=
np
.
array
(
batch_detections
[
top_index
,:
4
])
top_xmin
,
top_ymin
,
top_xmax
,
top_ymax
=
np
.
expand_dims
(
top_bboxes
[:,
0
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
1
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
2
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
3
],
-
1
)
# 去掉灰条
boxes
=
yolo_correct_boxes
(
top_ymin
,
top_xmin
,
top_ymax
,
top_xmax
,
np
.
array
([
self
.
model_image_size
[
0
],
self
.
model_image_size
[
1
]]),
image_shape
)
#---------------------------------------------------------#
# 如果没有检测出物体,返回原图
#---------------------------------------------------------#
try
:
batch_detections
=
batch_detections
[
0
].
cpu
().
numpy
()
except
:
return
image
#---------------------------------------------------------#
# 对预测框进行得分筛选
#---------------------------------------------------------#
top_index
=
batch_detections
[:,
4
]
*
batch_detections
[:,
5
]
>
self
.
confidence
top_conf
=
batch_detections
[
top_index
,
4
]
*
batch_detections
[
top_index
,
5
]
top_label
=
np
.
array
(
batch_detections
[
top_index
,
-
1
],
np
.
int32
)
top_bboxes
=
np
.
array
(
batch_detections
[
top_index
,:
4
])
top_xmin
,
top_ymin
,
top_xmax
,
top_ymax
=
np
.
expand_dims
(
top_bboxes
[:,
0
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
1
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
2
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
3
],
-
1
)
#-----------------------------------------------------------------#
# 在图像传入网络预测前会进行letterbox_image给图像周围添加灰条
# 因此生成的top_bboxes是相对于有灰条的图像的
# 我们需要对其进行修改,去除灰条的部分。
#-----------------------------------------------------------------#
boxes
=
yolo_correct_boxes
(
top_ymin
,
top_xmin
,
top_ymax
,
top_xmax
,
np
.
array
([
self
.
model_image_size
[
0
],
self
.
model_image_size
[
1
]]),
image_shape
)
for
i
,
c
in
enumerate
(
top_label
):
predicted_class
=
self
.
class_names
[
c
]
...
...
nets/darknet.py
浏览文件 @
30094fa5
import
torch
import
torch.nn
as
nn
import
math
from
collections
import
OrderedDict
# 基本的darknet块
import
torch
import
torch.nn
as
nn
#---------------------------------------------------------------------#
# 残差结构
# 利用一个1x1卷积下降通道数,然后利用一个3x3卷积提取特征并且上升通道数
# 最后接上一个残差边
#---------------------------------------------------------------------#
class
BasicBlock
(
nn
.
Module
):
def
__init__
(
self
,
inplanes
,
planes
):
super
(
BasicBlock
,
self
).
__init__
()
...
...
@@ -36,14 +42,20 @@ class DarkNet(nn.Module):
def
__init__
(
self
,
layers
):
super
(
DarkNet
,
self
).
__init__
()
self
.
inplanes
=
32
# 416,416,3 -> 416,416,32
self
.
conv1
=
nn
.
Conv2d
(
3
,
self
.
inplanes
,
kernel_size
=
3
,
stride
=
1
,
padding
=
1
,
bias
=
False
)
self
.
bn1
=
nn
.
BatchNorm2d
(
self
.
inplanes
)
self
.
relu1
=
nn
.
LeakyReLU
(
0.1
)
# 416,416,32 -> 208,208,64
self
.
layer1
=
self
.
_make_layer
([
32
,
64
],
layers
[
0
])
# 208,208,64 -> 104,104,128
self
.
layer2
=
self
.
_make_layer
([
64
,
128
],
layers
[
1
])
# 104,104,128 -> 52,52,256
self
.
layer3
=
self
.
_make_layer
([
128
,
256
],
layers
[
2
])
# 52,52,256 -> 26,26,512
self
.
layer4
=
self
.
_make_layer
([
256
,
512
],
layers
[
3
])
# 26,26,512 -> 13,13,1024
self
.
layer5
=
self
.
_make_layer
([
512
,
1024
],
layers
[
4
])
self
.
layers_out_filters
=
[
64
,
128
,
256
,
512
,
1024
]
...
...
@@ -57,6 +69,10 @@ class DarkNet(nn.Module):
m
.
weight
.
data
.
fill_
(
1
)
m
.
bias
.
data
.
zero_
()
#---------------------------------------------------------------------#
# 在每一个layer里面,首先利用一个步长为2的3x3卷积进行下采样
# 然后进行残差结构的堆叠
#---------------------------------------------------------------------#
def
_make_layer
(
self
,
planes
,
blocks
):
layers
=
[]
# 下采样,步长为2,卷积核大小为3
...
...
@@ -64,7 +80,7 @@ class DarkNet(nn.Module):
stride
=
2
,
padding
=
1
,
bias
=
False
)))
layers
.
append
((
"ds_bn"
,
nn
.
BatchNorm2d
(
planes
[
1
])))
layers
.
append
((
"ds_relu"
,
nn
.
LeakyReLU
(
0.1
)))
# 加入
darknet模块
# 加入
残差结构
self
.
inplanes
=
planes
[
1
]
for
i
in
range
(
0
,
blocks
):
layers
.
append
((
"residual_{}"
.
format
(
i
),
BasicBlock
(
self
.
inplanes
,
planes
)))
...
...
nets/yolo3.py
浏览文件 @
30094fa5
from
collections
import
OrderedDict
import
torch
import
torch.nn
as
nn
from
collections
import
OrderedDict
from
nets.darknet
import
darknet53
def
conv2d
(
filter_in
,
filter_out
,
kernel_size
):
pad
=
(
kernel_size
-
1
)
//
2
if
kernel_size
else
0
return
nn
.
Sequential
(
OrderedDict
([
...
...
@@ -11,6 +14,10 @@ def conv2d(filter_in, filter_out, kernel_size):
(
"relu"
,
nn
.
LeakyReLU
(
0.1
)),
]))
#------------------------------------------------------------------------#
# make_last_layers里面一共有七个卷积,前五个用于提取特征。
# 后两个用于获得yolo网络的预测结果
#------------------------------------------------------------------------#
def
make_last_layers
(
filters_list
,
in_filters
,
out_filter
):
m
=
nn
.
ModuleList
([
conv2d
(
in_filters
,
filters_list
[
0
],
1
),
...
...
@@ -28,21 +35,30 @@ class YoloBody(nn.Module):
def
__init__
(
self
,
config
):
super
(
YoloBody
,
self
).
__init__
()
self
.
config
=
config
# backbone
#---------------------------------------------------#
# 生成darknet53的主干模型
# 获得三个有效特征层,他们的shape分别是:
# 52,52,256
# 26,26,512
# 13,13,1024
#---------------------------------------------------#
self
.
backbone
=
darknet53
(
None
)
# out_filters : [64, 128, 256, 512, 1024]
out_filters
=
self
.
backbone
.
layers_out_filters
# last_layer0
#------------------------------------------------------------------------#
# 计算yolo_head的输出通道数,对于voc数据集而言
# final_out_filter0 = final_out_filter1 = final_out_filter2 = 75
#------------------------------------------------------------------------#
final_out_filter0
=
len
(
config
[
"yolo"
][
"anchors"
][
0
])
*
(
5
+
config
[
"yolo"
][
"classes"
])
self
.
last_layer0
=
make_last_layers
([
512
,
1024
],
out_filters
[
-
1
],
final_out_filter0
)
# embedding1
final_out_filter1
=
len
(
config
[
"yolo"
][
"anchors"
][
1
])
*
(
5
+
config
[
"yolo"
][
"classes"
])
self
.
last_layer1_conv
=
conv2d
(
512
,
256
,
1
)
self
.
last_layer1_upsample
=
nn
.
Upsample
(
scale_factor
=
2
,
mode
=
'nearest'
)
self
.
last_layer1
=
make_last_layers
([
256
,
512
],
out_filters
[
-
2
]
+
256
,
final_out_filter1
)
# embedding2
final_out_filter2
=
len
(
config
[
"yolo"
][
"anchors"
][
2
])
*
(
5
+
config
[
"yolo"
][
"classes"
])
self
.
last_layer2_conv
=
conv2d
(
256
,
128
,
1
)
self
.
last_layer2_upsample
=
nn
.
Upsample
(
scale_factor
=
2
,
mode
=
'nearest'
)
...
...
@@ -56,21 +72,43 @@ class YoloBody(nn.Module):
if
i
==
4
:
out_branch
=
layer_in
return
layer_in
,
out_branch
# backbone
#---------------------------------------------------#
# 获得三个有效特征层,他们的shape分别是:
# 52,52,256;26,26,512;13,13,1024
#---------------------------------------------------#
x2
,
x1
,
x0
=
self
.
backbone
(
x
)
# yolo branch 0
#---------------------------------------------------#
# 第一个特征层
# out0 = (batch_size,255,13,13)
#---------------------------------------------------#
# 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512 -> 13,13,1024 -> 13,13,512
out0
,
out0_branch
=
_branch
(
self
.
last_layer0
,
x0
)
#
yolo branch 1
#
13,13,512 -> 13,13,256 -> 26,26,256
x1_in
=
self
.
last_layer1_conv
(
out0_branch
)
x1_in
=
self
.
last_layer1_upsample
(
x1_in
)
# 26,26,256 + 26,26,512 -> 26,26,768
x1_in
=
torch
.
cat
([
x1_in
,
x1
],
1
)
#---------------------------------------------------#
# 第二个特征层
# out1 = (batch_size,255,26,26)
#---------------------------------------------------#
# 26,26,768 -> 26,26,256 -> 26,26,512 -> 26,26,256 -> 26,26,512 -> 26,26,256
out1
,
out1_branch
=
_branch
(
self
.
last_layer1
,
x1_in
)
#
yolo branch 2
#
26,26,256 -> 26,26,128 -> 52,52,128
x2_in
=
self
.
last_layer2_conv
(
out1_branch
)
x2_in
=
self
.
last_layer2_upsample
(
x2_in
)
# 52,52,128 + 52,52,256 -> 52,52,384
x2_in
=
torch
.
cat
([
x2_in
,
x2
],
1
)
#---------------------------------------------------#
# 第三个特征层
# out2 = (batch_size,255,52,52)
#---------------------------------------------------#
# 52,52,384 -> 52,52,128 -> 52,52,256 -> 52,52,128 -> 52,52,256 -> 52,52,128
out2
,
_
=
_branch
(
self
.
last_layer2
,
x2_in
)
return
out0
,
out1
,
out2
nets/yolo_training.py
浏览文件 @
30094fa5
此差异已折叠。
点击以展开。
predict.py
浏览文件 @
30094fa5
#-------------------------------------#
# 对单张图片进行预测
#-------------------------------------#
from
yolo
import
YOLO
'''
predict.py有几个注意点
1、无法进行批量预测,如果想要批量预测,可以利用os.listdir()遍历文件夹,利用Image.open打开图片文件进行预测。
2、如果想要保存,利用r_image.save("img.jpg")即可保存。
3、如果想要获得框的坐标,可以进入detect_image函数,读取top,left,bottom,right这四个值。
4、如果想要截取下目标,可以利用获取到的top,left,bottom,right这四个值在原图上利用矩阵的方式进行截取。
'''
from
PIL
import
Image
from
yolo
import
YOLO
yolo
=
YOLO
()
while
True
:
...
...
test.py
浏览文件 @
30094fa5
...
...
@@ -5,6 +5,7 @@
#--------------------------------------------#
import
torch
from
torchsummary
import
summary
from
nets.yolo3
import
YoloBody
from
utils.config
import
Config
...
...
train.py
浏览文件 @
30094fa5
...
...
@@ -2,21 +2,24 @@
# 对数据集进行训练
#-------------------------------------#
import
os
import
numpy
as
np
import
time
import
numpy
as
np
import
torch
from
torch.autograd
import
Variable
import
torch.backends.cudnn
as
cudnn
import
torch.nn
as
nn
import
torch.optim
as
optim
import
torch.nn.functional
as
F
import
torch.
backends.cudnn
as
cudnn
from
utils.config
import
Config
import
torch.
optim
as
optim
from
torch.autograd
import
Variable
from
torch.utils.data
import
DataLoader
from
utils.dataloader
import
yolo_dataset_collate
,
YoloDataset
from
nets.yolo_training
import
YOLOLoss
,
Generator
from
nets.yolo3
import
YoloBody
from
tqdm
import
tqdm
from
nets.yolo3
import
YoloBody
from
nets.yolo_training
import
Generator
,
YOLOLoss
from
utils.config
import
Config
from
utils.dataloader
import
YoloDataset
,
yolo_dataset_collate
def
get_lr
(
optimizer
):
for
param_group
in
optimizer
.
param_groups
:
return
param_group
[
'lr'
]
...
...
@@ -24,7 +27,8 @@ def get_lr(optimizer):
def
fit_ont_epoch
(
net
,
yolo_losses
,
epoch
,
epoch_size
,
epoch_size_val
,
gen
,
genval
,
Epoch
,
cuda
):
total_loss
=
0
val_loss
=
0
start_time
=
time
.
time
()
net
.
train
()
with
tqdm
(
total
=
epoch_size
,
desc
=
f
'Epoch
{
epoch
+
1
}
/
{
Epoch
}
'
,
postfix
=
dict
,
mininterval
=
0.3
)
as
pbar
:
for
iteration
,
batch
in
enumerate
(
gen
):
if
iteration
>=
epoch_size
:
...
...
@@ -37,25 +41,38 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
else
:
images
=
Variable
(
torch
.
from_numpy
(
images
).
type
(
torch
.
FloatTensor
))
targets
=
[
Variable
(
torch
.
from_numpy
(
ann
).
type
(
torch
.
FloatTensor
))
for
ann
in
targets
]
#----------------------#
# 清零梯度
#----------------------#
optimizer
.
zero_grad
()
#----------------------#
# 前向传播
#----------------------#
outputs
=
net
(
images
)
losses
=
[]
num_pos_all
=
0
#----------------------#
# 计算损失
#----------------------#
for
i
in
range
(
3
):
loss_item
=
yolo_losses
[
i
](
outputs
[
i
],
targets
)
losses
.
append
(
loss_item
[
0
])
loss
=
sum
(
losses
)
loss_item
,
num_pos
=
yolo_losses
[
i
](
outputs
[
i
],
targets
)
losses
.
append
(
loss_item
)
num_pos_all
+=
num_pos
loss
=
sum
(
losses
)
/
num_pos
#----------------------#
# 反向传播
#----------------------#
loss
.
backward
()
optimizer
.
step
()
total_loss
+=
loss
waste_time
=
time
.
time
()
-
start_time
total_loss
+=
loss
.
item
()
pbar
.
set_postfix
(
**
{
'total_loss'
:
total_loss
.
item
()
/
(
iteration
+
1
),
'lr'
:
get_lr
(
optimizer
),
'step/s'
:
waste_time
})
pbar
.
set_postfix
(
**
{
'total_loss'
:
total_loss
/
(
iteration
+
1
),
'lr'
:
get_lr
(
optimizer
)})
pbar
.
update
(
1
)
start_time
=
time
.
time
()
net
.
eval
()
print
(
'Start Validation'
)
with
tqdm
(
total
=
epoch_size_val
,
desc
=
f
'Epoch
{
epoch
+
1
}
/
{
Epoch
}
'
,
postfix
=
dict
,
mininterval
=
0.3
)
as
pbar
:
...
...
@@ -74,14 +91,15 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
optimizer
.
zero_grad
()
outputs
=
net
(
images_val
)
losses
=
[]
num_pos_all
=
0
for
i
in
range
(
3
):
loss_item
=
yolo_losses
[
i
](
outputs
[
i
],
targets_val
)
losses
.
append
(
loss_item
[
0
])
loss
=
sum
(
losses
)
val_loss
+=
loss
pbar
.
set_postfix
(
**
{
'total_loss'
:
val_loss
.
item
()
/
(
iteration
+
1
)})
loss_item
,
num_pos
=
yolo_losses
[
i
](
outputs
[
i
],
targets_val
)
losses
.
append
(
loss_item
)
num_pos_all
+=
num_pos
loss
=
sum
(
losses
)
/
num_pos
val_loss
+=
loss
.
item
()
pbar
.
set_postfix
(
**
{
'total_loss'
:
val_loss
/
(
iteration
+
1
)})
pbar
.
update
(
1
)
net
.
train
()
print
(
'Finish Validation'
)
print
(
'Epoch:'
+
str
(
epoch
+
1
)
+
'/'
+
str
(
Epoch
))
print
(
'Total Loss: %.4f || Val Loss: %.4f '
%
(
total_loss
/
(
epoch_size
+
1
),
val_loss
/
(
epoch_size_val
+
1
)))
...
...
@@ -94,22 +112,33 @@ def fit_ont_epoch(net,yolo_losses,epoch,epoch_size,epoch_size_val,gen,genval,Epo
# https://www.bilibili.com/video/BV1zE411u7Vw
#----------------------------------------------------#
if
__name__
==
"__main__"
:
# 参数初始化
annotation_path
=
'2007_train.txt'
model
=
YoloBody
(
Config
)
#-------------------------------#
# 是否使用Cuda
# 没有GPU可以设置成False
#-------------------------------#
Cuda
=
True
#-------------------------------#
# Dataloader的使用
#-------------------------------#
Use_Data_Loader
=
True
#------------------------------------------------------#
# 是否对损失进行归一化
#------------------------------------------------------#
normalize
=
True
#------------------------------------------------------#
# 创建yolo模型
# 训练前一定要修改Config里面的classes参数
#------------------------------------------------------#
model
=
YoloBody
(
Config
)
#-------------------------------------------#
# 权值文件的下载请看README
#-------------------------------------------#
#------------------------------------------------------#
# 权值文件请看README,百度网盘下载
#------------------------------------------------------#
model_path
=
"model_data/yolo_weights.pth"
print
(
'Loading weights into state dict...'
)
device
=
torch
.
device
(
'cuda'
if
torch
.
cuda
.
is_available
()
else
'cpu'
)
model_dict
=
model
.
state_dict
()
pretrained_dict
=
torch
.
load
(
"model_data/yolo_weights.pth"
,
map_location
=
device
)
pretrained_dict
=
torch
.
load
(
model_path
,
map_location
=
device
)
pretrained_dict
=
{
k
:
v
for
k
,
v
in
pretrained_dict
.
items
()
if
np
.
shape
(
model_dict
[
k
])
==
np
.
shape
(
v
)}
model_dict
.
update
(
pretrained_dict
)
model
.
load_state_dict
(
model_dict
)
...
...
@@ -126,9 +155,17 @@ if __name__ == "__main__":
yolo_losses
=
[]
for
i
in
range
(
3
):
yolo_losses
.
append
(
YOLOLoss
(
np
.
reshape
(
Config
[
"yolo"
][
"anchors"
],[
-
1
,
2
]),
Config
[
"yolo"
][
"classes"
],
(
Config
[
"img_w"
],
Config
[
"img_h"
]),
Cuda
))
Config
[
"yolo"
][
"classes"
],
(
Config
[
"img_w"
],
Config
[
"img_h"
]),
Cuda
,
normalize
))
# 0.1用于验证,0.9用于训练
#----------------------------------------------------#
# 获得图片路径和标签
#----------------------------------------------------#
annotation_path
=
'2007_train.txt'
#----------------------------------------------------------------------#
# 验证集的划分在train.py代码里面进行
# 2007_test.txt和2007_val.txt里面没有内容是正常的。训练不会使用到。
# 当前划分方式下,验证集和训练集的比例为1:9
#----------------------------------------------------------------------#
val_split
=
0.1
with
open
(
annotation_path
)
as
f
:
lines
=
f
.
readlines
()
...
...
@@ -138,17 +175,15 @@ if __name__ == "__main__":
num_val
=
int
(
len
(
lines
)
*
val_split
)
num_train
=
len
(
lines
)
-
num_val
#------------------------------------------------------#
# 主干特征提取网络特征通用,冻结训练可以加快训练速度
# 也可以在训练初期防止权值被破坏。
# Init_Epoch为起始世代
# Freeze_Epoch为冻结训练的世代
# Epoch总训练世代
#
Unfreeze_
Epoch总训练世代
# 提示OOM或者显存不足请调小Batch_size
#------------------------------------------------------#
if
True
:
# 最开始使用1e-3的学习率可以收敛的更快
lr
=
1e-3
Batch_size
=
8
Init_Epoch
=
0
...
...
@@ -158,17 +193,17 @@ if __name__ == "__main__":
lr_scheduler
=
optim
.
lr_scheduler
.
StepLR
(
optimizer
,
step_size
=
1
,
gamma
=
0.95
)
if
Use_Data_Loader
:
train_dataset
=
YoloDataset
(
lines
[:
num_train
],
(
Config
[
"img_h"
],
Config
[
"img_w"
]))
val_dataset
=
YoloDataset
(
lines
[
num_train
:],
(
Config
[
"img_h"
],
Config
[
"img_w"
]))
train_dataset
=
YoloDataset
(
lines
[:
num_train
],
(
Config
[
"img_h"
],
Config
[
"img_w"
])
,
True
)
val_dataset
=
YoloDataset
(
lines
[
num_train
:],
(
Config
[
"img_h"
],
Config
[
"img_w"
])
,
False
)
gen
=
DataLoader
(
train_dataset
,
shuffle
=
True
,
batch_size
=
Batch_size
,
num_workers
=
4
,
pin_memory
=
True
,
drop_last
=
True
,
collate_fn
=
yolo_dataset_collate
)
gen_val
=
DataLoader
(
val_dataset
,
shuffle
=
True
,
batch_size
=
Batch_size
,
num_workers
=
4
,
pin_memory
=
True
,
drop_last
=
True
,
collate_fn
=
yolo_dataset_collate
)
else
:
gen
=
Generator
(
Batch_size
,
lines
[:
num_train
],
(
Config
[
"img_h"
],
Config
[
"img_w"
])).
generate
()
(
Config
[
"img_h"
],
Config
[
"img_w"
])).
generate
(
True
)
gen_val
=
Generator
(
Batch_size
,
lines
[
num_train
:],
(
Config
[
"img_h"
],
Config
[
"img_w"
])).
generate
()
(
Config
[
"img_h"
],
Config
[
"img_w"
])).
generate
(
False
)
epoch_size
=
num_train
//
Batch_size
epoch_size_val
=
num_val
//
Batch_size
...
...
@@ -190,18 +225,19 @@ if __name__ == "__main__":
optimizer
=
optim
.
Adam
(
net
.
parameters
(),
lr
)
lr_scheduler
=
optim
.
lr_scheduler
.
StepLR
(
optimizer
,
step_size
=
1
,
gamma
=
0.95
)
if
Use_Data_Loader
:
train_dataset
=
YoloDataset
(
lines
[:
num_train
],
(
Config
[
"img_h"
],
Config
[
"img_w"
]))
val_dataset
=
YoloDataset
(
lines
[
num_train
:],
(
Config
[
"img_h"
],
Config
[
"img_w"
]))
train_dataset
=
YoloDataset
(
lines
[:
num_train
],
(
Config
[
"img_h"
],
Config
[
"img_w"
])
,
True
)
val_dataset
=
YoloDataset
(
lines
[
num_train
:],
(
Config
[
"img_h"
],
Config
[
"img_w"
])
,
False
)
gen
=
DataLoader
(
train_dataset
,
shuffle
=
True
,
batch_size
=
Batch_size
,
num_workers
=
4
,
pin_memory
=
True
,
drop_last
=
True
,
collate_fn
=
yolo_dataset_collate
)
gen_val
=
DataLoader
(
val_dataset
,
shuffle
=
True
,
batch_size
=
Batch_size
,
num_workers
=
4
,
pin_memory
=
True
,
drop_last
=
True
,
collate_fn
=
yolo_dataset_collate
)
else
:
gen
=
Generator
(
Batch_size
,
lines
[:
num_train
],
(
Config
[
"img_h"
],
Config
[
"img_w"
])).
generate
()
(
Config
[
"img_h"
],
Config
[
"img_w"
])).
generate
(
True
)
gen_val
=
Generator
(
Batch_size
,
lines
[
num_train
:],
(
Config
[
"img_h"
],
Config
[
"img_w"
])).
generate
()
(
Config
[
"img_h"
],
Config
[
"img_w"
])).
generate
(
False
)
epoch_size
=
num_train
//
Batch_size
epoch_size_val
=
num_val
//
Batch_size
...
...
utils/config.py
浏览文件 @
30094fa5
Config
=
\
{
{
#-------------------------------------------------------------#
# 训练前一定要修改classes参数
# anchors可以不修改,因为anchors的通用性较大
# 而且大中小的设置非常符合yolo的特征层情况
#-------------------------------------------------------------#
"yolo"
:
{
"anchors"
:
[[[
116
,
90
],
[
156
,
198
],
[
373
,
326
]],
[[
30
,
61
],
[
62
,
45
],
[
59
,
119
]],
[[
10
,
13
],
[
16
,
30
],
[
33
,
23
]]],
"classes"
:
20
,
},
#-------------------------------------------------------------#
# img_h和img_w可以修改成608x608
#-------------------------------------------------------------#
"img_h"
:
416
,
"img_w"
:
416
,
}
utils/dataloader.py
浏览文件 @
30094fa5
...
...
@@ -13,12 +13,13 @@ from nets.yolo_training import Generator
import
cv2
class
YoloDataset
(
Dataset
):
def
__init__
(
self
,
train_lines
,
image_size
):
def
__init__
(
self
,
train_lines
,
image_size
,
is_train
):
super
(
YoloDataset
,
self
).
__init__
()
self
.
train_lines
=
train_lines
self
.
train_batches
=
len
(
train_lines
)
self
.
image_size
=
image_size
self
.
is_train
=
is_train
def
__len__
(
self
):
return
self
.
train_batches
...
...
@@ -26,7 +27,7 @@ class YoloDataset(Dataset):
def
rand
(
self
,
a
=
0
,
b
=
1
):
return
np
.
random
.
rand
()
*
(
b
-
a
)
+
a
def
get_random_data
(
self
,
annotation_line
,
input_shape
,
jitter
=
.
3
,
hue
=
.
1
,
sat
=
1.5
,
val
=
1.5
):
def
get_random_data
(
self
,
annotation_line
,
input_shape
,
jitter
=
.
3
,
hue
=
.
1
,
sat
=
1.5
,
val
=
1.5
,
random
=
True
):
"""实时数据增强的随机预处理"""
line
=
annotation_line
.
split
()
image
=
Image
.
open
(
line
[
0
])
...
...
@@ -34,6 +35,35 @@ class YoloDataset(Dataset):
h
,
w
=
input_shape
box
=
np
.
array
([
np
.
array
(
list
(
map
(
int
,
box
.
split
(
','
))))
for
box
in
line
[
1
:]])
if
not
random
:
scale
=
min
(
w
/
iw
,
h
/
ih
)
nw
=
int
(
iw
*
scale
)
nh
=
int
(
ih
*
scale
)
dx
=
(
w
-
nw
)
//
2
dy
=
(
h
-
nh
)
//
2
image
=
image
.
resize
((
nw
,
nh
),
Image
.
BICUBIC
)
new_image
=
Image
.
new
(
'RGB'
,
(
w
,
h
),
(
128
,
128
,
128
))
new_image
.
paste
(
image
,
(
dx
,
dy
))
image_data
=
np
.
array
(
new_image
,
np
.
float32
)
# 调整目标框坐标
box_data
=
np
.
zeros
((
len
(
box
),
5
))
if
len
(
box
)
>
0
:
np
.
random
.
shuffle
(
box
)
box
[:,
[
0
,
2
]]
=
box
[:,
[
0
,
2
]]
*
nw
/
iw
+
dx
box
[:,
[
1
,
3
]]
=
box
[:,
[
1
,
3
]]
*
nh
/
ih
+
dy
box
[:,
0
:
2
][
box
[:,
0
:
2
]
<
0
]
=
0
box
[:,
2
][
box
[:,
2
]
>
w
]
=
w
box
[:,
3
][
box
[:,
3
]
>
h
]
=
h
box_w
=
box
[:,
2
]
-
box
[:,
0
]
box_h
=
box
[:,
3
]
-
box
[:,
1
]
box
=
box
[
np
.
logical_and
(
box_w
>
1
,
box_h
>
1
)]
# 保留有效框
box_data
=
np
.
zeros
((
len
(
box
),
5
))
box_data
[:
len
(
box
)]
=
box
return
image_data
,
box_data
# 调整图片大小
new_ar
=
w
/
h
*
self
.
rand
(
1
-
jitter
,
1
+
jitter
)
/
self
.
rand
(
1
-
jitter
,
1
+
jitter
)
scale
=
self
.
rand
(.
25
,
2
)
...
...
@@ -48,8 +78,7 @@ class YoloDataset(Dataset):
# 放置图片
dx
=
int
(
self
.
rand
(
0
,
w
-
nw
))
dy
=
int
(
self
.
rand
(
0
,
h
-
nh
))
new_image
=
Image
.
new
(
'RGB'
,
(
w
,
h
),
(
np
.
random
.
randint
(
0
,
255
),
np
.
random
.
randint
(
0
,
255
),
np
.
random
.
randint
(
0
,
255
)))
new_image
=
Image
.
new
(
'RGB'
,
(
w
,
h
),
(
128
,
128
,
128
))
new_image
.
paste
(
image
,
(
dx
,
dy
))
image
=
new_image
...
...
@@ -89,19 +118,18 @@ class YoloDataset(Dataset):
box
=
box
[
np
.
logical_and
(
box_w
>
1
,
box_h
>
1
)]
# 保留有效框
box_data
=
np
.
zeros
((
len
(
box
),
5
))
box_data
[:
len
(
box
)]
=
box
if
len
(
box
)
==
0
:
return
image_data
,
[]
if
(
box_data
[:,
:
4
]
>
0
).
any
():
return
image_data
,
box_data
else
:
return
image_data
,
[]
return
image_data
,
box_data
def
__getitem__
(
self
,
index
):
lines
=
self
.
train_lines
n
=
self
.
train_batches
index
=
index
%
n
img
,
y
=
self
.
get_random_data
(
lines
[
index
],
self
.
image_size
[
0
:
2
])
if
self
.
is_train
:
img
,
y
=
self
.
get_random_data
(
lines
[
index
],
self
.
image_size
[
0
:
2
])
else
:
img
,
y
=
self
.
get_random_data
(
lines
[
index
],
self
.
image_size
[
0
:
2
],
False
)
if
len
(
y
)
!=
0
:
# 从坐标转换成0~1的百分比
boxes
=
np
.
array
(
y
[:,
:
4
],
dtype
=
np
.
float32
)
...
...
utils/utils.py
浏览文件 @
30094fa5
from
__future__
import
division
import
os
import
math
import
os
import
time
import
numpy
as
np
import
torch
import
torch.nn
as
nn
import
torch.nn.functional
as
F
import
numpy
as
np
from
PIL
import
Image
,
ImageDraw
,
ImageFont
from
torch.autograd
import
Variable
from
torchvision.ops
import
nms
from
PIL
import
Image
,
ImageDraw
,
ImageFont
class
DecodeBox
(
nn
.
Module
):
def
__init__
(
self
,
anchors
,
num_classes
,
img_size
):
super
(
DecodeBox
,
self
).
__init__
()
#-----------------------------------------------------------#
# 13x13的特征层对应的anchor是[116,90],[156,198],[373,326]
# 26x26的特征层对应的anchor是[30,61],[62,45],[59,119]
# 52x52的特征层对应的anchor是[10,13],[16,30],[33,23]
#-----------------------------------------------------------#
self
.
anchors
=
anchors
self
.
num_anchors
=
len
(
anchors
)
self
.
num_classes
=
num_classes
...
...
@@ -20,17 +28,33 @@ class DecodeBox(nn.Module):
self
.
img_size
=
img_size
def
forward
(
self
,
input
):
#-----------------------------------------------#
# 输入的input一共有三个,他们的shape分别是
# batch_size, 255, 13, 13
# batch_size, 255, 26, 26
# batch_size, 255, 52, 52
#-----------------------------------------------#
batch_size
=
input
.
size
(
0
)
input_height
=
input
.
size
(
2
)
input_width
=
input
.
size
(
3
)
# 计算步长
#-----------------------------------------------#
# 输入为416x416时
# stride_h = stride_w = 32、16、8
#-----------------------------------------------#
stride_h
=
self
.
img_size
[
1
]
/
input_height
stride_w
=
self
.
img_size
[
0
]
/
input_width
# 归一到特征层上
#-------------------------------------------------#
# 此时获得的scaled_anchors大小是相对于特征层的
#-------------------------------------------------#
scaled_anchors
=
[(
anchor_width
/
stride_w
,
anchor_height
/
stride_h
)
for
anchor_width
,
anchor_height
in
self
.
anchors
]
# 对预测结果进行resize
#-----------------------------------------------#
# 输入的input一共有三个,他们的shape分别是
# batch_size, 3, 13, 13, 85
# batch_size, 3, 26, 26, 85
# batch_size, 3, 52, 52, 85
#-----------------------------------------------#
prediction
=
input
.
view
(
batch_size
,
self
.
num_anchors
,
self
.
bbox_attrs
,
input_height
,
input_width
).
permute
(
0
,
1
,
3
,
4
,
2
).
contiguous
()
...
...
@@ -38,37 +62,48 @@ class DecodeBox(nn.Module):
x
=
torch
.
sigmoid
(
prediction
[...,
0
])
y
=
torch
.
sigmoid
(
prediction
[...,
1
])
# 先验框的宽高调整参数
w
=
prediction
[...,
2
]
# Width
h
=
prediction
[...,
3
]
# Height
w
=
prediction
[...,
2
]
h
=
prediction
[...,
3
]
# 获得置信度,是否有物体
conf
=
torch
.
sigmoid
(
prediction
[...,
4
])
# 种类置信度
pred_cls
=
torch
.
sigmoid
(
prediction
[...,
5
:])
# Cls pred.
pred_cls
=
torch
.
sigmoid
(
prediction
[...,
5
:])
FloatTensor
=
torch
.
cuda
.
FloatTensor
if
x
.
is_cuda
else
torch
.
FloatTensor
LongTensor
=
torch
.
cuda
.
LongTensor
if
x
.
is_cuda
else
torch
.
LongTensor
# 生成网格,先验框中心,网格左上角 batch_size,3,13,13
#----------------------------------------------------------#
# 生成网格,先验框中心,网格左上角
# batch_size,3,13,13
#----------------------------------------------------------#
grid_x
=
torch
.
linspace
(
0
,
input_width
-
1
,
input_width
).
repeat
(
input_height
,
1
).
repeat
(
batch_size
*
self
.
num_anchors
,
1
,
1
).
view
(
x
.
shape
).
type
(
FloatTensor
)
grid_y
=
torch
.
linspace
(
0
,
input_height
-
1
,
input_height
).
repeat
(
input_width
,
1
).
t
().
repeat
(
batch_size
*
self
.
num_anchors
,
1
,
1
).
view
(
y
.
shape
).
type
(
FloatTensor
)
# 生成先验框的宽高
#----------------------------------------------------------#
# 按照网格格式生成先验框的宽高
# batch_size,3,13,13
#----------------------------------------------------------#
anchor_w
=
FloatTensor
(
scaled_anchors
).
index_select
(
1
,
LongTensor
([
0
]))
anchor_h
=
FloatTensor
(
scaled_anchors
).
index_select
(
1
,
LongTensor
([
1
]))
anchor_w
=
anchor_w
.
repeat
(
batch_size
,
1
).
repeat
(
1
,
1
,
input_height
*
input_width
).
view
(
w
.
shape
)
anchor_h
=
anchor_h
.
repeat
(
batch_size
,
1
).
repeat
(
1
,
1
,
input_height
*
input_width
).
view
(
h
.
shape
)
# 计算调整后的先验框中心与宽高
#----------------------------------------------------------#
# 利用预测结果对先验框进行调整
# 首先调整先验框的中心,从先验框中心向右下角偏移
# 再调整先验框的宽高。
#----------------------------------------------------------#
pred_boxes
=
FloatTensor
(
prediction
[...,
:
4
].
shape
)
pred_boxes
[...,
0
]
=
x
.
data
+
grid_x
pred_boxes
[...,
1
]
=
y
.
data
+
grid_y
pred_boxes
[...,
2
]
=
torch
.
exp
(
w
.
data
)
*
anchor_w
pred_boxes
[...,
3
]
=
torch
.
exp
(
h
.
data
)
*
anchor_h
# 用于将输出调整为相对于416x416的大小
#----------------------------------------------------------#
# 将输出结果调整成相对于输入图像大小
#----------------------------------------------------------#
_scale
=
torch
.
Tensor
([
stride_w
,
stride_h
]
*
2
).
type
(
FloatTensor
)
output
=
torch
.
cat
((
pred_boxes
.
view
(
batch_size
,
-
1
,
4
)
*
_scale
,
conf
.
view
(
batch_size
,
-
1
,
1
),
pred_cls
.
view
(
batch_size
,
-
1
,
self
.
num_classes
)),
-
1
)
...
...
@@ -139,7 +174,10 @@ def bbox_iou(box1, box2, x1y1x2y2=True):
def
non_max_suppression
(
prediction
,
num_classes
,
conf_thres
=
0.5
,
nms_thres
=
0.4
):
# 求左上角和右下角
#----------------------------------------------------------#
# 将预测结果的格式转换成左上角右下角的格式。
# prediction [batch_size, num_anchors, 85]
#----------------------------------------------------------#
box_corner
=
prediction
.
new
(
prediction
.
shape
)
box_corner
[:,
:,
0
]
=
prediction
[:,
:,
0
]
-
prediction
[:,
:,
2
]
/
2
box_corner
[:,
:,
1
]
=
prediction
[:,
:,
1
]
-
prediction
[:,
:,
3
]
/
2
...
...
@@ -149,21 +187,35 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
output
=
[
None
for
_
in
range
(
len
(
prediction
))]
for
image_i
,
image_pred
in
enumerate
(
prediction
):
# 获得种类及其置信度
#----------------------------------------------------------#
# 对种类预测部分取max。
# class_conf [batch_size, num_anchors, 1] 种类置信度
# class_pred [batch_size, num_anchors, 1] 种类
#----------------------------------------------------------#
class_conf
,
class_pred
=
torch
.
max
(
image_pred
[:,
5
:
5
+
num_classes
],
1
,
keepdim
=
True
)
# 利用置信度进行第一轮筛选
conf_mask
=
(
image_pred
[:,
4
]
*
class_conf
[:,
0
]
>=
conf_thres
).
squeeze
()
#----------------------------------------------------------#
# 利用置信度进行第一轮筛选
#----------------------------------------------------------#
conf_mask
=
(
image_pred
[:,
4
]
*
class_conf
[:,
0
]
>=
conf_thres
).
squeeze
()
#----------------------------------------------------------#
# 根据置信度进行预测结果的筛选
#----------------------------------------------------------#
image_pred
=
image_pred
[
conf_mask
]
class_conf
=
class_conf
[
conf_mask
]
class_pred
=
class_pred
[
conf_mask
]
if
not
image_pred
.
size
(
0
):
continue
# 获得的内容为(x1, y1, x2, y2, obj_conf, class_conf, class_pred)
#-------------------------------------------------------------------------#
# detections [batch_size, num_anchors, 7]
# 7的内容为:x1, y1, x2, y2, obj_conf, class_conf, class_pred
#-------------------------------------------------------------------------#
detections
=
torch
.
cat
((
image_pred
[:,
:
5
],
class_conf
.
float
(),
class_pred
.
float
()),
1
)
# 获得种类
#------------------------------------------#
# 获得预测结果中包含的所有种类
#------------------------------------------#
unique_labels
=
detections
[:,
-
1
].
cpu
().
unique
()
if
prediction
.
is_cuda
:
...
...
@@ -171,7 +223,9 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
detections
=
detections
.
cuda
()
for
c
in
unique_labels
:
# 获得某一类初步筛选后全部的预测结果
#------------------------------------------#
# 获得某一类得分筛选后全部的预测结果
#------------------------------------------#
detections_class
=
detections
[
detections
[:,
-
1
]
==
c
]
#------------------------------------------#
...
...
@@ -179,7 +233,7 @@ def non_max_suppression(prediction, num_classes, conf_thres=0.5, nms_thres=0.4):
#------------------------------------------#
keep
=
nms
(
detections_class
[:,
:
4
],
detections_class
[:,
4
]
*
detections_class
[:,
5
],
detections_class
[:,
4
]
*
detections_class
[:,
5
],
nms_thres
)
max_detections
=
detections_class
[
keep
]
...
...
video.py
浏览文件 @
30094fa5
#-------------------------------------#
# 调用摄像头检测
# 调用摄像头或者视频进行检测
# 调用摄像头直接运行即可
# 调用视频可以将cv2.VideoCapture()指定路径
# 视频的保存并不难,可以百度一下看看
#-------------------------------------#
from
yolo
import
YOLO
from
PIL
import
Image
import
numpy
as
np
import
cv2
import
time
yolo
=
YOLO
()
# 调用摄像头
capture
=
cv2
.
VideoCapture
(
0
)
# capture=cv2.VideoCapture("1.mp4")
import
cv2
import
numpy
as
np
from
PIL
import
Image
from
yolo
import
YOLO
yolo
=
YOLO
()
#-------------------------------------#
# 调用摄像头
# capture=cv2.VideoCapture("1.mp4")
#-------------------------------------#
capture
=
cv2
.
VideoCapture
(
0
)
fps
=
0.0
while
(
True
):
t1
=
time
.
time
()
...
...
@@ -19,10 +27,8 @@ while(True):
frame
=
cv2
.
cvtColor
(
frame
,
cv2
.
COLOR_BGR2RGB
)
# 转变成Image
frame
=
Image
.
fromarray
(
np
.
uint8
(
frame
))
# 进行检测
frame
=
np
.
array
(
yolo
.
detect_image
(
frame
))
# RGBtoBGR满足opencv显示格式
frame
=
cv2
.
cvtColor
(
frame
,
cv2
.
COLOR_RGB2BGR
)
...
...
@@ -32,7 +38,6 @@ while(True):
cv2
.
imshow
(
"video"
,
frame
)
c
=
cv2
.
waitKey
(
1
)
&
0xff
if
c
==
27
:
capture
.
release
()
...
...
voc_annotation.py
浏览文件 @
30094fa5
#---------------------------------------------#
# 运行前一定要修改classes
# 如果生成的2007_train.txt里面没有目标信息
# 那么就是因为classes没有设定正确
#---------------------------------------------#
import
xml.etree.ElementTree
as
ET
from
os
import
getcwd
...
...
yolo.py
浏览文件 @
30094fa5
#-------------------------------------#
# 创建YOLO类
#-------------------------------------#
import
cv2
import
numpy
as
np
import
colorsys
import
os
import
cv2
import
numpy
as
np
import
torch
import
torch.nn
as
nn
from
nets.yolo3
import
YoloBody
import
torch.backends.cudnn
as
cudnn
from
PIL
import
Image
,
ImageFont
,
ImageDraw
import
torch.nn
as
nn
from
PIL
import
Image
,
ImageDraw
,
ImageFont
from
torch.autograd
import
Variable
from
nets.yolo3
import
YoloBody
from
utils.config
import
Config
from
utils.utils
import
non_max_suppression
,
bbox_iou
,
DecodeBox
,
letterbox_image
,
yolo_correct_boxes
from
utils.utils
import
(
DecodeBox
,
bbox_iou
,
letterbox_image
,
non_max_suppression
,
yolo_correct_boxes
)
#--------------------------------------------#
# 使用自己训练好的模型预测需要修改2个参数
# model_path和classes_path都需要修改!
# 如果出现shape不匹配,一定要注意
# 训练时的model_path和classes_path参数的修改
#--------------------------------------------#
class
YOLO
(
object
):
_defaults
=
{
...
...
@@ -52,14 +58,20 @@ class YOLO(object):
class_names
=
f
.
readlines
()
class_names
=
[
c
.
strip
()
for
c
in
class_names
]
return
class_names
#---------------------------------------------------#
#
获得所有的分类
#
生成模型
#---------------------------------------------------#
def
generate
(
self
):
self
.
config
[
"yolo"
][
"classes"
]
=
len
(
self
.
class_names
)
#---------------------------------------------------#
# 建立yolov3模型
#---------------------------------------------------#
self
.
net
=
YoloBody
(
self
.
config
)
# 加快模型训练的效率
#---------------------------------------------------#
# 载入yolov3模型的权重
#---------------------------------------------------#
print
(
'Loading weights into state dict...'
)
device
=
torch
.
device
(
'cuda'
if
torch
.
cuda
.
is_available
()
else
'cpu'
)
state_dict
=
torch
.
load
(
self
.
model_path
,
map_location
=
device
)
...
...
@@ -71,10 +83,12 @@ class YOLO(object):
self
.
net
=
nn
.
DataParallel
(
self
.
net
)
self
.
net
=
self
.
net
.
cuda
()
#---------------------------------------------------#
# 建立三个特征层解码用的工具
#---------------------------------------------------#
self
.
yolo_decodes
=
[]
for
i
in
range
(
3
):
self
.
yolo_decodes
.
append
(
DecodeBox
(
self
.
config
[
"yolo"
][
"anchors"
][
i
],
self
.
config
[
"yolo"
][
"classes"
],
(
self
.
model_image_size
[
1
],
self
.
model_image_size
[
0
])))
self
.
yolo_decodes
.
append
(
DecodeBox
(
self
.
config
[
"yolo"
][
"anchors"
][
i
],
self
.
config
[
"yolo"
][
"classes"
],
(
self
.
model_image_size
[
1
],
self
.
model_image_size
[
0
])))
print
(
'{} model, anchors, and classes loaded.'
.
format
(
self
.
model_path
))
# 画框设置不同的颜色
...
...
@@ -91,44 +105,65 @@ class YOLO(object):
def
detect_image
(
self
,
image
):
image_shape
=
np
.
array
(
np
.
shape
(
image
)[
0
:
2
])
#---------------------------------------------------------#
# 给图像增加灰条,实现不失真的resize
#---------------------------------------------------------#
crop_img
=
np
.
array
(
letterbox_image
(
image
,
(
self
.
model_image_size
[
1
],
self
.
model_image_size
[
0
])))
photo
=
np
.
array
(
crop_img
,
dtype
=
np
.
float32
)
photo
/=
255.0
photo
=
np
.
array
(
crop_img
,
dtype
=
np
.
float32
)
/
255.0
photo
=
np
.
transpose
(
photo
,
(
2
,
0
,
1
))
photo
=
photo
.
astype
(
np
.
float32
)
images
=
[]
images
.
append
(
photo
)
#---------------------------------------------------------#
# 添加上batch_size维度
#---------------------------------------------------------#
images
=
[
photo
]
images
=
np
.
asarray
(
images
)
images
=
torch
.
from_numpy
(
images
)
if
self
.
cuda
:
images
=
images
.
cuda
()
with
torch
.
no_grad
():
images
=
torch
.
from_numpy
(
np
.
asarray
(
images
))
if
self
.
cuda
:
images
=
images
.
cuda
()
#---------------------------------------------------------#
# 将图像输入网络当中进行预测!
#---------------------------------------------------------#
outputs
=
self
.
net
(
images
)
output_list
=
[]
for
i
in
range
(
3
):
output_list
.
append
(
self
.
yolo_decodes
[
i
](
outputs
[
i
]))
#---------------------------------------------------------#
# 将预测框进行堆叠,然后进行非极大抑制
#---------------------------------------------------------#
output
=
torch
.
cat
(
output_list
,
1
)
batch_detections
=
non_max_suppression
(
output
,
self
.
config
[
"yolo"
][
"classes"
],
conf_thres
=
self
.
confidence
,
nms_thres
=
self
.
iou
)
try
:
batch_detections
=
batch_detections
[
0
].
cpu
().
numpy
()
except
:
return
image
top_index
=
batch_detections
[:,
4
]
*
batch_detections
[:,
5
]
>
self
.
confidence
top_conf
=
batch_detections
[
top_index
,
4
]
*
batch_detections
[
top_index
,
5
]
top_label
=
np
.
array
(
batch_detections
[
top_index
,
-
1
],
np
.
int32
)
top_bboxes
=
np
.
array
(
batch_detections
[
top_index
,:
4
])
top_xmin
,
top_ymin
,
top_xmax
,
top_ymax
=
np
.
expand_dims
(
top_bboxes
[:,
0
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
1
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
2
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
3
],
-
1
)
# 去掉灰条
boxes
=
yolo_correct_boxes
(
top_ymin
,
top_xmin
,
top_ymax
,
top_xmax
,
np
.
array
([
self
.
model_image_size
[
0
],
self
.
model_image_size
[
1
]]),
image_shape
)
#---------------------------------------------------------#
# 如果没有检测出物体,返回原图
#---------------------------------------------------------#
try
:
batch_detections
=
batch_detections
[
0
].
cpu
().
numpy
()
except
:
return
image
#---------------------------------------------------------#
# 对预测框进行得分筛选
#---------------------------------------------------------#
top_index
=
batch_detections
[:,
4
]
*
batch_detections
[:,
5
]
>
self
.
confidence
top_conf
=
batch_detections
[
top_index
,
4
]
*
batch_detections
[
top_index
,
5
]
top_label
=
np
.
array
(
batch_detections
[
top_index
,
-
1
],
np
.
int32
)
top_bboxes
=
np
.
array
(
batch_detections
[
top_index
,:
4
])
top_xmin
,
top_ymin
,
top_xmax
,
top_ymax
=
np
.
expand_dims
(
top_bboxes
[:,
0
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
1
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
2
],
-
1
),
np
.
expand_dims
(
top_bboxes
[:,
3
],
-
1
)
#-----------------------------------------------------------------#
# 在图像传入网络预测前会进行letterbox_image给图像周围添加灰条
# 因此生成的top_bboxes是相对于有灰条的图像的
# 我们需要对其进行修改,去除灰条的部分。
#-----------------------------------------------------------------#
boxes
=
yolo_correct_boxes
(
top_ymin
,
top_xmin
,
top_ymax
,
top_xmax
,
np
.
array
([
self
.
model_image_size
[
0
],
self
.
model_image_size
[
1
]]),
image_shape
)
font
=
ImageFont
.
truetype
(
font
=
'model_data/simhei.ttf'
,
size
=
np
.
floor
(
3e-2
*
np
.
shape
(
image
)[
1
]
+
0.5
).
astype
(
'int32'
))
thickness
=
(
np
.
shape
(
image
)[
0
]
+
np
.
shape
(
image
)[
1
])
//
self
.
model_image_size
[
0
]
thickness
=
max
((
np
.
shape
(
image
)[
0
]
+
np
.
shape
(
image
)[
1
])
//
self
.
model_image_size
[
0
],
1
)
for
i
,
c
in
enumerate
(
top_label
):
predicted_class
=
self
.
class_names
[
c
]
...
...
@@ -150,7 +185,7 @@ class YOLO(object):
draw
=
ImageDraw
.
Draw
(
image
)
label_size
=
draw
.
textsize
(
label
,
font
)
label
=
label
.
encode
(
'utf-8'
)
print
(
label
)
print
(
label
,
top
,
left
,
bottom
,
right
)
if
top
-
label_size
[
1
]
>=
0
:
text_origin
=
np
.
array
([
left
,
top
-
label_size
[
1
]])
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录