Add files via upload

e708e029 · Bubbliiiing · GitHub · 4e783255 · e708e029 · e708e029
隐藏空白更改
内联并排

Showing 232 additions and 38 deletions

kmeans_for_anchors.py kmeans_for_anchors.py +5 -3

predict.py predict.py +93 -21

train.py train.py +44 -12

yolo.py yolo.py +90 -2

未找到文件。
--- a/kmeans_for_anchors.py
+++ b/kmeans_for_anchors.py
+#-------------------------------------------------------------------------------------------------#
+#   kmeans虽然会对数据集中的框进行聚类，但是很多数据集由于框的大小相近，聚类出来的9个框相差不大，
+#   这样的框反而不利于模型的训练。因为不同的特征层适合不同大小的先验框，越浅的特征层适合越小的先验框，越深的特征层适合越大的先验框。
+#   原始网络的先验框已经按大中小比例分配好了，不进行聚类也会有非常好的效果。
+#-------------------------------------------------------------------------------------------------#
 import glob
-import random
 import xml.etree.ElementTree as ET

 import numpy as np

-
 def cas_iou(box,cluster):
    x = np.minimum(cluster[:,0],box[0])
    y = np.minimum(cluster[:,1],box[1])
@@ -20,7 +23,6 @@ def cas_iou(box,cluster):
 def avg_iou(box,cluster):
    # For every box, keep the best IoU it reaches against any cluster centre,
    # then average those best scores over all boxes.
    best_scores = []
    for idx in range(box.shape[0]):
        scores = cas_iou(box[idx], cluster)
        best_scores.append(np.max(scores))
    return np.mean(best_scores)

-
 def kmeans(box,k):
    # 取出一共有多少框
    row = box.shape[0]

--- a/predict.py
+++ b/predict.py
-'''
-predict.py有几个注意点
-1、该代码无法直接进行批量预测，如果想要批量预测，可以利用os.listdir()遍历文件夹，利用Image.open打开图片文件进行预测。
-具体流程可以参考get_dr_txt.py，在get_dr_txt.py即实现了遍历还实现了目标信息的保存。
-2、如果想要进行检测完的图片的保存，利用r_image.save("img.jpg")即可保存，直接在predict.py里进行修改即可。 
-3、如果想要获得预测框的坐标，可以进入yolo.detect_image函数，在绘图部分读取top，left，bottom，right这四个值。
-4、如果想要利用预测框截取下目标，可以进入yolo.detect_image函数，在绘图部分利用获取到的top，left，bottom，right这四个值
-在原图上利用矩阵的方式进行截取。
-5、如果想要在预测图上写额外的字，比如检测到的特定目标的数量，可以进入yolo.detect_image函数，在绘图部分对predicted_class进行判断，
-比如判断if predicted_class == 'car': 即可判断当前目标是否为车，然后记录数量即可。利用draw.text即可写字。
-'''
+#----------------------------------------------------#
+#   对视频中的predict.py进行了修改，
+#   将单张图片预测、摄像头检测和FPS测试功能
+#   整合到了一个py文件中，通过指定mode进行模式的修改。
+#----------------------------------------------------#
+import time
+
+import cv2
+import numpy as np
 from PIL import Image

 from yolo import YOLO

-yolo = YOLO()
+if __name__ == "__main__":
+    yolo = YOLO()
+    #-------------------------------------------------------------------------#
+    #   mode用于指定测试的模式：
+    #   'predict'表示单张图片预测
+    #   'video'表示视频检测
+    #   'fps'表示测试fps
+    #-------------------------------------------------------------------------#
+    mode = "predict"
+    #-------------------------------------------------------------------------#
+    #   video_path用于指定视频的路径，当video_path=0时表示检测摄像头
+    #   video_save_path表示视频保存的路径，当video_save_path=""时表示不保存
+    #   video_fps用于保存的视频的fps
+    #   video_path、video_save_path和video_fps仅在mode='video'时有效
+#   保存视频时请在视频窗口中按ESC键退出，以完成完整的保存步骤，不可直接结束程序。
+    #-------------------------------------------------------------------------#
+    video_path      = 0
+    video_save_path = ""
+    video_fps       = 25.0
+
+    if mode == "predict":
+        # Interactive single-image prediction: repeatedly ask for a filename,
+        # run detection on it and display the result. The string below is the
+        # original usage notes (batch prediction, saving results, reading box
+        # coordinates, cropping targets, drawing extra text).
+        '''
+        1、该代码无法直接进行批量预测，如果想要批量预测，可以利用os.listdir()遍历文件夹，利用Image.open打开图片文件进行预测。
+        具体流程可以参考get_dr_txt.py，在get_dr_txt.py即实现了遍历还实现了目标信息的保存。
+        2、如果想要进行检测完的图片的保存，利用r_image.save("img.jpg")即可保存，直接在predict.py里进行修改即可。 
+        3、如果想要获得预测框的坐标，可以进入yolo.detect_image函数，在绘图部分读取top，left，bottom，right这四个值。
+        4、如果想要利用预测框截取下目标，可以进入yolo.detect_image函数，在绘图部分利用获取到的top，left，bottom，right这四个值
+        在原图上利用矩阵的方式进行截取。
+        5、如果想要在预测图上写额外的字，比如检测到的特定目标的数量，可以进入yolo.detect_image函数，在绘图部分对predicted_class进行判断，
+        比如判断if predicted_class == 'car': 即可判断当前目标是否为车，然后记录数量即可。利用draw.text即可写字。
+        '''
+        while True:
+            img = input('Input image filename:')
+            try:
+                image = Image.open(img)
+            except Exception:
+                # Catch only ordinary errors (missing file, unreadable image);
+                # a bare ``except`` would also swallow KeyboardInterrupt and
+                # SystemExit and make the prompt loop hard to leave.
+                print('Open Error! Try again!')
+                continue
+            else:
+                r_image = yolo.detect_image(image)
+                r_image.show()
+
+    elif mode == "video":
+        # Run detection frame by frame on a video file or camera stream,
+        # show the annotated frames and optionally save them to a file.
+        capture = cv2.VideoCapture(video_path)
+        if not capture.isOpened():
+            raise ValueError("Failed to open the camera or video source: " + str(video_path))
+
+        # The writer only exists when a save path is configured. Keep it as
+        # None otherwise so the cleanup below never touches an undefined name.
+        out = None
+        if video_save_path != "":
+            fourcc = cv2.VideoWriter_fourcc(*'XVID')
+            size = (int(capture.get(cv2.CAP_PROP_FRAME_WIDTH)), int(capture.get(cv2.CAP_PROP_FRAME_HEIGHT)))
+            out = cv2.VideoWriter(video_save_path, fourcc, video_fps, size)
+
+        fps = 0.0
+        try:
+            while True:
+                t1 = time.time()
+                # Grab the next frame; ``ref`` is False once the stream ends
+                # or the device stops delivering frames.
+                ref, frame = capture.read()
+                if not ref:
+                    break
+                # OpenCV delivers BGR; convert to RGB before handing to PIL.
+                frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
+                frame = Image.fromarray(np.uint8(frame))
+                # Run detection and get the annotated image back as an array.
+                frame = np.array(yolo.detect_image(frame))
+                # Back to BGR for OpenCV display and writing.
+                frame = cv2.cvtColor(frame, cv2.COLOR_RGB2BGR)
+
+                # Average the previous estimate with the instantaneous rate.
+                fps = (fps + (1. / (time.time() - t1))) / 2
+                print("fps= %.2f" % (fps))
+                frame = cv2.putText(frame, "fps= %.2f" % (fps), (0, 40), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2)
+
+                cv2.imshow("video", frame)
+                c = cv2.waitKey(1) & 0xff
+                if out is not None:
+                    out.write(frame)
+
+                # ESC (key code 27) stops the loop.
+                if c == 27:
+                    break
+        finally:
+            # Always release the capture and writer, including when the loop
+            # is interrupted by an exception or Ctrl+C, so a saved video file
+            # is finalised properly.
+            capture.release()
+            if out is not None:
+                out.release()
+            cv2.destroyAllWindows()

-while True:
-    img = input('Input image filename:')
-    try:
-        image = Image.open(img)
-    except:
-        print('Open Error! Try again!')
-        continue
+    elif mode == "fps":
+        test_interval = 100
+        img = Image.open('img/street.jpg')
+        tact_time = yolo.get_FPS(img, test_interval)
+        print(str(tact_time) + ' seconds, ' + str(1/tact_time) + 'FPS, @batch_size 1')
    else:
-        r_image = yolo.detect_image(image)
-        r_image.show()
+        raise AssertionError("Please specify the correct mode: 'predict', 'video' or 'fps'.")
--- a/train.py
+++ b/train.py
@@ -9,7 +9,7 @@ from torch.utils.data import DataLoader
 from tqdm import tqdm

 from nets.yolo4 import YoloBody
-from nets.yolo_training import YOLOLoss, LossHistory, weights_init
+from nets.yolo_training import LossHistory, YOLOLoss, weights_init
 from utils.dataloader import YoloDataset, yolo_dataset_collate


@@ -36,6 +36,8 @@ def get_lr(optimizer):

        
 def fit_one_epoch(net,yolo_loss,epoch,epoch_size,epoch_size_val,gen,genval,Epoch,cuda):
+    if Tensorboard:
+        global train_tensorboard_step, val_tensorboard_step
    total_loss = 0
    val_loss = 0

@@ -72,18 +74,26 @@ def fit_one_epoch(net,yolo_loss,epoch,epoch_size,epoch_size_val,gen,genval,Epoch
                num_pos_all += num_pos

            loss = sum(losses) / num_pos_all
+            total_loss += loss.item()
+
            #----------------------#
            #   反向传播
            #----------------------#
            loss.backward()
            optimizer.step()

-            total_loss += loss.item()
-            
+            if Tensorboard:
+                # 将loss写入tensorboard，每一步都写
+                writer.add_scalar('Train_loss', loss, train_tensorboard_step)
+                train_tensorboard_step += 1
+
            pbar.set_postfix(**{'total_loss': total_loss / (iteration + 1), 
                                'lr'        : get_lr(optimizer)})
            pbar.update(1)

+    # 将loss写入tensorboard，下面注释的是每个世代保存一次
+    # if Tensorboard:
+    #     writer.add_scalar('Train_loss', total_loss/(iteration+1), epoch)
    net.eval()
    print('Start Validation')
    with tqdm(total=epoch_size_val, desc=f'Epoch {epoch + 1}/{Epoch}',postfix=dict,mininterval=0.3) as pbar:
@@ -110,9 +120,17 @@ def fit_one_epoch(net,yolo_loss,epoch,epoch_size,epoch_size_val,gen,genval,Epoch
                    num_pos_all += num_pos
                loss = sum(losses) / num_pos_all
                val_loss += loss.item()
+
+            # 将loss写入tensorboard, 下面注释的是每一步都写
+            # if Tensorboard:
+            #     writer.add_scalar('Val_loss', loss, val_tensorboard_step)
+            #     val_tensorboard_step += 1
            pbar.set_postfix(**{'total_loss': val_loss / (iteration + 1)})
            pbar.update(1)
            
+    # 将loss写入tensorboard，每个世代保存一次
+    if Tensorboard:
+        writer.add_scalar('Val_loss',val_loss / (epoch_size_val+1), epoch)
    loss_history.append_loss(total_loss/(epoch_size+1), val_loss/(epoch_size_val+1))
    print('Finish Validation')
    print('Epoch:'+ str(epoch+1) + '/' + str(Epoch))
@@ -125,6 +143,10 @@ def fit_one_epoch(net,yolo_loss,epoch,epoch_size,epoch_size_val,gen,genval,Epoch
 #   https://www.bilibili.com/video/BV1zE411u7Vw
 #----------------------------------------------------#
 if __name__ == "__main__":
+    #-------------------------------#
+    #   是否使用Tensorboard
+    #-------------------------------#
+    Tensorboard = False
    #-------------------------------#
    #   是否使用Cuda
    #   没有GPU可以设置成False
@@ -141,20 +163,12 @@ if __name__ == "__main__":
    #   显存比较大可以使用608x608
    #-------------------------------#
    input_shape = (416,416)
-
    #----------------------------------------------------#
    #   classes和anchor的路径，非常重要
    #   训练前一定要修改classes_path，使其对应自己的数据集
    #----------------------------------------------------#
    anchors_path = 'model_data/yolo_anchors.txt'
    classes_path = 'model_data/voc_classes.txt'   
-    #----------------------------------------------------#
-    #   获取classes和anchor
-    #----------------------------------------------------#
-    class_names = get_classes(classes_path)
-    anchors = get_anchors(anchors_path)
-    num_classes = len(class_names)
-    
    #------------------------------------------------------#
    #   Yolov4的tricks应用
    #   mosaic 马赛克数据增强 True or False 
@@ -166,6 +180,13 @@ if __name__ == "__main__":
    Cosine_lr = False
    smoooth_label = 0

+    #----------------------------------------------------#
+    #   获取classes和anchor
+    #----------------------------------------------------#
+    class_names = get_classes(classes_path)
+    anchors = get_anchors(anchors_path)
+    num_classes = len(class_names)
+
    #------------------------------------------------------#
    #   创建yolo模型
    #   训练前一定要修改classes_path和对应的txt文件
@@ -213,7 +234,18 @@ if __name__ == "__main__":
    np.random.seed(None)
    num_val = int(len(lines)*val_split)
    num_train = len(lines) - num_val
-    
+
+    if Tensorboard:
+        from tensorboardX import SummaryWriter
+        writer = SummaryWriter(log_dir='logs',flush_secs=60)
+        if Cuda:
+            graph_inputs = torch.randn(1,3,input_shape[0],input_shape[1]).type(torch.FloatTensor).cuda()
+        else:
+            graph_inputs = torch.randn(1,3,input_shape[0],input_shape[1]).type(torch.FloatTensor)
+        writer.add_graph(model, graph_inputs)
+        train_tensorboard_step  = 1
+        val_tensorboard_step    = 1
+
    #------------------------------------------------------#
    #   主干特征提取网络特征通用，冻结训练可以加快训练速度
    #   也可以在训练初期防止权值被破坏。

--- a/yolo.py
+++ b/yolo.py
@@ -3,6 +3,7 @@
 #-------------------------------------#
 import colorsys
 import os
+import time

 import numpy as np
 import torch
@@ -10,8 +11,8 @@ import torch.nn as nn
 from PIL import Image, ImageDraw, ImageFont

 from nets.yolo4 import YoloBody
-from utils.utils import (DecodeBox, letterbox_image,
-                         non_max_suppression, yolo_correct_boxes)
+from utils.utils import (DecodeBox, letterbox_image, non_max_suppression,
+                         yolo_correct_boxes)


 #--------------------------------------------#
@@ -229,3 +230,90 @@ class YOLO(object):
            del draw
        return image

+    def get_FPS(self, image, test_interval):
+        """Measure the average wall-clock time of one detection pass.
+
+        The image is preprocessed once, one untimed warm-up pass is run, and
+        then the forward pass, decoding, non-max suppression and box
+        post-processing are repeated ``test_interval`` times on the same input.
+
+        Args:
+            image: PIL image used as the benchmark input.
+            test_interval: number of timed passes; must be positive.
+
+        Returns:
+            Average time in seconds of a single pass.
+
+        Raises:
+            ValueError: if ``test_interval`` is not positive.
+        """
+        if test_interval <= 0:
+            raise ValueError("test_interval must be a positive integer")
+
+        # Original image size (height, width), used to map boxes back.
+        image_shape = np.array(np.shape(image)[0:2])
+        model_h = self.model_image_size[0]
+        model_w = self.model_image_size[1]
+
+        #---------------------------------------------------------#
+        #   Either pad with gray bars (letterbox) so the resize does
+        #   not distort the image, or resize directly.
+        #---------------------------------------------------------#
+        if self.letterbox_image:
+            crop_img = np.array(letterbox_image(image, (model_w, model_h)))
+        else:
+            crop_img = image.convert('RGB')
+            crop_img = crop_img.resize((model_w, model_h), Image.BICUBIC)
+        photo = np.array(crop_img, dtype=np.float32) / 255.0
+        photo = np.transpose(photo, (2, 0, 1))
+
+        #---------------------------------------------------------#
+        #   Add the batch dimension and move to the GPU if enabled.
+        #---------------------------------------------------------#
+        images = torch.from_numpy(np.asarray([photo]))
+        if self.cuda:
+            images = images.cuda()
+
+        def run_once():
+            # One full detection pass. The results are discarded on purpose:
+            # they are computed only so the measured time includes the same
+            # post-processing work as a real detection.
+            with torch.no_grad():
+                outputs = self.net(images)
+                output_list = [self.yolo_decodes[i](outputs[i]) for i in range(3)]
+                output = torch.cat(output_list, 1)
+                batch_detections = non_max_suppression(output, len(self.class_names),
+                                                        conf_thres=self.confidence,
+                                                        nms_thres=self.iou)
+                try:
+                    batch_detections = batch_detections[0].cpu().numpy()
+                    top_index = batch_detections[:,4]*batch_detections[:,5] > self.confidence
+                    top_conf = batch_detections[top_index,4]*batch_detections[top_index,5]
+                    top_label = np.array(batch_detections[top_index,-1],np.int32)
+                    top_bboxes = np.array(batch_detections[top_index,:4])
+                    top_xmin = np.expand_dims(top_bboxes[:,0],-1)
+                    top_ymin = np.expand_dims(top_bboxes[:,1],-1)
+                    top_xmax = np.expand_dims(top_bboxes[:,2],-1)
+                    top_ymax = np.expand_dims(top_bboxes[:,3],-1)
+
+                    if self.letterbox_image:
+                        boxes = yolo_correct_boxes(top_ymin,top_xmin,top_ymax,top_xmax,np.array([model_h,model_w]),image_shape)
+                    else:
+                        top_xmin = top_xmin / model_w * image_shape[1]
+                        top_ymin = top_ymin / model_h * image_shape[0]
+                        top_xmax = top_xmax / model_w * image_shape[1]
+                        top_ymax = top_ymax / model_h * image_shape[0]
+                        boxes = np.concatenate([top_ymin,top_xmin,top_ymax,top_xmax], axis=-1)
+                except Exception:
+                    # Best-effort post-processing, kept from the original:
+                    # presumably guards against a pass with no detections
+                    # (TODO confirm against non_max_suppression). Unlike a
+                    # bare ``except``, this no longer swallows
+                    # KeyboardInterrupt, so the benchmark can be interrupted.
+                    pass
+
+        # Untimed warm-up pass so one-off start-up costs are not measured.
+        run_once()
+
+        t1 = time.time()
+        for _ in range(test_interval):
+            run_once()
+        t2 = time.time()
+
+        tact_time = (t2 - t1) / test_interval
+        return tact_time
+
+