# -*- coding: utf-8 -*-
# date: 2020-04-12
# Author: Eric.Lee
# function: inference

# NOTE: xml.etree.cElementTree was removed in Python 3.9; ElementTree is the
# supported module and picks the C accelerator automatically.
import xml.etree.ElementTree as ET

import numpy as np


def get_xml_msg(path):
    """Read the annotated objects from an object/bndbox annotation XML file.

    Args:
        path: File name or file object accepted by ``ElementTree.parse``.

    Returns:
        A list with one ``(name, (xmin, ymin, xmax, ymax))`` tuple per
        ``object`` element, in document order. Coordinates are
        ``numpy.float32`` values.

    Raises:
        AttributeError: If an ``object`` element lacks ``name``, ``bndbox``
            or one of the four coordinate tags.
    """
    objects = []
    root = ET.parse(path).getroot()  # parse the xml file
    for obj in root.findall('object'):
        name = obj.find('name').text
        bndbox = obj.find('bndbox')
        xyxy = tuple(
            np.float32(bndbox.find(tag).text)
            for tag in ('xmin', 'ymin', 'xmax', 'ymax')
        )
        objects.append((name, xyxy))
    return objects
import argparse
import os

import numpy as np
import torch
import torch.nn.functional as F

MODEL_NAMES = ('resnet_18', 'resnet_34', 'resnet_50', 'resnet_101', 'resnet_152')


def _str2bool(value):
    """Parse a command-line boolean.

    ``type=bool`` cannot be used with argparse: ``bool('False')`` is True
    because every non-empty string is truthy.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if text in ('0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean, got {!r}'.format(value))


def _img_size(value):
    """Parse ``H,W``, ``HxW`` or a single ``S`` into a ``(height, width)`` tuple.

    ``type=tuple`` would split the argument string into single characters.
    """
    parts = [p for p in str(value).lower().replace('x', ',').split(',') if p.strip()]
    try:
        dims = tuple(int(p) for p in parts)
    except ValueError:
        raise argparse.ArgumentTypeError('invalid image size: {!r}'.format(value))
    if len(dims) == 1:
        dims = dims * 2
    if len(dims) != 2 or min(dims) <= 0:
        raise argparse.ArgumentTypeError('invalid image size: {!r}'.format(value))
    return dims


def build_arg_parser():
    """Build the command-line parser of the inference script."""
    parser = argparse.ArgumentParser(description=' Project Classification Test')
    parser.add_argument('--test_model', type=str,
                        default='./model_exp/2021-02-09_06-32-32/model_epoch-627.pth',
                        help='test_model')  # checkpoint path
    parser.add_argument('--model', type=str, default='resnet_50', choices=MODEL_NAMES,
                        help='model : resnet_18,resnet_34,resnet_50,resnet_101,resnet_152')  # model type
    parser.add_argument('--num_classes', type=int, default=120,
                        help='num_classes')  # number of classes
    parser.add_argument('--GPUS', type=str, default='0',
                        help='GPUS')  # GPU selection
    parser.add_argument('--test_path', type=str, default='./datasets/test/',
                        help='test_path')  # test set folder
    parser.add_argument('--img_size', type=_img_size, default=(256, 256),
                        help='img_size')  # model input size (height, width)
    parser.add_argument('--fix_res', type=_str2bool, default=False,
                        help='fix_resolution')  # keep the aspect ratio of the input image
    parser.add_argument('--vis', type=_str2bool, default=True,
                        help='vis')  # show every pre-processed image
    return parser


def _crop_first_object(img, xml_path):
    """Crop ``img`` to the first annotated box of ``xml_path``.

    The image is returned unchanged when the annotation file is missing,
    lists no object, or the clipped box is empty.
    """
    if not os.path.isfile(xml_path):
        return img
    objects = get_xml_msg(xml_path)
    if not objects:
        return img
    _, (x1, y1, x2, y2) = objects[0]
    height, width = img.shape[:2]
    x1 = int(np.clip(x1, 0, width - 1))
    y1 = int(np.clip(y1, 0, height - 1))
    x2 = int(np.clip(x2, 0, width - 1))
    y2 = int(np.clip(y2, 0, height - 1))
    if x2 <= x1 or y2 <= y1:
        # An empty crop would make cv2.resize fail.
        return img
    return img[y1:y2, x1:x2, :]


def main():
    """Classify every ``.jpg`` below ``--test_path`` and print per-class precision.

    Sub-folders are expected to be named ``<int>-<anything>``; the position
    of a folder in the numerically sorted list is its ground-truth label.
    """
    # Heavy / project-local dependencies are imported here so that the module
    # can be imported (e.g. for its argument parser) without them.
    import cv2
    from data_iter.datasets import letterbox
    from models.resnet import resnet18, resnet34, resnet50, resnet101, resnet152

    parser = build_arg_parser()
    print('\n/******************* {} ******************/\n'.format(parser.description))
    ops = parser.parse_args()
    print('----------------------------------')
    for key, value in vars(ops).items():
        print('{} : {}'.format(key, value))

    os.environ['CUDA_VISIBLE_DEVICES'] = ops.GPUS

    # ---------------------------------------------------------------- build model
    print('use model : %s' % (ops.model))
    factories = dict(zip(MODEL_NAMES, (resnet18, resnet34, resnet50, resnet101, resnet152)))
    model_ = factories[ops.model](num_classes=ops.num_classes, img_size=ops.img_size[0])

    use_cuda = torch.cuda.is_available()
    device = torch.device("cuda:0" if use_cuda else "cpu")
    model_ = model_.to(device)
    model_.eval()  # inference mode

    if os.access(ops.test_model, os.F_OK):
        chkpt = torch.load(ops.test_model, map_location=device)
        model_.load_state_dict(chkpt)
        print('load test model : {}'.format(ops.test_model))
    else:
        print('warning : test model not found, weights are random : {}'.format(ops.test_model))

    # ---------------------------------------------------------------- statistics
    class_dirs = sorted(os.listdir(ops.test_path), key=lambda x: int(x.split('-')[0]))
    dict_static = dict(enumerate(class_dirs))  # label index -> folder name
    dict_r = {doc: 0 for doc in class_dirs}    # correct predictions per class
    dict_p = {doc: 0 for doc in class_dirs}    # predictions per class

    # ---------------------------------------------------------------- predict
    with torch.no_grad():
        for gt_label, doc in enumerate(class_dirs):
            doc_dir = os.path.join(ops.test_path, doc)
            for file in os.listdir(doc_dir):
                if ".jpg" not in file:
                    continue
                print('------>>> {} - gt_label : {}'.format(file, gt_label))

                img = cv2.imread(os.path.join(doc_dir, file))
                if img is None:
                    print('warning : can not read image : {}'.format(file))
                    continue
                img = _crop_first_object(
                    img, os.path.join(doc_dir, file.replace(".jpg", ".xml")))

                # input pre-processing
                if ops.fix_res:
                    img_ = letterbox(img, size_=ops.img_size[0], mean_rgb=(128, 128, 128))
                else:
                    # cv2.resize takes (width, height)
                    img_ = cv2.resize(img, (ops.img_size[1], ops.img_size[0]),
                                      interpolation=cv2.INTER_CUBIC)
                if ops.vis:
                    cv2.namedWindow('image', 0)
                    cv2.imshow('image', img_)
                    cv2.waitKey(1)
                img_ = (img_.astype(np.float32) - 128.) / 256.
                img_ = torch.from_numpy(img_.transpose(2, 0, 1)).unsqueeze_(0)
                img_ = img_.to(device)  # (bs, 3, h, w)

                pre_ = model_(img_.float())
                output = F.softmax(pre_, dim=1)[0].cpu().numpy()
                max_index = int(np.argmax(output))
                score_ = output[max_index]

                print('gt {} -- pre {} --->>> confidence {}'.format(gt_label, max_index, score_))
                predicted_doc = dict_static.get(max_index)
                if predicted_doc is not None:
                    # The model may have more outputs than there are test folders.
                    dict_p[predicted_doc] += 1
                if gt_label == max_index:
                    dict_r[doc] += 1

    cv2.destroyAllWindows()
    # Top-1 precision of every class.
    print('\n-----------------------------------------------\n')
    acc_list = []
    for doc in class_dirs:
        fm = dict_p[doc]
        fz = dict_r[doc]
        if fm == 0:
            # The class was never predicted: precision is undefined.
            print('error')
            continue
        acc_list.append(fz / fm)
        print('{}: {}'.format(doc, fz / fm))
    print("\n MAP : {}".format(np.mean(acc_list) if acc_list else float('nan')))
    print('\nwell done ')


if __name__ == "__main__":
    main()
# -*- coding: utf-8 -*-
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


class LabelSmoothing(nn.Module):
    """KL-divergence loss against a label-smoothed target distribution.

    For a sample with class ``y`` the target distribution ``q`` is::

        q[i] = 1 - smoothing              if i == y
        q[i] = smoothing / (size - 1)     otherwise

    e.g. with ``size=5, smoothing=0.1`` the row for ``y=2`` is
    ``[0.025, 0.025, 0.9, 0.025, 0.025]`` instead of the one-hot
    ``[0, 0, 1, 0, 0]``.

    Args:
        size: Total number of classes.
        smoothing: Probability mass spread over the wrong classes.

    Attributes set by ``forward``:
        true_dist: The target distribution built for the last batch.
    """

    def __init__(self, size, smoothing=0.0):
        super(LabelSmoothing, self).__init__()
        # reduction='sum' is the replacement of the deprecated
        # size_average=False.
        self.criterion = nn.KLDivLoss(reduction='sum')
        self.confidence = 1.0 - smoothing  # value used where i == y
        self.smoothing = smoothing
        self.size = size
        self.true_dist = None

    def forward(self, x, target):
        """Return the summed KL divergence between ``x`` and the smoothed labels.

        Args:
            x: ``(N, size)`` tensor of log-probabilities.
            target: ``(N,)`` integer tensor of class indices.

        Raises:
            ValueError: If ``x`` does not have ``size`` columns.
        """
        if x.size(1) != self.size:
            raise ValueError('expected {} classes, got {}'.format(self.size, x.size(1)))
        with torch.no_grad():
            # Fill with the "otherwise" value, then write the confidence at
            # column target[n] of every row n.
            true_dist = torch.full_like(x, self.smoothing / (self.size - 1))
            true_dist.scatter_(1, target.unsqueeze(1), self.confidence)
        self.true_dist = true_dist
        return self.criterion(x, true_dist)
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F


# Supports multi-class and binary classification.
class FocalLoss(nn.Module):
    """Focal loss with optional label smoothing.

    Implements the loss proposed in 'Focal Loss for Dense Object Detection'
    (https://arxiv.org/abs/1708.02002)::

        loss = -alpha * (1 - pt) ** gamma * log(pt)

    Args:
        num_class: Number of classes.
        alpha: Per-class weight. ``None`` uses 1 for every class; a list or
            ``numpy.ndarray`` of length ``num_class`` is normalised to sum
            to 1; a float is assigned to class ``balance_index`` while every
            other class gets ``1 - alpha``.
        gamma: Focusing parameter; ``gamma > 0`` reduces the relative loss of
            well-classified examples.
        balance_index: Class that receives ``alpha`` when ``alpha`` is a float.
        smooth: Optional value in ``[0, 1]`` the one-hot targets are clamped
            to (``[smooth, 1 - smooth]``).
        size_average: Average the per-sample losses when True, sum them
            otherwise.

    Raises:
        TypeError: If ``alpha`` has an unsupported type.
        ValueError: If ``alpha`` has the wrong length or ``smooth`` is
            outside ``[0, 1]``.
    """

    def __init__(self, num_class, alpha=None, gamma=2, balance_index=-1, smooth=None, size_average=True):
        super(FocalLoss, self).__init__()
        self.num_class = num_class
        self.alpha = alpha
        self.gamma = gamma
        self.smooth = smooth
        self.size_average = size_average

        if self.alpha is None:
            self.alpha = torch.ones(self.num_class, 1)
        elif isinstance(self.alpha, (list, np.ndarray)):
            if len(self.alpha) != self.num_class:
                raise ValueError('alpha needs one weight per class')
            weights = torch.as_tensor(alpha, dtype=torch.float32).view(self.num_class, 1)
            self.alpha = weights / weights.sum()
        elif isinstance(self.alpha, float):
            weights = torch.ones(self.num_class, 1) * (1 - self.alpha)
            weights[balance_index] = self.alpha
            self.alpha = weights
        else:
            raise TypeError('Not support alpha type')

        if self.smooth is not None:
            if self.smooth < 0 or self.smooth > 1.0:
                raise ValueError('smooth value should be in [0,1]')

    def forward(self, input, target):
        """Compute the focal loss.

        Args:
            input: ``(N, C)`` or ``(N, C, d1, d2, ...)`` tensor of raw scores;
                softmax over dimension 1 is applied here.
            target: Integer class indices, one per sample / location.

        Returns:
            A 0-d tensor: mean (``size_average=True``) or sum of the
            per-sample losses.
        """
        prob = F.softmax(input, dim=1)

        if prob.dim() > 2:
            # N,C,d1,d2 -> N,C,m (m=d1*d2*...) -> N*m,C
            prob = prob.view(prob.size(0), prob.size(1), -1)
            prob = prob.permute(0, 2, 1).contiguous()
            prob = prob.view(-1, prob.size(-1))

        epsilon = 1e-10
        idx = target.reshape(-1, 1).long().to(prob.device)

        # Build the one-hot targets directly on the device of the scores.
        one_hot_key = torch.zeros(idx.size(0), self.num_class,
                                  dtype=prob.dtype, device=prob.device)
        one_hot_key.scatter_(1, idx, 1)
        if self.smooth:
            one_hot_key = torch.clamp(one_hot_key, self.smooth, 1.0 - self.smooth)

        pt = (one_hot_key * prob).sum(1) + epsilon  # (M,)
        logpt = pt.log()

        # self.alpha is (C, 1); indexing it with the (M, 1) index tensor gave
        # (M, 1, 1), which broadcast against the (M,) focal term to
        # (M, 1, M). Select one weight per sample and flatten to (M,).
        alpha = self.alpha.to(device=prob.device, dtype=prob.dtype)
        alpha = alpha[idx.view(-1)].view(-1)

        loss = -1 * alpha * torch.pow((1 - pt), self.gamma) * logpt

        if self.size_average:
            loss = loss.mean()
        else:
            loss = loss.sum()
        return loss


if __name__ == "__main__":
    # Example of label smoothing.
    # matplotlib is only needed for this demo, so it is imported here
    # instead of at module level.
    import matplotlib.pyplot as plt

    crit = LabelSmoothing(size=5, smoothing=0.1)
    # predict.shape 3 5
    predict = torch.FloatTensor([[0, 0.2, 0.7, 0.1, 0], [0, 0.9, 0.2, 0.1, 0], [1, 0.2, 0.7, 0.1, 0]])
    v = crit(predict.log(), torch.LongTensor([2, 1, 0]))
    # Show the target distributions expected by the system.
    print('v', v)
    plt.imshow(crit.true_dist)
import math

import torch
import torch.nn as nn
import torch.utils.model_zoo as model_zoo

__all__ = ['ResNet', 'resnet18', 'resnet34', 'resnet50', 'resnet101',
           'resnet152']


model_urls = {
    'resnet18': 'https://download.pytorch.org/models/resnet18-5c106cde.pth',
    'resnet34': 'https://download.pytorch.org/models/resnet34-333f7ec4.pth',
    'resnet50': 'https://download.pytorch.org/models/resnet50-19c8e357.pth',
    'resnet101': 'https://download.pytorch.org/models/resnet101-5d3b4d8f.pth',
    'resnet152': 'https://download.pytorch.org/models/resnet152-b121ed2d.pth',
}


def conv3x3(in_planes, out_planes, stride=1):
    """3x3 convolution with padding"""
    return nn.Conv2d(in_planes, out_planes, kernel_size=3, stride=stride,
                     padding=1, bias=False)


class BasicBlock(nn.Module):
    """Residual block made of two 3x3 convolutions."""

    expansion = 1  # output channels = planes * expansion

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(BasicBlock, self).__init__()
        self.conv1 = conv3x3(inplanes, planes, stride)
        self.bn1 = nn.BatchNorm2d(planes)
        self.relu = nn.ReLU(inplace=True)
        self.conv2 = conv3x3(planes, planes)
        self.bn2 = nn.BatchNorm2d(planes)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        # The shortcut is projected only when the main path changes the
        # resolution or the channel count.
        residual = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))

        out += residual
        return self.relu(out)


class Bottleneck(nn.Module):
    """Residual block made of 1x1, 3x3 and 1x1 convolutions."""

    expansion = 4  # output channels = planes * expansion

    def __init__(self, inplanes, planes, stride=1, downsample=None):
        super(Bottleneck, self).__init__()
        self.conv1 = nn.Conv2d(inplanes, planes, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(planes)
        self.conv2 = nn.Conv2d(planes, planes, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(planes)
        self.conv3 = nn.Conv2d(planes, planes * self.expansion, kernel_size=1, bias=False)
        self.bn3 = nn.BatchNorm2d(planes * self.expansion)
        self.relu = nn.ReLU(inplace=True)
        self.downsample = downsample
        self.stride = stride

    def forward(self, x):
        residual = x if self.downsample is None else self.downsample(x)

        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        out += residual
        return self.relu(out)


class ResNet(nn.Module):
    """ResNet classifier for square inputs of a fixed size.

    Args:
        block: Residual block class (``BasicBlock`` or ``Bottleneck``).
        layers: Number of blocks in each of the four stages.
        num_classes: Size of the output layer.
        img_size: Input height/width; must be a multiple of 32 because the
            final average pooling uses a ``img_size / 32`` kernel.
        dropout_factor: Probability passed to ``nn.Dropout`` before the
            classifier.
            NOTE(review): the default of 1.0 drops every feature in training
            mode; callers that train should pass an explicit value.

    Raises:
        ValueError: If ``img_size`` is not a multiple of 32.
    """

    def __init__(self, block, layers, num_classes=1000, img_size=224, dropout_factor=1.):
        super(ResNet, self).__init__()
        if img_size % 32 != 0:
            raise ValueError('img_size must be a multiple of 32, got {}'.format(img_size))
        self.inplanes = 64
        self.dropout_factor = dropout_factor

        self.conv1 = nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU(inplace=True)
        # see this issue: https://github.com/xxradon/PytorchToCaffe/issues/16
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, ceil_mode=True)
        self.layer1 = self._make_layer(block, 64, layers[0])
        self.layer2 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer3 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer4 = self._make_layer(block, 512, layers[3], stride=2)

        pool_kernel = int(img_size // 32)
        self.avgpool = nn.AvgPool2d(pool_kernel, stride=1, ceil_mode=True)

        self.dropout = nn.Dropout(self.dropout_factor)

        self.fc = nn.Linear(512 * block.expansion, num_classes)

        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                n = m.kernel_size[0] * m.kernel_size[1] * m.out_channels
                m.weight.data.normal_(0, math.sqrt(2. / n))
            elif isinstance(m, nn.BatchNorm2d):
                m.weight.data.fill_(1)
                m.bias.data.zero_()

    def _make_layer(self, block, planes, blocks, stride=1):
        """Stack ``blocks`` residual blocks; only the first one may downsample."""
        downsample = None
        if stride != 1 or self.inplanes != planes * block.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes * block.expansion,
                          kernel_size=1, stride=stride, bias=False),
                nn.BatchNorm2d(planes * block.expansion),
            )

        layers = [block(self.inplanes, planes, stride, downsample)]
        self.inplanes = planes * block.expansion
        layers.extend(block(self.inplanes, planes) for _ in range(1, blocks))

        return nn.Sequential(*layers)

    def forward(self, x):
        x = self.maxpool(self.relu(self.bn1(self.conv1(x))))

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = x.view(x.size(0), -1)

        x = self.dropout(x)
        return self.fc(x)


def load_model(model, pretrained_state_dict):
    """Copy every pretrained tensor whose name and shape match into ``model``.

    Tensors that are missing in the model or have a different shape (for
    example the classifier when ``num_classes`` differs) are skipped. The
    outcome for every pretrained tensor is printed.

    Returns:
        The same ``model`` instance.
    """
    model_dict = model.state_dict()
    pretrained_dict = {
        k: v for k, v in pretrained_state_dict.items()
        if k in model_dict and model_dict[k].size() == v.size()
    }
    model.load_state_dict(pretrained_dict, strict=False)
    if len(pretrained_dict) == 0:
        print("[INFO] No params were loaded ...")
    else:
        for k, v in pretrained_state_dict.items():
            if k in pretrained_dict:
                print("==>> Load {} {}".format(k, v.size()))
            else:
                print("[INFO] Skip {} {}".format(k, v.size()))
    return model


def _resnet(arch, block, layers, pretrained, **kwargs):
    """Build one ResNet variant and optionally load its ImageNet weights."""
    model = ResNet(block, layers, **kwargs)
    if pretrained:
        print("Load pretrained model from {}".format(model_urls[arch]))
        pretrained_state_dict = model_zoo.load_url(model_urls[arch])
        model = load_model(model, pretrained_state_dict)
    return model


def resnet18(pretrained=False, **kwargs):
    """Constructs a ResNet-18 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    return _resnet('resnet18', BasicBlock, [2, 2, 2, 2], pretrained, **kwargs)


def resnet34(pretrained=False, **kwargs):
    """Constructs a ResNet-34 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    return _resnet('resnet34', BasicBlock, [3, 4, 6, 3], pretrained, **kwargs)


def resnet50(pretrained=False, **kwargs):
    """Constructs a ResNet-50 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    return _resnet('resnet50', Bottleneck, [3, 4, 6, 3], pretrained, **kwargs)


def resnet101(pretrained=False, **kwargs):
    """Constructs a ResNet-101 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    return _resnet('resnet101', Bottleneck, [3, 4, 23, 3], pretrained, **kwargs)


def resnet152(pretrained=False, **kwargs):
    """Constructs a ResNet-152 model.

    Args:
        pretrained (bool): If True, returns a model pre-trained on ImageNet
    """
    return _resnet('resnet152', Bottleneck, [3, 8, 36, 3], pretrained, **kwargs)


if __name__ == "__main__":
    input = torch.randn([32, 3, 256, 256])
    model = resnet34(False, num_classes=2, img_size=256)
    output = model(input)
    print(output.size())
traival loss') + + loss_train = [] + loss_val = [] + with torch.no_grad(): + # train loss + for i in range(len(train_split)): + file = train_split[i] + label = train_split_label[i] + + img = cv2.imread(file) + # 输入图片预处理 + if ops.fix_res: + img_ = letterbox(img,size_=ops.img_size[0],mean_rgb = (128,128,128)) + else: + img_ = cv2.resize(img, (ops.img_size[1],ops.img_size[0]), interpolation = cv2.INTER_CUBIC) + + img_ = img_.astype(np.float32) + img_ = (img_-128.)/256. + + img_ = img_.transpose(2, 0, 1) + img_ = torch.from_numpy(img_) + img_ = img_.unsqueeze_(0) + + label_ = np.array(label) + label_ = torch.from_numpy(label_).float() + + if use_cuda: + img_ = img_.cuda() # (bs, 3, h, w) + labels_ = label_.cuda() # (bs, 3, h, w) + + + output = model(img_.float()) + + loss = criterion(output, labels_) + loss_train.append(loss.item()) + # val loss + for i in range(len(val_split)): + file = val_split[i] + label = val_split_label[i] + + img = cv2.imread(file) + # 输入图片预处理 + if ops.fix_res: + img_ = letterbox(img,size_=ops.img_size[0],mean_rgb = (128,128,128)) + else: + img_ = cv2.resize(img, (ops.img_size[1],ops.img_size[0]), interpolation = cv2.INTER_CUBIC) + + img_ = img_.astype(np.float32) + img_ = (img_-128.)/256. 
+ + img_ = img_.transpose(2, 0, 1) + img_ = torch.from_numpy(img_) + img_ = img_.unsqueeze_(0) + + label_ = np.array(label) + label_ = torch.from_numpy(label_).float() + + if use_cuda: + img_ = img_.cuda() # (bs, 3, h, w) + labels_ = label_.cuda() # (bs, 3, h, w) + + + output = model(img_.float()) + + loss = criterion(output, labels_) + loss_val.append(loss.item()) + + print('loss_train : {}, loss_val : {} '.format(np.mean(loss_train),np.mean(loss_val))) + + return np.mean(loss_train),np.mean(loss_val) + + +def trainer(ops,f_log): + try: + os.environ['CUDA_VISIBLE_DEVICES'] = ops.GPUS + + if ops.log_flag: + sys.stdout = f_log + + set_seed(ops.seed) + + train_split,train_split_label,val_split,val_split_label = split_trainval_datasets(ops) + + train_path = ops.train_path + num_classes = len(os.listdir(ops.train_path)) # 模型类别个数 + print('num_classes : ',num_classes) + #---------------------------------------------------------------- 构建模型 + print('use model : %s'%(ops.model)) + + if ops.model == 'resnet_18': + model_=resnet18(pretrained = ops.pretrained, num_classes=ops.num_classes, img_size=ops.img_size[0],dropout_factor=ops.dropout) + elif ops.model == 'resnet_34': + model_=resnet34(pretrained = ops.pretrained, num_classes=ops.num_classes, img_size=ops.img_size[0],dropout_factor=ops.dropout) + elif ops.model == 'resnet_50': + model_=resnet50(pretrained = ops.pretrained, num_classes=ops.num_classes, img_size=ops.img_size[0],dropout_factor=ops.dropout) + elif ops.model == 'resnet_101': + model_=resnet101(pretrained = ops.pretrained, num_classes=ops.num_classes, img_size=ops.img_size[0],dropout_factor=ops.dropout) + elif ops.model == 'resnet_152': + model_=resnet152(pretrained = ops.pretrained, num_classes=ops.num_classes, img_size=ops.img_size[0],dropout_factor=ops.dropout) + else: + print('error no the struct model : {}'.format(ops.model)) + + print('model_.fc : {}'.format(model_.fc)) + + + use_cuda = torch.cuda.is_available() + + device = torch.device("cuda:0" if 
def trainer(ops, f_log):
    """Train a ResNet classifier as configured by ``ops``.

    One checkpoint is written to ``ops.model_exp`` after every epoch and a
    ``latest.pth`` roughly twice per epoch. When a validation split exists
    the train/val losses are appended to ``loss_epoch_trainval.json`` every
    ``ops.test_interval`` epochs. The learning rate is multiplied by
    ``ops.lr_decay`` after more than five epochs without a new best mean
    training loss.

    Any exception is caught and printed (with traceback) instead of being
    propagated, so that the caller can finish its log.

    Args:
        ops: Parsed command-line options of the training script.
        f_log: Open log file that replaces ``sys.stdout`` when
            ``ops.log_flag`` is set, else ``None``.
    """
    import json
    import os
    import sys
    import time
    import traceback

    import numpy as np
    import torch
    import torch.nn as nn
    import torch.optim as optim
    from torch.utils.data import DataLoader

    from data_iter.datasets import LoadImagesAndLabels
    from loss.loss import FocalLoss
    from models.resnet import resnet18, resnet34, resnet50, resnet101, resnet152
    from utils.common_utils import JSON_Encoder
    from utils.model_utils import get_acc, set_learning_rate, set_seed, split_trainval_datasets

    try:
        os.environ['CUDA_VISIBLE_DEVICES'] = ops.GPUS

        if ops.log_flag:
            sys.stdout = f_log

        set_seed(ops.seed)

        train_split, train_split_label, val_split, val_split_label = split_trainval_datasets(ops)

        num_classes = len(os.listdir(ops.train_path))  # class folders found
        print('num_classes : ', num_classes)
        if num_classes != ops.num_classes:
            print('warning : {} class folders found but num_classes is {}'.format(
                num_classes, ops.num_classes))
        # ---------------------------------------------------------------- build model
        print('use model : %s' % (ops.model))

        factories = {
            'resnet_18': resnet18,
            'resnet_34': resnet34,
            'resnet_50': resnet50,
            'resnet_101': resnet101,
            'resnet_152': resnet152,
        }
        if ops.model not in factories:
            raise ValueError('error no the struct model : {}'.format(ops.model))
        model_ = factories[ops.model](pretrained=ops.pretrained, num_classes=ops.num_classes,
                                      img_size=ops.img_size[0], dropout_factor=ops.dropout)

        print('model_.fc : {}'.format(model_.fc))

        use_cuda = torch.cuda.is_available()

        device = torch.device("cuda:0" if use_cuda else "cpu")
        model_ = model_.to(device)

        # Dataset
        dataset = LoadImagesAndLabels(path=ops.train_path, img_size=ops.img_size,
                                      flag_agu=ops.flag_agu, fix_res=ops.fix_res,
                                      val_split=val_split)
        print('len train datasets : %s' % (dataset.__len__()))
        # Dataloader
        dataloader = DataLoader(dataset,
                                batch_size=ops.batch_size,
                                num_workers=ops.num_workers,
                                shuffle=True,
                                pin_memory=False,
                                drop_last=True)
        # optimizer
        optimizer = optim.SGD(model_.parameters(), lr=ops.init_lr, momentum=0.9,
                              weight_decay=ops.weight_decay)
        # load the finetune model
        if os.access(ops.fintune_model, os.F_OK):  # checkpoint
            chkpt = torch.load(ops.fintune_model, map_location=device)
            model_.load_state_dict(chkpt)
            print('load fintune model : {}'.format(ops.fintune_model))

        print('/**********************************************/')
        # loss function
        if 'focalLoss' == ops.loss_define:
            # The one-hot targets must be as wide as the model output, so the
            # class count of the model is used, not the folder count.
            criterion = FocalLoss(num_class=ops.num_classes)
        else:
            criterion = nn.CrossEntropyLoss()  # softmax + negative log likelihood

        best_loss = np.inf
        loss_mean = 0.  # running loss sum of the epoch
        loss_idx = 0.  # number of losses in loss_mean
        flag_change_lr_cnt = 0  # epochs without improvement
        init_lr = ops.init_lr  # current learning rate

        steps_per_epoch = int(dataset.__len__() / ops.batch_size)
        # Save 'latest.pth' about twice per epoch. Clamped to 1 because the
        # raw value is 0 for small datasets, which made the modulo below
        # raise ZeroDivisionError.
        save_every = max(1, int(dataset.__len__() / ops.batch_size / 2 - 1))

        epochs_loss_dict = {}

        for epoch in range(0, ops.epochs):
            if ops.log_flag:
                sys.stdout = f_log
            print('\nepoch %d ------>>>' % epoch)
            model_.train()
            # learning rate schedule
            if loss_mean != 0.:
                if best_loss > (loss_mean / loss_idx):
                    flag_change_lr_cnt = 0
                    best_loss = (loss_mean / loss_idx)
                else:
                    flag_change_lr_cnt += 1

                    if flag_change_lr_cnt > 5:
                        init_lr = init_lr * ops.lr_decay
                        set_learning_rate(optimizer, init_lr)
                        flag_change_lr_cnt = 0

            loss_mean = 0.
            loss_idx = 0.

            for i, (imgs_, labels_) in enumerate(dataloader):

                if use_cuda:
                    imgs_ = imgs_.cuda()  # (batch, channel, height, width)
                    labels_ = labels_.cuda()

                output = model_(imgs_.float())

                loss = criterion(output, labels_)
                loss_mean += loss.item()
                loss_idx += 1.
                if i % 10 == 0:
                    acc = get_acc(output, labels_)
                    loc_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
                    print(' %s - %s - epoch [%s/%s] (%s/%s):' % (loc_time, ops.model, epoch, ops.epochs, i, steps_per_epoch),
                          'mean loss : %.6f, loss : %.6f' % (loss_mean / loss_idx, loss.item()),
                          ' acc : %.4f' % acc, ' lr : %.5f' % init_lr, ' bs :', ops.batch_size,
                          ' img_size: %s x %s' % (ops.img_size[0], ops.img_size[1]), ' best_loss: %.4f' % best_loss)

                # backward pass, parameter update, gradient reset
                loss.backward()
                optimizer.step()
                optimizer.zero_grad()

                if i % save_every == 0 and i > 0:
                    torch.save(model_.state_dict(), ops.model_exp + 'latest.pth')
            # save the model of every epoch
            torch.save(model_.state_dict(), ops.model_exp + 'model_epoch-{}.pth'.format(epoch))

            if len(val_split) > 0 and (epoch % ops.test_interval == 0):  # test

                model_.eval()
                loss_train, loss_val = tester(ops, epoch, model_, criterion,
                                              train_split, train_split_label, val_split, val_split_label,
                                              use_cuda)

                epochs_loss_dict['epoch_' + str(epoch)] = {
                    'loss_train': loss_train,
                    'loss_val': loss_val,
                }

                with open(ops.model_exp + 'loss_epoch_trainval.json', "w", encoding='utf-8') as f_loss:
                    json.dump(epochs_loss_dict, f_loss, ensure_ascii=False, indent=1, cls=JSON_Encoder)

    except Exception as e:
        print('Exception : ', e)  # the exception
        print('Exception file : ', e.__traceback__.tb_frame.f_globals['__file__'])  # file of the exception
        print('Exception line : ', e.__traceback__.tb_lineno)  # line of the exception
        print(traceback.format_exc())  # full call chain, not only this frame
import argparse
import json
import sys
import time


def _str2bool(value):
    """Parse a command-line boolean.

    ``type=bool`` cannot be used with argparse: ``bool('False')`` is True
    because every non-empty string is truthy.
    """
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ('1', 'true', 't', 'yes', 'y', 'on'):
        return True
    if text in ('0', 'false', 'f', 'no', 'n', 'off'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean, got {!r}'.format(value))


def _img_size(value):
    """Parse ``H,W``, ``HxW`` or a single ``S`` into a ``(height, width)`` tuple.

    ``type=tuple`` would split the argument string into single characters.
    """
    parts = [p for p in str(value).lower().replace('x', ',').split(',') if p.strip()]
    try:
        dims = tuple(int(p) for p in parts)
    except ValueError:
        raise argparse.ArgumentTypeError('invalid image size: {!r}'.format(value))
    if len(dims) == 1:
        dims = dims * 2
    if len(dims) != 2 or min(dims) <= 0:
        raise argparse.ArgumentTypeError('invalid image size: {!r}'.format(value))
    return dims


def build_arg_parser():
    """Build the command-line parser of the training script."""
    parser = argparse.ArgumentParser(description=' Project Classification Train')
    parser.add_argument('--seed', type=int, default=123,
                        help='seed')  # random seed
    parser.add_argument('--model_exp', type=str, default='./model_exp',
                        help='model_exp')  # output folder
    parser.add_argument('--model', type=str, default='resnet_50',
                        choices=('resnet_18', 'resnet_34', 'resnet_50', 'resnet_101', 'resnet_152'),
                        help='model : resnet_18,resnet_34,resnet_50,resnet_101,resnet_152')  # model type
    parser.add_argument('--num_classes', type=int, default=120,
                        help='num_classes')  # number of classes
    parser.add_argument('--GPUS', type=str, default='0',
                        help='GPUS')  # GPU selection
    parser.add_argument('--train_path', type=str, default='./datasets/train/',
                        help='train_path')  # training set folder
    parser.add_argument('--val_factor', type=float, default=0.0,
                        help='val_factor')  # fraction of the training set used for validation
    parser.add_argument('--test_interval', type=int, default=1,
                        help='test_interval')  # epochs between train/val loss evaluations
    parser.add_argument('--pretrained', type=_str2bool, default=True,
                        help='imageNet_Pretrain')  # start from ImageNet weights
    parser.add_argument('--fintune_model', type=str, default='None',
                        help='fintune_model')  # checkpoint to fine-tune from
    parser.add_argument('--loss_define', type=str, default='focalLoss',
                        help='define_loss')  # loss function
    parser.add_argument('--init_lr', type=float, default=1e-3,
                        help='init_learningRate')  # initial learning rate
    parser.add_argument('--lr_decay', type=float, default=0.96,
                        help='learningRate_decay')  # learning rate decay factor
    parser.add_argument('--weight_decay', type=float, default=1e-8,
                        help='weight_decay')  # optimizer weight decay
    parser.add_argument('--batch_size', type=int, default=32,
                        help='batch_size')  # images per batch
    parser.add_argument('--dropout', type=float, default=0.5,
                        help='dropout')  # dropout
    parser.add_argument('--epochs', type=int, default=1000,
                        help='epochs')  # training epochs
    parser.add_argument('--num_workers', type=int, default=1,
                        help='num_workers')  # data loader workers
    parser.add_argument('--img_size', type=_img_size, default=(256, 256),
                        help='img_size')  # model input size (height, width)
    parser.add_argument('--flag_agu', type=_str2bool, default=True,
                        help='data_augmentation')  # augment the training data
    parser.add_argument('--fix_res', type=_str2bool, default=False,
                        help='fix_resolution')  # keep the aspect ratio of the input image
    parser.add_argument('--clear_model_exp', type=_str2bool, default=False,
                        help='clear_model_exp')  # wipe the output folder first
    parser.add_argument('--log_flag', type=_str2bool, default=False,
                        help='log flag')  # write the training log to a file
    return parser


def main():
    """Parse the options, prepare the run folder and start the training."""
    from utils.common_utils import mkdir_

    parser = build_arg_parser()
    args = parser.parse_args()

    mkdir_(args.model_exp, flag_rm=args.clear_model_exp)
    loc_time = time.localtime()
    args.model_exp = args.model_exp + '/' + time.strftime("%Y-%m-%d_%H-%M-%S", loc_time) + '/'
    mkdir_(args.model_exp, flag_rm=args.clear_model_exp)

    console = sys.stdout
    f_log = None
    try:
        if args.log_flag:
            f_log = open(args.model_exp + '/train_{}.log'.format(
                time.strftime("%Y-%m-%d_%H-%M-%S", loc_time)), 'a+')
            sys.stdout = f_log

        print('---------------------------------- log : {}'.format(
            time.strftime("%Y-%m-%d %H:%M:%S", loc_time)))
        print('\n/******************* {} ******************/\n'.format(parser.description))

        unparsed = vars(args)  # the namespace as a dict
        for key in unparsed.keys():
            print('{} : {}'.format(key, unparsed[key]))

        unparsed['time'] = time.strftime("%Y-%m-%d %H:%M:%S", loc_time)

        with open(args.model_exp + 'train_ops.json', "w", encoding='utf-8') as fs:
            json.dump(unparsed, fs, ensure_ascii=False, indent=1)

        trainer(ops=args, f_log=f_log)  # model training

        if args.log_flag:
            sys.stdout = f_log
        print('well done : {}'.format(time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())))
    finally:
        # Give the console back and flush the log even if something failed.
        sys.stdout = console
        if f_log is not None:
            f_log.close()


if __name__ == "__main__":
    main()
# -*- coding: utf-8 -*-
# date: 2020-04-11
# author: Eric.Lee
# function: utils

import json
import os
import random
import shutil

import numpy as np

try:
    import cv2
except ImportError:  # OpenCV is only needed by plot_box
    cv2 = None


def mkdir_(path, flag_rm=False):
    """Create the folder ``path``.

    An existing folder is kept as it is unless ``flag_rm`` is set, in which
    case it is deleted and re-created empty. Missing parent folders are
    created as well.
    """
    if os.path.exists(path):
        if flag_rm:
            shutil.rmtree(path)
            os.makedirs(path)
            print('remove {} done ~ '.format(path))
    else:
        os.makedirs(path)


def plot_box(bbox, img, color=None, label=None, line_thickness=None):
    """Draw ``bbox`` (x1, y1, x2, y2) and an optional text label on ``img`` in place.

    Args:
        bbox: Box corners; the values are truncated to int.
        img: Image array that is drawn on.
        color: Colour triple; a random colour is used when omitted.
        label: Text drawn in a filled rectangle above the top-left corner.
        line_thickness: Line width; derived from the image size when omitted.

    Raises:
        ImportError: If OpenCV is not installed.
    """
    if cv2 is None:
        raise ImportError('plot_box requires OpenCV (cv2)')
    tl = line_thickness or round(0.002 * max(img.shape[0:2])) + 1
    color = color or [random.randint(0, 255) for _ in range(3)]
    c1, c2 = (int(bbox[0]), int(bbox[1])), (int(bbox[2]), int(bbox[3]))
    cv2.rectangle(img, c1, c2, color, thickness=tl)  # bbox of the object
    if label:
        tf = max(tl - 2, 1)
        t_size = cv2.getTextSize(label, 0, fontScale=tl / 3, thickness=tf)[0]  # label size
        c2 = c1[0] + t_size[0], c1[1] - t_size[1] - 3  # bbox of the text
        cv2.rectangle(img, c1, c2, color, -1)  # filled label rectangle
        # draw the text
        cv2.putText(img, label, (c1[0], c1[1] - 2), 0, tl / 4, [225, 255, 255],
                    thickness=tf, lineType=cv2.LINE_AA)


class JSON_Encoder(json.JSONEncoder):
    """JSON encoder that also accepts numpy scalars and arrays."""

    def default(self, obj):
        if isinstance(obj, np.bool_):
            return bool(obj)
        if isinstance(obj, np.integer):
            return int(obj)
        if isinstance(obj, np.floating):
            return float(obj)
        if isinstance(obj, np.ndarray):
            return obj.tolist()
        return super(JSON_Encoder, self).default(obj)


def get_acc(output, label):
    """Return the fraction of rows of ``output`` whose arg-max equals ``label``.

    Args:
        output: ``(N, C)`` tensor of class scores.
        label: ``(N,)`` tensor of class indices.

    Returns:
        Accuracy as a float in ``[0, 1]``; ``0.0`` for an empty batch.
    """
    total = output.shape[0]
    if total == 0:
        return 0.0
    _, pred_label = output.max(1)
    num_correct = (pred_label == label).sum().item()
    return num_correct / float(total)


def set_learning_rate(optimizer, lr):
    """Set the learning rate of every parameter group of ``optimizer`` to ``lr``."""
    for param_group in optimizer.param_groups:
        param_group['lr'] = lr
import os
import random

import numpy as np
import torch
import torch.backends.cudnn as cudnn


def set_seed(seed=666):
    """Seed the Python, numpy and torch random generators.

    When CUDA is available the CUDA generators are seeded as well and cuDNN
    is switched to deterministic mode.
    """
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
        cudnn.deterministic = True


def split_trainval_datasets(ops):
    """Split the images of every class folder into a train and a val part.

    The sub-folders of ``ops.train_path`` must be named ``<int>-<anything>``;
    the position of a folder in the numerically sorted list is its label.
    Inside every folder the file list is shuffled with the ``random`` module
    and the first ``int(n_images * ops.val_factor)`` images go to the
    validation split. Every selected path is printed with its label.

    Args:
        ops: Options object; ``train_path`` and ``val_factor`` are read.

    Returns:
        ``(train_files, train_labels, val_files, val_labels)`` lists.

    Raises:
        ValueError: If a class folder name does not start with an integer.
    """
    print(' --------------->>> split_trainval_datasets ')
    train_split_datasets = []
    train_split_datasets_label = []

    val_split_datasets = []
    val_split_datasets_label = []

    class_dirs = [
        doc for doc in os.listdir(ops.train_path)
        if os.path.isdir(os.path.join(ops.train_path, doc))
    ]
    class_dirs.sort(key=lambda x: int(x.split('-')[0]))

    for idx, doc in enumerate(class_dirs):
        doc_dir = os.path.join(ops.train_path, doc)
        # os.listdir returns the names in arbitrary order; sort first so the
        # seeded shuffle produces the same split on every machine.
        data_list = sorted(os.listdir(doc_dir))
        random.shuffle(data_list)

        # Only images count towards the split ratio; other files in the
        # folder (e.g. annotations) must not shrink the validation part.
        images = [file for file in data_list if '.jpg' in file]
        cal_split_num = int(len(images) * ops.val_factor)

        for i, file in enumerate(images):
            path = os.path.join(doc_dir, file)
            if i < cal_split_num:
                val_split_datasets.append(path)
                val_split_datasets_label.append(idx)
            else:
                train_split_datasets.append(path)
                train_split_datasets_label.append(idx)

            print(path, idx)

    print('\n')
    print('train_split_datasets len {}'.format(len(train_split_datasets)))
    print('val_split_datasets len {}'.format(len(val_split_datasets)))

    return train_split_datasets, train_split_datasets_label, val_split_datasets, val_split_datasets_label