提交 f495e3a1 编写于 作者: H hypox64

Preprocessing data asynchronously

上级 7041197e
......@@ -4,6 +4,8 @@ import time
import numpy as np
import torch
from torch import nn, optim
# from multiprocessing import Process, Queue
import torch.multiprocessing as mp
import warnings
warnings.filterwarnings("ignore")
......@@ -23,7 +25,8 @@ class Core(object):
self.opt = opt
self.epoch = 1
if self.opt.gpu_id != -1:
torch.cuda.set_device(self.opt.gpu_id)
os.environ["CUDA_VISIBLE_DEVICES"] = str(self.opt.gpu_id)
#torch.cuda.set_device(self.opt.gpu_id)
if not self.opt.no_cudnn:
torch.backends.cudnn.benchmark = True
......@@ -34,6 +37,9 @@ class Core(object):
self.criterion_class = nn.CrossEntropyLoss(self.opt.weight)
self.criterion_auto = nn.MSELoss()
self.epoch = 1
self.plot_result = {'train':[],'eval':[]}
self.confusion_mats = []
self.test_flag = True
if printflag:
util.writelog('network:\n'+str(self.net),self.opt,True)
......@@ -60,77 +66,99 @@ class Core(object):
example = torch.rand(1,self.opt.input_nc, self.opt.finesize)
traced_script_module = torch.jit.trace(self.net, example)
traced_script_module.save(os.path.join(self.opt.save_dir,'model.pt'))
print('Save traced network, example shape:',(1,self.opt.input_nc, self.opt.finesize))
if self.opt.gpu_id != -1:
self.net.cuda()
def eval(self,signals,labels,sequences,plot_result={}):
def preprocessing(self, signals, labels, sequences):
    """Build every full batch selected by `sequences` and push it onto `self.queue`.

    Only complete batches of `self.opt.batchsize` indices are produced; a
    trailing remainder shorter than one batch is ignored.  Each queue item
    is the two-element list `[signal, label]`.

    Args:
        signals: data source handed unchanged to `transformer.batch_generator`.
        labels: label source handed unchanged to `transformer.batch_generator`.
        sequences: sliceable collection of sample indices.
    """
    batch_size = self.opt.batchsize
    covered = len(sequences) // batch_size * batch_size
    for start in range(0, covered, batch_size):
        chosen = sequences[start:start + batch_size]
        batch_signal, batch_label = transformer.batch_generator(signals, labels, chosen)
        # test_flag is read from the instance, so the caller decides the mode
        # by setting self.test_flag before this runs.
        batch_signal = transformer.ToInputShape(batch_signal, self.opt, test_flag=self.test_flag)
        self.queue.put([batch_signal, batch_label])
# def process_pool_init(self,signals,labels,sequences):
# self.queue = mp.Queue(self.opt.load_process*2)
# part_len = len(sequences)//self.opt.load_process//self.opt.batchsize*self.opt.batchsize
# for i in range(self.opt.load_process):
# if i == (self.opt.load_process -1):
# p = mp.Process(target=self.preprocessing,args=(signals,labels,sequences[i*part_len:]))
# else:
# p = mp.Process(target=self.preprocessing,args=(signals,labels,sequences[i*part_len:(i+1)*part_len]))
# p.daemon = True
# p.start()
def process_pool_init(self, signals, labels, sequences):
    """Start a background process that fills a fresh `self.queue` with batches.

    A new queue replaces `self.queue` on every call, then a daemon process
    running `self.preprocessing(signals, labels, sequences)` is started.
    The process handle is not kept and the call does not wait for it.
    """
    self.queue = mp.Queue()
    worker = mp.Process(
        target=self.preprocessing,
        args=(signals, labels, sequences),
        daemon=True,
    )
    worker.start()
def forward(self, signal, label, features, confusion_mat, batch_index=None):
    """Run the network on one batch and update the running statistics.

    When `self.opt.model_name` is 'autoencoder' the loss is
    `self.criterion_auto(out, signal)` and the batch's encoded features and
    labels are copied into `features`.  For any other model the loss is
    `self.criterion_class(out, label)` and `confusion_mat[true][predicted]`
    is incremented once per sample.

    Args:
        signal: input batch passed to `self.net`.
        label: per-sample labels as a tensor.
        features: 2-D array written only in autoencoder mode; columns
            `[:opt.feature]` receive the flattened features and column
            `opt.feature` receives the label.
        confusion_mat: square count matrix, updated in place.
        batch_index: position of this batch inside `features`.  When omitted,
            batches are stored one after another, starting again from row 0
            whenever a different `features` array is passed in.

    Returns:
        The tuple `(loss, features, confusion_mat)`.
    """
    if self.opt.model_name == 'autoencoder':
        out, feature = self.net(signal)
        loss = self.criterion_auto(out, signal)

        if batch_index is None:
            # The row offset used to come from an undefined name `i`; keep a
            # cursor per features buffer so callers need not pass an index.
            if getattr(self, '_feature_buffer', None) is not features:
                self._feature_buffer = features
                self._feature_cursor = 0
            batch_index = self._feature_cursor
            self._feature_cursor += 1

        batch_size = self.opt.batchsize
        first = batch_index * batch_size
        last = first + batch_size
        features[first:last, :self.opt.feature] = feature.data.cpu().numpy().reshape(batch_size, -1)
        features[first:last, self.opt.feature] = label.data.cpu().numpy()
    else:
        out = self.net(signal)
        loss = self.criterion_class(out, label)
        pred = torch.max(out, 1)[1].data.cpu().numpy()
        truth = label.data.cpu().numpy()
        for actual, guessed in zip(truth, pred):
            confusion_mat[actual][guessed] += 1
    return loss, features, confusion_mat
def train(self, signals, labels, sequences):
    """Train for one pass over `sequences` and record the mean batch loss.

    `sequences` is shuffled in place, a background worker is started through
    `self.process_pool_init`, and one optimizer step is taken per batch read
    from `self.queue`.  The mean loss is appended to
    `self.plot_result['train']` and the loss curve is redrawn.

    Args:
        signals: data source forwarded to the preprocessing worker.
        labels: label source forwarded to the preprocessing worker.
        sequences: numpy array of sample indices (its `.shape` is read).

    Raises:
        ValueError: if `sequences` holds fewer samples than one batch.
    """
    self.net.train()
    self.test_flag = False

    batch_size = self.opt.batchsize
    n_batches = len(sequences) // batch_size
    if n_batches == 0:
        # Previously this case surfaced later as a NameError on the loop index.
        raise ValueError('train needs at least one full batch: got '
                         + str(len(sequences)) + ' samples for batchsize '
                         + str(batch_size))

    epoch_loss = 0
    confusion_mat = np.zeros((self.opt.label, self.opt.label), dtype=int)
    features = np.zeros((n_batches * batch_size, self.opt.feature + 1))

    np.random.shuffle(sequences)
    self.process_pool_init(signals, labels, sequences)
    for _ in range(n_batches):
        signal, label = self.queue.get()
        signal, label = transformer.ToTensor(signal, label, gpu_id=self.opt.gpu_id)
        loss, features, confusion_mat = self.forward(signal, label, features, confusion_mat)
        epoch_loss += loss.item()
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()

    # Average over the number of batches.  Dividing by the last loop index
    # was off by one and raised ZeroDivisionError with a single batch.
    self.plot_result['train'].append(epoch_loss / n_batches)
    # The x position keeps the value the last loop index used to give.
    last_index = n_batches - 1
    plot.draw_loss(self.plot_result,
                   self.epoch + last_index / (sequences.shape[0] / batch_size),
                   self.opt)
    # if self.opt.model_name != 'autoencoder':
    # plot.draw_heatmap(confusion_mat,self.opt,name = 'current_train')
# Evaluate on `sequences` batch by batch: accumulate the loss and the
# confusion matrix, print a per-epoch summary, append the mean loss to
# self.plot_result['eval'], store the confusion matrix in
# self.confusion_mats and advance self.epoch.
# NOTE(review): leading indentation is missing in this listing, and several
# steps appear twice in different forms (inline batch building next to
# self.queue.get(), inline loss/confusion code next to self.forward()).
# This looks like removed and added diff lines shown together; confirm which
# set is live before relying on this span.
# NOTE(review): no self.net.eval() call is visible in this span, while
# train() calls self.net.train(); confirm the mode is switched elsewhere.
def eval(self,signals,labels,sequences):
# Read by preprocessing() when it calls transformer.ToInputShape.
self.test_flag = True
confusion_mat = np.zeros((self.opt.label,self.opt.label), dtype=int)
# One row per sample of the full batches; the extra last column holds the label.
features = np.zeros((len(sequences)//self.opt.batchsize*self.opt.batchsize,self.opt.feature+1))
epoch_loss = 0
# Starts the background worker that fills self.queue.
self.process_pool_init(signals, labels, sequences)
for i in range(len(sequences)//self.opt.batchsize):
# Inline batch construction; the next-but-one statement overwrites its result from the queue.
signal,label = transformer.batch_generator(signals, labels, sequences[i*self.opt.batchsize:(i+1)*self.opt.batchsize])
signal = transformer.ToInputShape(signal,self.opt,test_flag =True)
signal,label = self.queue.get()
signal,label = transformer.ToTensor(signal,label,gpu_id =self.opt.gpu_id)
with torch.no_grad():
# Inline loss/statistics code; the same steps are also performed inside self.forward().
if self.opt.model_name == 'autoencoder':
out,feature = self.net(signal)
loss = self.criterion_auto(out, signal)
features[i*self.opt.batchsize:(i+1)*self.opt.batchsize,:self.opt.feature] = (feature.data.cpu().numpy()).reshape(self.opt.batchsize,-1)
features[i*self.opt.batchsize:(i+1)*self.opt.batchsize,self.opt.feature] = label.data.cpu().numpy()
else:
out = self.net(signal)
loss = self.criterion_class(out, label)
pred = (torch.max(out, 1)[1]).data.cpu().numpy()
label=label.data.cpu().numpy()
for x in range(len(pred)):
confusion_mat[label[x]][pred[x]] += 1
loss,features,confusion_mat=self.forward(signal, label, features, confusion_mat)
epoch_loss += loss.item()
if self.opt.model_name != 'autoencoder':
# statistics.report is unpacked into five values here and called again for the printout below.
recall,acc,sp,err,k = statistics.report(confusion_mat)
plot.draw_heatmap(confusion_mat,self.opt,name = 'current_test')
#plot.draw_heatmap(confusion_mat,self.opt,name = 'current_eval')
print('epoch:'+str(self.epoch),' macro-prec,reca,F1,err,kappa: '+str(statistics.report(confusion_mat)))
else:
# NOTE(review): `out` is only bound by the inline network call above; self.forward() does not return it.
plot.draw_autoencoder_result(signal.data.cpu().numpy(), out.data.cpu().numpy(),self.opt)
# NOTE(review): divides by the last loop index `i`, not by the number of batches.
print('epoch:'+str(self.epoch),' loss: '+str(round(epoch_loss/i,5)))
plot.draw_scatter(features, self.opt)
# NOTE(review): `plot_result` is neither a parameter nor a local of this signature.
plot_result['test'].append(epoch_loss/i)
self.plot_result['eval'].append(epoch_loss/i)
self.epoch +=1
self.confusion_mats.append(confusion_mat)
# return confusion_mat
# NOTE(review): same unbound `plot_result` as above.
return plot_result,confusion_mat
# Train for one pass over `sequences`: build each batch inline, take one
# optimizer step per batch, append the mean loss to plot_result['train'],
# redraw the loss curve and return plot_result.
# NOTE(review): another `train` with a three-argument signature appears
# earlier in this listing; confirm which of the two definitions is current.
# NOTE(review): the default `plot_result={}` is a single dict shared between
# calls, and an empty dict has no 'train' key for the append below, so
# callers are expected to pass a prepared dict.
def train(self,signals,labels,sequences,plot_result={}):
self.net.train()
epoch_loss = 0
confusion_mat = np.zeros((self.opt.label,self.opt.label), dtype=int)
# One row per sample of the full batches; the extra last column holds the label.
features = np.zeros((len(sequences)//self.opt.batchsize*self.opt.batchsize,self.opt.feature+1))
for i in range(len(sequences)//self.opt.batchsize):
signal,label = transformer.batch_generator(signals, labels, sequences[i*self.opt.batchsize:(i+1)*self.opt.batchsize])
signal = transformer.ToInputShape(signal,self.opt,test_flag =False)
signal,label = transformer.ToTensor(signal,label,gpu_id =self.opt.gpu_id)
if self.opt.model_name == 'autoencoder':
# Reconstruction loss against the input; features and labels are kept for this batch's rows.
out,feature = self.net(signal)
loss = self.criterion_auto(out, signal)
features[i*self.opt.batchsize:(i+1)*self.opt.batchsize,:self.opt.feature] = (feature.data.cpu().numpy()).reshape(self.opt.batchsize,-1)
features[i*self.opt.batchsize:(i+1)*self.opt.batchsize,self.opt.feature] = label.data.cpu().numpy()
else:
out = self.net(signal)
loss = self.criterion_class(out, label)
# Index of the largest output per sample is taken as the predicted class.
pred = (torch.max(out, 1)[1]).data.cpu().numpy()
label=label.data.cpu().numpy()
# Rows are indexed by the true label, columns by the prediction.
for x in range(len(pred)):
confusion_mat[label[x]][pred[x]] += 1
epoch_loss += loss.item()
self.optimizer.zero_grad()
loss.backward()
self.optimizer.step()
# NOTE(review): divides by the loop index `i`, not by the number of batches.
plot_result['train'].append(epoch_loss/i)
plot.draw_loss(plot_result,self.epoch+i/(sequences.shape[0]/self.opt.batchsize),self.opt)
if self.opt.model_name != 'autoencoder':
plot.draw_heatmap(confusion_mat,self.opt,name = 'current_train')
return plot_result
\ No newline at end of file
......@@ -2,14 +2,14 @@ import os
import time
import shutil
import numpy as np
import random
import torch
from torch import nn, optim
import warnings
from util import util,transformer,dataloader,statistics,plot,options
from util import array_operation as arr
from models import creatnet,io
from train import trainnet,evalnet
from models import creatnet,core
opt = options.Options()
opt.parser.add_argument('--ip',type=str,default='', help='')
......@@ -17,31 +17,23 @@ opt = opt.getparse()
torch.cuda.set_device(opt.gpu_id)
opt.k_fold = 0
opt.save_dir = './datasets/server/tmp'
util.makedirs(opt.save_dir)
'''load ori data'''
signals,labels = dataloader.loaddataset(opt)
label_cnt,label_cnt_per,label_num = statistics.label_statistics(labels)
opt = options.get_auto_options(opt, label_cnt_per, label_num, signals.shape)
'''def network'''
net=creatnet.CreatNet(opt)
if opt.pretrained != '':
net.load_state_dict(torch.load(opt.pretrained))
io.show_paramsnumber(net,opt)
if opt.gpu_id != -1:
net.cuda()
if not opt.no_cudnn:
torch.backends.cudnn.benchmark = True
optimizer = torch.optim.Adam(net.parameters(), lr=opt.lr)
criterion_class = nn.CrossEntropyLoss(opt.weight)
criterion_auto = nn.MSELoss()
core = core.Core(opt)
core.network_init(printflag=True)
'''Receive data'''
if os.path.isdir('./datasets/server/data'):
shutil.rmtree('./datasets/server/data')
os.system('unzip ./datasets/server/data.zip -d ./datasets/server/')
categorys = os.listdir('./datasets/server/data')
categorys.sort()
print('categorys:',categorys)
receive_category = len(categorys)
received_signals = []
received_labels = []
......@@ -55,48 +47,41 @@ for i in range(receive_category):
signal_ori = np.zeros(len(txt_split))
for point in range(len(txt_split)):
signal_ori[point] = float(txt_split[point])
signal = arr.normliaze(signal_ori,'5_95',truncated=4)
for j in range(1,len(signal)//opt.loadsize-1):
received_signals.append(signal[j*opt.loadsize:(j+1)*opt.loadsize])
# #just cut
# for j in range(1,len(signal_ori)//opt.loadsize-1):
# this_signal = signal_ori[j*opt.loadsize:(j+1)*opt.loadsize]
# this_signal = arr.normliaze(this_signal,'5_95',truncated=4)
# received_signals.append(this_signal)
# received_labels.append(i)
#random cut
for j in range(500//len(samples)-1):
ran = random.randint(1000, len(signal_ori)-2000-1)
this_signal = signal_ori[ran:ran+2000]
this_signal = arr.normliaze(this_signal,'5_95',truncated=4)
received_signals.append(this_signal)
received_labels.append(i)
received_signals = np.array(received_signals).reshape(-1,opt.input_nc,opt.loadsize)
received_labels = np.array(received_labels).reshape(-1,1)
# print(labels)
'''merge data'''
signals = signals[receive_category*500:]
labels = labels[receive_category*500:]
signals = np.concatenate((signals, received_signals))
labels = np.concatenate((labels, received_labels))
# print(received_signals.shape,received_labels.shape)
# print(signals.shape,labels.shape)
transformer.shuffledata(signals,labels)
'''
label_cnt,label_cnt_per,label_num = statistics.label_statistics(labels)
opt = options.get_auto_options(opt, label_cnt_per, label_num, signals.shape)
train_sequences,test_sequences = transformer.k_fold_generator(len(labels),opt.k_fold)
final_confusion_mat = np.zeros((opt.label,opt.label), dtype=int)
confusion_mats = []
plot_result = {'train':[],'test':[]}
for epoch in range(opt.epochs):
t1 = time.time()
np.random.shuffle(train_sequences[fold])
plot_result = trainnet(net,signals,labels,train_sequences[fold],epoch+1,plot_result)
plot_result,confusion_mat_eval = evalnet(net,signals,labels,test_sequences[fold],epoch+1,plot_result)
confusion_mats.append(confusion_mat_eval)
torch.save(net.cpu().state_dict(),os.path.join(opt.save_dir,'last.pth'))
if (epoch+1)%opt.network_save_freq == 0:
torch.save(net.cpu().state_dict(),os.path.join(opt.save_dir,opt.model_name+'_epoch'+str(epoch+1)+'.pth'))
print('network saved.')
if opt.gpu_id != -1:
net.cuda()
core.train(signals,labels,train_sequences[0])
core.eval(signals,labels,test_sequences[0])
t2=time.time()
if epoch+1==1:
util.writelog('>>> per epoch cost time:'+str(round((t2-t1),2))+'s',opt,True)
# signals,labels = dataloader.loaddataset(opt)
# label_cnt,label_cnt_per,label_num = statistics.label_statistics(labels)
'''
\ No newline at end of file
core.save_traced_net()
......@@ -11,7 +11,7 @@ from models import creatnet
2020/04/03
'''
opt = options.Options().getparse()
net = creatnet.CreatNet(opt)
net = creatnet.creatnet(opt)
#load data
signals = np.load('./datasets/simple_test/signals.npy')
......
......@@ -28,7 +28,7 @@ signals,labels = dataloader.loaddataset(opt)
label_cnt,label_cnt_per,label_num = statistics.label_statistics(labels)
util.writelog('label statistics: '+str(label_cnt),opt,True)
opt = options.get_auto_options(opt, label_cnt_per, label_num, signals.shape)
train_sequences,test_sequences = transformer.k_fold_generator(len(labels),opt.k_fold)
train_sequences,eval_sequences = transformer.k_fold_generator(len(labels),opt.k_fold)
t2 = time.time()
print('load data cost time: %.2f'% (t2-t1),'s')
......@@ -42,37 +42,35 @@ for fold in range(opt.k_fold):
core.network_init()
final_confusion_mat = np.zeros((opt.label,opt.label), dtype=int)
confusion_mats = []
plot_result = {'train':[],'test':[]}
# confusion_mats = []
for epoch in range(opt.epochs):
t1 = time.time()
np.random.shuffle(train_sequences[fold])
plot_result = core.train(signals,labels,train_sequences[fold],plot_result)
plot_result,confusion_mat_eval = core.eval(signals,labels,test_sequences[fold],plot_result)
confusion_mats.append(confusion_mat_eval)
core.train(signals,labels,train_sequences[fold])
core.eval(signals,labels,eval_sequences[fold])
# confusion_mats.append(confusion_mat_eval)
core.save()
t2=time.time()
if epoch+1==1:
util.writelog('>>> per epoch cost time:'+str(round((t2-t1),2))+'s',opt,True)
#save result
if opt.model_name != 'autoencoder':
pos = plot_result['test'].index(min(plot_result['test']))-1
final_confusion_mat = confusion_mats[pos]
pos = core.plot_result['eval'].index(min(core.plot_result['eval']))-1
final_confusion_mat = core.confusion_mats[pos]
if opt.k_fold==1:
statistics.statistics(final_confusion_mat, opt, 'final', 'final_test')
statistics.statistics(final_confusion_mat, opt, 'final', 'final_eval')
np.save(os.path.join(opt.save_dir,'confusion_mat.npy'), final_confusion_mat)
else:
fold_final_confusion_mat += final_confusion_mat
util.writelog('fold -> macro-prec,reca,F1,err,kappa: '+str(statistics.report(final_confusion_mat)),opt,True)
util.writelog('confusion_mat:\n'+str(final_confusion_mat)+'\n',opt,True)
plot.draw_heatmap(final_confusion_mat,opt,name = 'fold'+str(fold+1)+'_test')
plot.draw_heatmap(final_confusion_mat,opt,name = 'fold'+str(fold+1)+'_eval')
if opt.model_name != 'autoencoder':
if opt.k_fold != 1:
statistics.statistics(fold_final_confusion_mat, opt, 'final', 'k-fold-final_test')
statistics.statistics(fold_final_confusion_mat, opt, 'final', 'k-fold-final_eval')
np.save(os.path.join(opt.save_dir,'confusion_mat.npy'), fold_final_confusion_mat)
if opt.mergelabel:
......
......@@ -2,7 +2,6 @@ import argparse
import os
import time
import numpy as np
import torch
from . import util
class Options():
......@@ -61,6 +60,9 @@ class Options():
self.initialize()
self.opt = self.parser.parse_args()
if self.opt.gpu_id != -1:
os.environ["CUDA_VISIBLE_DEVICES"] = str(self.opt.gpu_id)
if self.opt.label !='auto':
self.opt.label = int(self.opt.label)
if self.opt.input_nc !='auto':
......@@ -125,6 +127,7 @@ def get_auto_options(opt,label_cnt_per,label_num,shape):
opt.weight = 1/label_cnt_per
opt.weight = opt.weight/np.min(opt.weight)
util.writelog('Loss_weight:'+str(opt.weight),opt,True)
import torch
opt.weight = torch.from_numpy(opt.weight).float()
if opt.gpu_id != -1:
opt.weight = opt.weight.cuda()
......@@ -138,7 +141,7 @@ def get_auto_options(opt,label_cnt_per,label_num,shape):
for i in range(opt.label):
names.append(str(i))
opt.label_name = names
else:
elif not isinstance(opt.label_name,list):
opt.label_name = opt.label_name.replace(" ", "").split(",")
return opt
\ No newline at end of file
......@@ -164,11 +164,11 @@ def draw_heatmap(mat,opt,name = 'train'):
def draw_loss(plot_result,epoch,opt):
train = np.array(plot_result['train'])
test = np.array(plot_result['test'])
val = np.array(plot_result['eval'])
plt.figure('running loss')
plt.clf()
train_x = np.linspace(0,epoch,len(train))
test_x = np.linspace(0,int(epoch),len(test))
test_x = np.linspace(0,int(epoch),len(val))
plt.xlabel('Epoch')
plt.ylabel('loss')
if epoch <10:
......@@ -176,7 +176,7 @@ def draw_loss(plot_result,epoch,opt):
else:
plt.xlim((0,epoch))
plt.plot(train_x,train,label='train',linewidth = 1.5)
plt.plot(test_x,test,label='test', linewidth = 1.5)
plt.plot(test_x,val,label='eval', linewidth = 1.5)
plt.legend(loc=1)
plt.title('Running loss',fontsize='large')
plt.savefig(os.path.join(opt.save_dir,'running_loss.png'))
......
......@@ -148,4 +148,4 @@ def ToInputShape(data,opt,test_flag = False):
# result = Normalize(result, maxmin=0.5, avg=0.0150, sigma=0.0500)
# result = result.reshape(batchsize,1,224,122)
return result
return result.astype(np.float32)
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册