Commit fdbfced6 authored by H hypox64

Optimize k-fold

Parent 5ee4d56d
......@@ -73,3 +73,28 @@ labels = np.array([0,0,0,0,0,1,1,1,1,1]) #0->class0 1->class1
```
* step2: pass ```--dataset_dir "your_dataset_dir"``` when running the code.
### About k-fold
```--k_fold``` & ```--fold_index```<br>
* k_fold
```python
# number of folds for k-fold cross-validation. If 0 or 1, no k-fold: 80% of the data is used for training and the rest for evaluation.
```
* fold_index
```python
"""--fold_index
5-fold:
    Cut the dataset into sub-sets at the given indices, then run k-fold on these sub-sets.
    If 'auto', the dataset is shuffled and then cut into equal parts.
    If [2,4,6,7] and len(dataset) == 10:
        sub-sets: dataset[0:2], dataset[2:4], dataset[4:6], dataset[6:7], dataset[7:]
---------------------------------------------------------------
No-fold:
    If 'auto', the dataset is shuffled, then 80% is used for training and the rest for evaluation.
    If [5] and len(dataset) == 10:
        train-set: dataset[0:5]    eval-set: dataset[5:]
"""
```
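For example, a minimal sketch of how ```--fold_index [2,4,6,7]``` cuts a dataset of length 10 (the `dataset` array and variable names below are only illustrative, not part of this project's API):
```python
import numpy as np

dataset = np.arange(10)      # 10 hypothetical samples
fold_index = [2, 4, 6, 7]    # the value passed via --fold_index

# add the outer boundaries, mirroring the description above
bounds = [0] + fold_index + [len(dataset)]
subsets = [dataset[bounds[i]:bounds[i + 1]] for i in range(len(bounds) - 1)]
print(subsets)
# [array([0, 1]), array([2, 3]), array([4, 5]), array([6]), array([7, 8, 9])]
```
With ```--k_fold 0``` (or ```1```) and ```--fold_index [5]```, the same cut yields dataset[0:5] for training and dataset[5:] for evaluation.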
### [More options](./util/options.py).
\ No newline at end of file
......@@ -75,3 +75,27 @@ labels = np.array([0,0,0,0,0,1,1,1,1,1]) #0->class0 1->class1
```
* step2: pass ```--dataset_dir "your_dataset_dir"``` when running the code.
### About k-fold
```--k_fold``` & ```--fold_index```<br>
* k_fold
```python
# number of folds for k-fold cross-validation. If 0 or 1, no k-fold: 80% of the data is used for training and the rest for evaluation.
```
* fold_index
```python
"""--fold_index
5-fold:
    Cut the dataset into sub-sets at the given indices, then run k-fold on these sub-sets.
    If 'auto', the dataset is shuffled and then cut into equal parts.
    If [2,4,6,7] and len(dataset) == 10:
        sub-sets: dataset[0:2], dataset[2:4], dataset[4:6], dataset[6:7], dataset[7:]
---------------------------------------------------------------
No-fold:
    If 'auto', the dataset is shuffled, then 80% is used for training and the rest for evaluation.
    If [5] and len(dataset) == 10:
        train-set: dataset[0:5]    eval-set: dataset[5:]
"""
```
### [More options](./util/options.py).
\ No newline at end of file
......@@ -4,7 +4,6 @@ import shutil
import random
import torch
import numpy as np
import matplotlib.pyplot as plt
import sys
sys.path.append("..")
......@@ -22,8 +21,10 @@ util.makedirs(opt.save_dir)
util.makedirs(opt.rec_tmp)
# -----------------------------Load original data-----------------------------
ori_signals_train,ori_labels_train,ori_signals_eval,ori_labels_eval = dataloader.loaddataset(opt)
label_cnt,label_cnt_per,label_num = statistics.label_statistics(ori_labels_train)
signals,labels = dataloader.loaddataset(opt)
ori_signals_train,ori_labels_train,ori_signals_eval,ori_labels_eval = \
signals[:opt.fold_index[0]].copy(),labels[:opt.fold_index[0]].copy(),signals[opt.fold_index[0]:].copy(),labels[opt.fold_index[0]:].copy()
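# samples before opt.fold_index[0] form the training set, the rest the evaluation set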
label_cnt,label_cnt_per,label_num = statistics.label_statistics(labels)
opt = options.get_auto_options(opt, label_cnt_per, label_num, ori_signals_train)
# -----------------------------def network-----------------------------
......@@ -78,19 +79,15 @@ def train(opt):
label_cnt,label_cnt_per,label_num = statistics.label_statistics(labels_train)
opt = options.get_auto_options(opt, label_cnt_per, label_num, signals_train)
train_sequences= transformer.k_fold_generator(len(labels_train),opt.k_fold,opt.separated)
eval_sequences= transformer.k_fold_generator(len(labels_eval),opt.k_fold,opt.separated)
train_sequences = np.linspace(0, len(labels_train)-1,len(labels_train),dtype=np.int64)
eval_sequences = np.linspace(0, len(labels_eval)-1,len(labels_eval),dtype=np.int64)
for epoch in range(opt.epochs):
t1 = time.time()
if opt.separated:
#print(signals_train.shape,labels_train.shape)
core.train(signals_train,labels_train,train_sequences)
core.eval(signals_eval,labels_eval,eval_sequences)
else:
core.train(signals,labels,train_sequences[fold])
core.eval(signals,labels,eval_sequences[fold])
core.train(signals_train,labels_train,train_sequences)
core.eval(signals_eval,labels_eval,eval_sequences)
t2=time.time()
if epoch+1==1:
util.writelog('>>> per epoch cost time:'+str(round((t2-t1),2))+'s',opt,True)
......
......@@ -11,10 +11,8 @@ from util import util,transformer,dataloader,statistics,plot,options
from models import core
opt = options.Options().getparse()
t1 = time.time()
'''
Use your own data to train
"""Use your own data to train
* step1: Generate signals.npy and labels.npy in the following format.
# 1.type:numpydata signals:np.float64 labels:np.int64
# 2.shape signals:[num,ch,length] labels:[num]
......@@ -23,22 +21,15 @@ Use your own data to train
signals = np.zeros((10,1,10),dtype=np.float64)
labels = np.array([0,0,0,0,0,1,1,1,1,1]) #0->class0 1->class1
* step2: input ```--dataset_dir your_dataset_dir``` when running code.
'''
"""
#----------------------------Load Data----------------------------
if opt.separated:
signals_train,labels_train,signals_eval,labels_eval = dataloader.loaddataset(opt)
label_cnt,label_cnt_per,label_num = statistics.label_statistics(labels_train)
util.writelog('label statistics: '+str(label_cnt),opt,True)
opt = options.get_auto_options(opt, label_cnt_per, label_num, signals_train)
train_sequences= transformer.k_fold_generator(len(labels_train),opt.k_fold,opt.separated)
eval_sequences= transformer.k_fold_generator(len(labels_eval),opt.k_fold,opt.separated)
else:
signals,labels = dataloader.loaddataset(opt)
label_cnt,label_cnt_per,label_num = statistics.label_statistics(labels)
util.writelog('label statistics: '+str(label_cnt),opt,True)
opt = options.get_auto_options(opt, label_cnt_per, label_num, signals)
train_sequences,eval_sequences = transformer.k_fold_generator(len(labels),opt.k_fold)
t1 = time.time()
signals,labels = dataloader.loaddataset(opt)
label_cnt,label_cnt_per,label_num = statistics.label_statistics(labels)
util.writelog('label statistics: '+str(label_cnt),opt,True)
opt = options.get_auto_options(opt, label_cnt_per, label_num, signals)
train_sequences,eval_sequences = transformer.k_fold_generator(len(labels),opt.k_fold,opt.fold_index)
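# train_sequences and eval_sequences each hold one index array per fold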
t2 = time.time()
print('Cost time: %.2f'% (t2-t1),'s')
......@@ -51,16 +42,11 @@ for fold in range(opt.k_fold):
if opt.k_fold != 1:util.writelog('------------------------------ k-fold:'+str(fold+1)+' ------------------------------',opt,True)
core.network_init()
final_confusion_mat = np.zeros((opt.label,opt.label), dtype=int)
for epoch in range(opt.epochs):
for epoch in range(opt.epochs):
t1 = time.time()
if opt.separated:
#print(signals_train.shape,labels_train.shape)
core.train(signals_train,labels_train,train_sequences)
core.eval(signals_eval,labels_eval,eval_sequences)
else:
core.train(signals,labels,train_sequences[fold])
core.eval(signals,labels,eval_sequences[fold])
core.train(signals,labels,train_sequences[fold])
core.eval(signals,labels,eval_sequences[fold])
core.save()
t2=time.time()
......
......@@ -20,10 +20,6 @@ def del_labels(signals,labels,dels):
return signals,labels
# def sortbylabel(signals,labels):
# signals
def segment_dataset(signals,labels,a=0.8,random=True):
length = len(labels)
if random:
......@@ -92,32 +88,17 @@ def balance_label(signals,labels):
#load all data in datasets
def loaddataset(opt,shuffle = False):
def loaddataset(opt):
print('Loading dataset...')
if opt.separated:
signals_train = np.load(opt.dataset_dir+'/signals_train.npy')
labels_train = np.load(opt.dataset_dir+'/labels_train.npy')
signals_eval = np.load(opt.dataset_dir+'/signals_eval.npy')
labels_eval = np.load(opt.dataset_dir+'/labels_eval.npy')
if opt.normliaze != 'None':
for i in range(signals_train.shape[0]):
for j in range(signals_train.shape[1]):
signals_train[i][j] = arr.normliaze(signals_train[i][j], mode = opt.normliaze, truncated=5)
for i in range(signals_eval.shape[0]):
for j in range(signals_eval.shape[1]):
signals_eval[i][j] = arr.normliaze(signals_eval[i][j], mode = opt.normliaze, truncated=5)
else:
signals = np.load(opt.dataset_dir+'/signals.npy')
labels = np.load(opt.dataset_dir+'/labels.npy')
if opt.normliaze != 'None':
for i in range(signals.shape[0]):
for j in range(signals.shape[1]):
signals[i][j] = arr.normliaze(signals[i][j], mode = opt.normliaze, truncated=5)
if not opt.no_shuffle:
transformer.shuffledata(signals,labels)
if opt.separated:
return signals_train,labels_train,signals_eval,labels_eval
else:
return signals,labels
\ No newline at end of file
signals = np.load(os.path.join(opt.dataset_dir,'signals.npy'))
labels = np.load(os.path.join(opt.dataset_dir,'labels.npy'))
if opt.normliaze != 'None':
for i in range(signals.shape[0]):
for j in range(signals.shape[1]):
signals[i][j] = arr.normliaze(signals[i][j], mode = opt.normliaze, truncated=5)
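    # shuffle only when fold boundaries are chosen automatically, so an explicit fold_index keeps referring to the original sample order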
if opt.fold_index == 'auto':
transformer.shuffledata(signals,labels)
return signals,labels
\ No newline at end of file
......@@ -22,11 +22,29 @@ class Options():
# ------------------------Dataset------------------------
self.parser.add_argument('--dataset_dir', type=str, default='./datasets/simple_test',help='your dataset path')
self.parser.add_argument('--save_dir', type=str, default='./checkpoints/',help='save checkpoints')
self.parser.add_argument('--separated', action='store_true', help='if specified,for preload data, if input, load separated train and test datasets')
self.parser.add_argument('--no_shuffle', action='store_true', help='if specified, do not shuffle data when load(use to evaluate individual differences)')
self.parser.add_argument('--load_thread', type=int, default=8,help='how many threads when load data')
self.parser.add_argument('--normliaze', type=str, default='5_95', help='mode of normliaze, 5_95 | maxmin | None')
self.parser.add_argument('--k_fold', type=int, default=0, help='number of folds for k-fold. If 0 or 1, no k-fold: the first 80 percent of the data is used for training and the rest for evaluation')
"""--fold_index
5-fold:
    Cut the dataset into sub-sets at the given indices, then run k-fold on these sub-sets.
    If 'auto', the dataset is shuffled and then cut into equal parts.
    If [2,4,6,7] and len(dataset) == 10:
        sub-sets: dataset[0:2], dataset[2:4], dataset[4:6], dataset[6:7], dataset[7:]
---------------------------------------------------------------
No-fold:
    If 'auto', the dataset is shuffled, then 80% is used for training and the rest for evaluation.
    If [5] and len(dataset) == 10:
        train-set: dataset[0:5]    eval-set: dataset[5:]
"""
self.parser.add_argument('--fold_index', type=str, default='auto',
help='where to cut the folds, e.g. for 5-fold with input [2,4,6,7] -> sub-sets: dataset[0:2],dataset[2:4],dataset[4:6],dataset[6:7],dataset[7:]')
self.parser.add_argument('--mergelabel', type=str, default='None',
help='merge some labels to one label and give the result, example:"[[0,1,4],[2,3,5]]" -> label(0,1,4) regard as 0,label(2,3,5) regard as 1')
self.parser.add_argument('--mergelabel_name', type=str, default='None',help='name of labels,example:"a,b,c,d,e,f"')
# ------------------------Network------------------------
"""Available Network
1d: lstm, cnn_1d, resnet18_1d, resnet34_1d, multi_scale_resnet_1d,
......@@ -58,11 +76,7 @@ class Options():
self.parser.add_argument('--weight_mod', type=str, default='auto',help='Choose weight mode: auto | normal')
self.parser.add_argument('--epochs', type=int, default=20,help='end epoch')
self.parser.add_argument('--network_save_freq', type=int, default=5,help='the freq to save network')
self.parser.add_argument('--k_fold', type=int, default=0,help='fold_num of k-fold.if 0 or 1,no k-fold')
self.parser.add_argument('--mergelabel', type=str, default='None',
help='merge some labels to one label and give the result, example:"[[0,1,4],[2,3,5]]" -> label(0,1,4) regard as 0,label(2,3,5) regard as 1')
self.parser.add_argument('--mergelabel_name', type=str, default='None',help='name of labels,example:"a,b,c,d,e,f"')
self.initialized = True
def getparse(self):
......@@ -98,8 +112,11 @@ class Options():
if self.opt.k_fold == 0 :
self.opt.k_fold = 1
if self.opt.separated:
self.opt.k_fold = 1
if self.opt.fold_index != 'auto':
self.opt.fold_index = eval(self.opt.fold_index)
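        # an index.npy file in the dataset directory, if present, overrides --fold_index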
if os.path.isfile(os.path.join(self.opt.dataset_dir,'index.npy')):
self.opt.fold_index = (np.load(os.path.join(self.opt.dataset_dir,'index.npy'))).tolist()
self.opt.mergelabel = eval(self.opt.mergelabel)
if self.opt.mergelabel_name != 'None':
......
import os
import numpy as np
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
......
......@@ -13,24 +13,31 @@ def shuffledata(data,target):
np.random.shuffle(target)
# return data,target
def k_fold_generator(length,fold_num,separated=False):
if separated:
sequence = np.linspace(0, length-1,num = length,dtype='int')
return sequence
def k_fold_generator(length,fold_num,fold_index = 'auto'):
sequence = np.linspace(0,length-1,length,dtype='int')
train_sequence = [];eval_sequence = []
if fold_num == 0 or fold_num == 1:
if fold_index != 'auto' :
fold_index = [0]+fold_index+[length]
else:
fold_index = [0]+[int(length*0.8)]+[length]
train_sequence.append(sequence[:fold_index[1]])
eval_sequence.append(sequence[fold_index[1]:])
else:
if fold_num == 0 or fold_num == 1:
train_sequence = np.linspace(0,int(length*0.8)-1,int(length*0.8),dtype='int')[None]
test_sequence = np.linspace(int(length*0.8),length-1,int(length*0.2),dtype='int')[None]
if fold_index != 'auto' :
fold_index = [0]+fold_index+[length]
else:
sequence = np.linspace(0,length-1,length,dtype='int')
train_length = int(length/fold_num*(fold_num-1))
test_length = int(length/fold_num)
train_sequence = np.zeros((fold_num,train_length), dtype = 'int')
test_sequence = np.zeros((fold_num,test_length), dtype = 'int')
fold_index = []
for i in range(fold_num):
test_sequence[i] = (sequence[test_length*i:test_length*(i+1)])[:test_length]
train_sequence[i] = np.concatenate((sequence[0:test_length*i],sequence[test_length*(i+1):]),axis=0)[:train_length]
return train_sequence,test_sequence
fold_index.append(length//fold_num*i)
fold_index.append(length)
for i in range(len(fold_index)-1):
eval_sequence.append(sequence[fold_index[i]:fold_index[i+1]])
train_sequence.append(np.concatenate((sequence[0:fold_index[i]],sequence[fold_index[i+1]:]),axis=0))
if fold_num > 1:
print('fold_index:',fold_index)
return train_sequence,eval_sequence
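# Example for k_fold_generator (illustrative numbers): with length == 10, fold_num == 5 and
# fold_index == 'auto', fold_index becomes [0, 2, 4, 6, 8, 10], so eval_sequence[0] is [0, 1]
# and train_sequence[0] is [2, 3, 4, 5, 6, 7, 8, 9]; each fold evaluates on a different fifth
# of the data and trains on the remaining four fifths.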
def batch_generator(data,target,sequence,shuffle = True):
batchsize = len(sequence)
......