Commit a2f4ad99 authored by: R root

add_more_config_for_lrc

Parent 71db55ab
......@@ -113,4 +113,10 @@ MY_DARTS = Genotype(
('skip_connect', 2), ('skip_connect', 3)],
reduce_concat=range(2, 6))
DARTS = MY_DARTS
MY_DARTS_list = [
Genotype(normal=[('sep_conv_3x3', 0), ('skip_connect', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('sep_conv_3x3', 2)],normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2),('max_pool_3x3', 0), ('skip_connect', 3), ('avg_pool_3x3', 1), ('skip_connect', 2), ('skip_connect', 3)], reduce_concat=range(2, 6)),
Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('dil_conv_3x3', 2), ('skip_connect', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('skip_connect', 1)],normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2),('dil_conv_3x3', 0), ('skip_connect', 3), ('skip_connect', 2), ('skip_connect', 3), ('skip_connect',2)], reduce_concat=range(2, 6)),
Genotype(normal=[('sep_conv_3x3', 0), ('skip_connect', 1), ('skip_connect', 0), ('dil_conv_5x5', 1), ('skip_connect', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('sep_conv_3x3', 1)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('skip_connect', 2), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('skip_connect', 3)], reduce_concat=range(2, 6))
]
DARTS = MY_DARTS_list[0]
......@@ -38,6 +38,41 @@ def cosine_decay(learning_rate, num_epoch, steps_one_epoch):
with init_on_cpu():
decayed_lr = learning_rate * \
(ops.cos((global_step / steps_one_epoch) \
(ops.cos(fluid.layers.floor(global_step / steps_one_epoch) \
* math.pi / num_epoch) + 1)/2
return decayed_lr
def cosine_with_warmup_decay(learning_rate, lr_min, steps_one_epoch,
warmup_epochs, total_epoch, num_gpu):
global_step = _decay_step_counter()
epoch_idx = fluid.layers.floor(global_step / steps_one_epoch)
lr = fluid.layers.create_global_var(
shape=[1],
value=0.0,
dtype='float32',
persistable=True,
name="learning_rate")
warmup_epoch_var = fluid.layers.fill_constant(
shape=[1], dtype='float32', value=float(warmup_epochs), force_cpu=True)
num_gpu_var = fluid.layers.fill_constant(
shape=[1], dtype='float32', value=float(num_gpu), force_cpu=True)
batch_idx = global_step - steps_one_epoch * epoch_idx
with fluid.layers.control_flow.Switch() as switch:
with switch.case(epoch_idx < warmup_epoch_var):
epoch_ = (batch_idx + 1) / steps_one_epoch
factor = 1 / num_gpu_var * (epoch_ * (num_gpu_var - 1) / warmup_epoch_var + 1)
decayed_lr = learning_rate * factor * num_gpu_var
fluid.layers.assign(decayed_lr, lr)
epoch_ = (batch_idx + 1) / steps_one_epoch
m = epoch_ / total_epoch
frac = (1 + ops.cos(math.pi * m)) / 2
cosine_lr = (lr_min + (learning_rate - lr_min) * frac) * num_gpu_var
with switch.default():
fluid.layers.assign(cosine_lr, lr)
return lr
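A minimal plain-Python sketch of the piecewise schedule that the Switch block above builds, mirroring the formulas as written (illustrative only, not part of this commit; warmup_cosine_lr is a hypothetical helper name):

import math

def warmup_cosine_lr(global_step, learning_rate, lr_min, steps_one_epoch,
                     warmup_epochs, total_epoch, num_gpu):
    # Position of the current step inside its epoch, as in the graph version above.
    epoch_idx = global_step // steps_one_epoch
    batch_idx = global_step - steps_one_epoch * epoch_idx
    epoch_ = (batch_idx + 1) / float(steps_one_epoch)
    if epoch_idx < warmup_epochs:
        # Linear warmup from learning_rate towards num_gpu * learning_rate.
        factor = (epoch_ * (num_gpu - 1) / float(warmup_epochs) + 1) / float(num_gpu)
        return learning_rate * factor * num_gpu
    # Cosine interpolation between learning_rate and lr_min, scaled by num_gpu.
    frac = (1 + math.cos(math.pi * epoch_ / float(total_epoch))) / 2
    return (lr_min + (learning_rate - lr_min) * frac) * num_gpu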
......@@ -97,7 +97,9 @@ class Cell():
def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'):
relu_a = fluid.layers.relu(input)
relu_a = fluid.layers.relu(input, inplace=True)
#relu_a.persistable = True
#print(relu_a)
pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', 3)
conv2d_a = fluid.layers.conv2d(
pool_a,
......@@ -141,6 +143,8 @@ def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'):
initializer=Constant(0.), name=bn_b_name + '.bias'),
moving_mean_name=bn_b_name + '.running_mean',
moving_variance_name=bn_b_name + '.running_var')
#bn_b.persistable = True
#print(bn_b)
fc_name = aux_name + '.classifier'
fc = fluid.layers.fc(bn_b,
num_classes,
......@@ -174,11 +178,12 @@ def StemConv(input, C_out, kernel_size, padding):
return bn_a
class NetworkCIFAR(object):
def __init__(self, C, class_num, layers, auxiliary, genotype):
self.class_num = class_num
self._layers = layers
self._auxiliary = auxiliary
self.class_num = class_num
stem_multiplier = 3
self.drop_path_prob = 0
......@@ -201,36 +206,12 @@ class NetworkCIFAR(object):
if i == 2 * layers // 3:
C_to_auxiliary = C_prev
def forward(self, init_channel, is_train):
self.training = is_train
self.logits_aux = None
num_channel = init_channel * 3
s0 = StemConv(self.image, num_channel, kernel_size=3, padding=1)
s1 = s0
for i, cell in enumerate(self.cells):
name = 'cells.' + str(i) + '.'
s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train,
name)
if i == int(2 * self._layers // 3):
if self._auxiliary and self.training:
self.logits_aux = AuxiliaryHeadCIFAR(s1, self.class_num)
out = fluid.layers.adaptive_pool2d(s1, (1, 1), "avg")
self.logits = fluid.layers.fc(out,
size=self.class_num,
param_attr=ParamAttr(
initializer=Normal(scale=1e-3),
name='classifier.weight'),
bias_attr=ParamAttr(
initializer=Constant(0.),
name='classifier.bias'))
return self.logits, self.logits_aux
def build_input(self, image_shape, batch_size, is_train):
def build_input(self, image_shape, is_train):
if is_train:
py_reader = fluid.layers.py_reader(
capacity=64,
shapes=[[-1] + image_shape, [-1, 1], [-1, 1], [-1, 1], [-1, 1],
[-1, 1], [-1, batch_size, self.class_num - 1]],
[-1, 1], [50, -1, self.class_num - 1]],
lod_levels=[0, 0, 0, 0, 0, 0, 0],
dtypes=[
"float32", "int64", "int64", "float32", "int32", "int32",
......@@ -248,14 +229,52 @@ class NetworkCIFAR(object):
name='test_reader')
return py_reader
def train_model(self, py_reader, init_channels, aux, aux_w, batch_size,
loss_lambda):
def forward(self, init_channel, is_train):
self.training = is_train
self.logits_aux = None
num_channel = init_channel * 3
s0 = s1 = StemConv(self.image, num_channel, kernel_size=3, padding=1)
#s0.persistable = True
#print(s0)
for i, cell in enumerate(self.cells):
#s1.persistable = True
#print(s1)
name = 'cells.' + str(i) + '.'
s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train,
name)
if i == int(2 * self._layers // 3):
if self._auxiliary and self.training:
#s1.persistable = True
#print(s1)
self.logits_aux = AuxiliaryHeadCIFAR(s1, self.class_num)
#self.logits_aux.persistable = True
#print(self.logits_aux)
out = fluid.layers.adaptive_pool2d(s1, (1, 1), "avg")
#out.persistable = True
#print(out)
self.logits = fluid.layers.fc(out,
size=self.class_num,
param_attr=ParamAttr(
initializer=Normal(scale=1e-3),
name='classifier.weight'),
bias_attr=ParamAttr(
initializer=Constant(0.),
name='classifier.bias'))
#self.logits.persistable = True
#print(self.logits)
#print(self.logits_aux)
return self.logits, self.logits_aux
def train_model(self, py_reader, init_channels, aux, aux_w, loss_lambda):
self.image, self.ya, self.yb, self.lam, self.label_reshape,\
self.non_label_reshape, self.rad_var = fluid.layers.read_file(py_reader)
self.logits, self.logits_aux = self.forward(init_channels, True)
self.mixup_loss = self.mixup_loss(aux, aux_w)
self.lrc_loss = self.lrc_loss(batch_size)
return self.mixup_loss + loss_lambda * self.lrc_loss
#self.lrc_loss = self.lrc_loss()
#return self.mixup_loss + loss_lambda * self.lrc_loss
return self.mixup_loss
def test_model(self, py_reader, init_channels):
self.image, self.ya = fluid.layers.read_file(py_reader)
......@@ -264,12 +283,13 @@ class NetworkCIFAR(object):
loss = fluid.layers.cross_entropy(prob, self.ya)
acc_1 = fluid.layers.accuracy(self.logits, self.ya, k=1)
acc_5 = fluid.layers.accuracy(self.logits, self.ya, k=5)
return loss, acc_1, acc_5
return prob, acc_1, acc_5
def mixup_loss(self, auxiliary, auxiliary_weight):
prob = fluid.layers.softmax(self.logits, use_cudnn=False)
loss_a = fluid.layers.cross_entropy(prob, self.ya)
loss_b = fluid.layers.cross_entropy(prob, self.yb)
loss_a_mean = fluid.layers.reduce_mean(loss_a)
loss_b_mean = fluid.layers.reduce_mean(loss_b)
loss = self.lam * loss_a_mean + (1 - self.lam) * loss_b_mean
......@@ -281,9 +301,10 @@ class NetworkCIFAR(object):
loss_b_aux_mean = fluid.layers.reduce_mean(loss_b_aux)
loss_aux = self.lam * loss_a_aux_mean + (1 - self.lam
) * loss_b_aux_mean
#print(loss_aux)
return loss + auxiliary_weight * loss_aux
def lrc_loss(self, batch_size):
def lrc_loss(self):
y_diff_reshape = fluid.layers.reshape(self.logits, shape=(-1, 1))
label_reshape = fluid.layers.squeeze(self.label_reshape, axes=[1])
non_label_reshape = fluid.layers.squeeze(
......@@ -296,18 +317,247 @@ class NetworkCIFAR(object):
y_diff_non_label_reshape = fluid.layers.gather(y_diff_reshape,
non_label_reshape)
y_diff_label = fluid.layers.reshape(
y_diff_label_reshape, shape=(-1, batch_size, 1))
y_diff_label_reshape, shape=(1, -1, 1))
y_diff_non_label = fluid.layers.reshape(
y_diff_non_label_reshape,
shape=(-1, batch_size, self.class_num - 1))
shape=(1, -1, self.class_num - 1))
y_diff_ = y_diff_non_label - y_diff_label
y_diff_ = fluid.layers.transpose(y_diff_, perm=[1, 2, 0])
rad_var_trans = fluid.layers.transpose(self.rad_var, perm=[1, 2, 0])
rad_y_diff_trans = rad_var_trans * y_diff_
lrc_loss_sum = fluid.layers.reduce_sum(rad_y_diff_trans, dim=[0, 1])
lrc_loss_ = fluid.layers.abs(lrc_loss_sum) / (batch_size *
(self.class_num - 1))
shape_nbc = fluid.layers.shape(rad_y_diff_trans)
shape_nb = fluid.layers.slice(shape_nbc, axes=[0], starts=[0], ends=[2])
num = fluid.layers.reduce_prod(shape_nb)
num.stop_gradient = True
lrc_loss_ = fluid.layers.abs(lrc_loss_sum) / num
lrc_loss_mean = fluid.layers.reduce_mean(lrc_loss_)
return lrc_loss_mean
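For readability, a rough NumPy restatement of the LRC term computed above (illustrative only, not part of this commit), assuming logits of shape (B, C), integer labels of shape (B,), and rad holding S independent +/-1 Rademacher draws of shape (S, B, C-1); lrc_loss_numpy is a hypothetical name:

import numpy as np

def lrc_loss_numpy(logits, labels, rad):
    # logits: (B, C) raw scores; labels: (B,) int class ids; rad: (S, B, C-1) +/-1 samples.
    B, C = logits.shape
    y_label = logits[np.arange(B), labels]            # score of the true class, (B,)
    mask = np.ones_like(logits, dtype=bool)
    mask[np.arange(B), labels] = False
    y_non_label = logits[mask].reshape(B, C - 1)      # scores of the other classes, (B, C-1)
    y_diff = y_non_label - y_label[:, None]           # margin terms, (B, C-1)
    # |sum over the batch and non-label classes of rad * y_diff|, normalized by B*(C-1),
    # then averaged over the S Rademacher draws.
    per_draw = np.abs((rad * y_diff[None]).sum(axis=(1, 2))) / (B * (C - 1))
    return per_draw.mean()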
def AuxiliaryHeadImageNet(input, num_classes, aux_name='auxiliary_head'):
relu_a = fluid.layers.relu(input, inplace=True)
#relu_a.persistable = True
#print(relu_a)
pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', pool_stride=2)
conv2d_a = fluid.layers.conv2d(
pool_a,
128,
1,
name=aux_name + '.features.2',
param_attr=ParamAttr(
initializer=Xavier(
uniform=False, fan_in=0),
name=aux_name + '.features.2.weight'),
bias_attr=False)
bn_a_name = aux_name + '.features.3'
bn_a = fluid.layers.batch_norm(
conv2d_a,
act='relu',
name=bn_a_name,
param_attr=ParamAttr(
initializer=Constant(1.), name=bn_a_name + '.weight'),
bias_attr=ParamAttr(
initializer=Constant(0.), name=bn_a_name + '.bias'),
moving_mean_name=bn_a_name + '.running_mean',
moving_variance_name=bn_a_name + '.running_var')
conv2d_b = fluid.layers.conv2d(
bn_a,
768,
2,
act='relu',
name=aux_name + '.features.5',
param_attr=ParamAttr(
initializer=Xavier(
uniform=False, fan_in=0),
name=aux_name + '.features.5.weight'),
bias_attr=False)
#bn_b.persistable = True
#print(bn_b)
fc_name = aux_name + '.classifier'
fc = fluid.layers.fc(conv2d_b,
num_classes,
name=fc_name,
param_attr=ParamAttr(
initializer=Normal(scale=1e-3),
name=fc_name + '.weight'),
bias_attr=ParamAttr(
initializer=Constant(0.), name=fc_name + '.bias'))
return fc
def Stem0Conv(input, C_out):
conv_a = fluid.layers.conv2d(
input,
C_out // 2,
3,
stride=2,
padding=1,
param_attr=ParamAttr(
initializer=Xavier(
uniform=False, fan_in=0), name='stem0.0.weight'),
bias_attr=False)
bn_a = fluid.layers.batch_norm(
conv_a,
param_attr=ParamAttr(
initializer=Constant(1.), name='stem0.1.weight'),
bias_attr=ParamAttr(
initializer=Constant(0.), name='stem0.1.bias'),
moving_mean_name='stem0.1.running_mean',
moving_variance_name='stem0.1.running_var')
relu_a = fluid.layers.relu(bn_a, inplace=True)
conv_b = fluid.layers.conv2d(
relu_a,
C_out,
3,
padding=1,
param_attr=ParamAttr(
initializer=Xavier(
uniform=False, fan_in=0), name='stem0.3.weight'),
bias_attr=False)
bn_b = fluid.layers.batch_norm(
conv_b,
param_attr=ParamAttr(
initializer=Constant(1.), name='stem0.4.weight'),
bias_attr=ParamAttr(
initializer=Constant(0.), name='stem0.4.bias'),
moving_mean_name='stem0.4.running_mean',
moving_variance_name='stem0.4.running_var')
return bn_b
def Stem1Conv(input, C_out):
relu_a = fluid.layers.relu(input, inplace=True)
conv_a = fluid.layers.conv2d(
relu_a,
C_out,
3,
stride=2,
padding=1,
param_attr=ParamAttr(
initializer=Xavier(
uniform=False, fan_in=0), name='stem1.1.weight'),
bias_attr=False)
bn_a = fluid.layers.batch_norm(
conv_a,
param_attr=ParamAttr(
initializer=Constant(1.), name='stem1.2.weight'),
bias_attr=ParamAttr(
initializer=Constant(0.), name='stem1.2.bias'),
moving_mean_name='stem1.2.running_mean',
moving_variance_name='stem1.2.running_var')
return bn_a
class NetworkImageNet(object):
def __init__(self, C, class_num, layers, auxiliary, genotype):
self.class_num = class_num
self._layers = layers
self._auxiliary = auxiliary
self.drop_path_prob = 0
C_prev_prev, C_prev, C_curr = C, C, C
self.cells = []
reduction_prev = True
for i in range(layers):
if i in [layers // 3, 2 * layers // 3]:
C_curr *= 2
reduction = True
else:
reduction = False
cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction,
reduction_prev)
reduction_prev = reduction
self.cells += [cell]
C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
if i == 2 * layers // 3:
C_to_auxiliary = C_prev
self.stem0 = functools.partial(Stem0Conv, C_out=C)
self.stem1 = functools.partial(Stem1Conv, C_out=C)
def build_input(self, image_shape, is_train):
if is_train:
py_reader = fluid.layers.py_reader(
capacity=64,
shapes=[[-1] + image_shape, [-1, 1]],
lod_levels=[0, 0],
dtypes=[
"float32", "int64"],
use_double_buffer=True,
name='train_reader')
else:
py_reader = fluid.layers.py_reader(
capacity=64,
shapes=[[-1] + image_shape, [-1, 1]],
lod_levels=[0, 0],
dtypes=["float32", "int64"],
use_double_buffer=True,
name='test_reader')
return py_reader
def forward(self, init_channel, is_train):
self.training = is_train
self.logits_aux = None
num_channel = init_channel * 3
s0 = self.stem0(self.image)
s1 = self.stem1(s0)
for i, cell in enumerate(self.cells):
#s1.persistable = True
#print(s1)
name = 'cells.' + str(i) + '.'
s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train,
name)
if i == int(2 * self._layers // 3):
if self._auxiliary and self.training:
#s1.persistable = True
#print(s1)
self.logits_aux = AuxiliaryHeadImageNet(s1, self.class_num)
#self.logits_aux.persistable = True
#print(self.logits_aux)
out = fluid.layers.pool2d(s1, 7, "avg")
#out.persistable = True
#print(out)
self.logits = fluid.layers.fc(out,
size=self.class_num,
param_attr=ParamAttr(
initializer=Normal(scale=1e-3),
name='classifier.weight'),
bias_attr=ParamAttr(
initializer=Constant(0.),
name='classifier.bias'))
#self.logits.persistable = True
#print(self.logits)
#print(self.logits_aux)
return self.logits, self.logits_aux
def calc_loss(self, auxiliary, auxiliary_weight):
prob = fluid.layers.softmax(self.logits, use_cudnn=False)
loss = fluid.layers.cross_entropy(prob, self.label)
loss_mean = fluid.layers.reduce_mean(loss)
#if auxiliary:
# prob_aux = fluid.layers.softmax(self.logits_aux, use_cudnn=False)
# loss_aux = fluid.layers.cross_entropy(prob_aux, self.label)
# loss_aux_mean = fluid.layers.reduce_mean(loss_aux)
prob_aux = fluid.layers.softmax(self.logits_aux, use_cudnn=False)
loss_aux = fluid.layers.cross_entropy(prob_aux, self.label)
loss_aux_mean = fluid.layers.reduce_mean(loss_aux)
return loss_mean + auxiliary_weight * loss_aux_mean
def train_model(self, py_reader, init_channels, aux, aux_w):
self.image, self.label = fluid.layers.read_file(py_reader)
self.logits, self.logits_aux = self.forward(init_channels, True)
self.loss = self.calc_loss(aux, aux_w)
return self.loss
def test_model(self, py_reader, init_channels):
self.image, self.label = fluid.layers.read_file(py_reader)
self.logits, _ = self.forward(init_channels, False)
prob = fluid.layers.softmax(self.logits, use_cudnn=False)
loss = fluid.layers.cross_entropy(prob, self.label)
acc_1 = fluid.layers.accuracy(self.logits, self.label, k=1)
acc_5 = fluid.layers.accuracy(self.logits, self.label, k=5)
return prob, acc_1, acc_5
......@@ -312,7 +312,7 @@ def FactorizedReduce(input, C_out, name='', affine=True):
bias_attr=False)
h_end = relu_a.shape[2]
w_end = relu_a.shape[3]
slice_a = fluid.layers.slice(relu_a, [2, 3], [1, 1], [h_end, w_end])
slice_a = fluid.layers.slice(input=relu_a, axes=[2, 3], starts=[1, 1], ends=[h_end, w_end])
conv2d_b = fluid.layers.conv2d(
slice_a,
C_out // 2,
......
......@@ -31,7 +31,10 @@ from PIL import Image
from PIL import ImageOps
import numpy as np
import cPickle
try:
import cPickle as pickle
except ImportError:
import pickle
import random
import utils
import paddle.fluid as fluid
......@@ -46,10 +49,9 @@ image_size = 32
image_depth = 3
half_length = 8
CIFAR_MEAN = [0.4914, 0.4822, 0.4465]
CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124]
CIFAR_STD = [0.24703233, 0.24348505, 0.26158768]
def generate_reshape_label(label, batch_size, CIFAR_CLASSES=10):
reshape_label = np.zeros((batch_size, 1), dtype='int32')
reshape_non_label = np.zeros(
......@@ -82,10 +84,11 @@ def generate_bernoulli_number(batch_size, CIFAR_CLASSES=10):
def preprocess(sample, is_training, args):
image_array = sample.reshape(3, image_size, image_size)
rgb_array = np.transpose(image_array, (1, 2, 0))
img = Image.fromarray(rgb_array, 'RGB')
if is_training:
# pad and random crop
img = ImageOps.expand(img, (4, 4, 4, 4), fill=0) # pad to 40 * 40 * 3
......@@ -94,13 +97,13 @@ def preprocess(sample, is_training, args):
left_top[1] + image_size))
if np.random.randint(2):
img = img.transpose(Image.FLIP_LEFT_RIGHT)
img = np.array(img).astype(np.float32)
# per_image_standardization
img_float = img / 255.0
img = (img_float - CIFAR_MEAN) / CIFAR_STD
if is_training and args.cutout:
center = np.random.randint(image_size, size=2)
offset_width = max(0, center[0] - half_length)
......@@ -111,7 +114,7 @@ def preprocess(sample, is_training, args):
for i in range(offset_height, target_height):
for j in range(offset_width, target_width):
img[i][j][:] = 0.0
img = np.transpose(img, (2, 0, 1))
return img
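Side note: the per-pixel cutout loop above is equivalent to a single slice assignment on the H x W x C array (sketch only, reusing the offsets computed above):

# vectorized cutout, same effect as the nested loops
img[offset_height:target_height, offset_width:target_width, :] = 0.0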
......@@ -123,13 +126,15 @@ def reader_creator_filepath(filename, sub_name, is_training, args):
datasets = []
for name in names:
print("Reading file " + name)
batch = cPickle.load(open(filename + name, 'rb'))
batch = pickle.load(open(filename + name, 'rb'))
data = batch['data']
labels = batch.get('labels', batch.get('fine_labels', None))
assert labels is not None
dataset = zip(data, labels)
datasets.extend(dataset)
random.shuffle(datasets)
if is_training:
random.shuffle(datasets)
def read_batch(datasets, args):
for sample, label in datasets:
......@@ -145,6 +150,10 @@ def reader_creator_filepath(filename, sub_name, is_training, args):
if len(batch_data) == args.batch_size:
batch_data = np.array(batch_data, dtype='float32')
batch_label = np.array(batch_label, dtype='int64')
#
# batch_data = pickle.load(open('input.pkl'))
# batch_label = pickle.load(open('target.pkl')).reshape(-1,1)
#
if is_training:
flatten_label, flatten_non_label = \
generate_reshape_label(batch_label, args.batch_size)
......@@ -160,6 +169,24 @@ def reader_creator_filepath(filename, sub_name, is_training, args):
yield batch_out
batch_data = []
batch_label = []
if len(batch_data) != 0:
batch_data = np.array(batch_data, dtype='float32')
batch_label = np.array(batch_label, dtype='int64')
if is_training:
flatten_label, flatten_non_label = \
generate_reshape_label(batch_label, len(batch_data))
rad_var = generate_bernoulli_number(len(batch_data))
mixed_x, y_a, y_b, lam = utils.mixup_data(
batch_data, batch_label, len(batch_data),
args.mix_alpha)
batch_out = [[mixed_x, y_a, y_b, lam, flatten_label, \
flatten_non_label, rad_var]]
yield batch_out
else:
batch_out = [[batch_data, batch_label]]
yield batch_out
batch_data = []
batch_label = []
return reader
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Based on:
# --------------------------------------------------------
# DARTS
# Copyright (c) 2018, Hanxiao Liu.
# Licensed under the Apache License, Version 2.0;
# --------------------------------------------------------
from PIL import Image
from PIL import ImageOps
import numpy as np
try:
import cPickle as pickle
except ImportError:
import pickle
import random
import utils
import paddle.fluid as fluid
import time
import os
import functools
import paddle.reader
import math
__all__ = ['train', 'test']
train_image_size = 224
test_image_size = 256
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]):
aspect_ratio = math.sqrt(np.random.uniform(*ratio))
w = 1. * aspect_ratio
h = 1. / aspect_ratio
bound = min((float(img.size[0]) / img.size[1]) / (w**2),
(float(img.size[1]) / img.size[0]) / (h**2))
scale_max = min(scale[1], bound)
scale_min = min(scale[0], bound)
target_area = img.size[0] * img.size[1] * np.random.uniform(scale_min,
scale_max)
target_size = math.sqrt(target_area)
w = int(target_size * w)
h = int(target_size * h)
i = np.random.randint(0, img.size[0] - w + 1)
j = np.random.randint(0, img.size[1] - h + 1)
img = img.crop((i, j, i + w, j + h))
img = img.resize((size, size), Image.BILINEAR)
return img
def crop_image(img, target_size, center=True):
width, height = img.size
size = target_size
if center:
w_start = (width - size) / 2
h_start = (height - size) / 2
else:
w_start = np.random.randint(0, width - size + 1)
h_start = np.random.randint(0, height - size + 1)
w_end = w_start + size
h_end = h_start + size
img = img.crop((w_start, h_start, w_end, h_end))
return img
def preprocess(img_path, is_training):
img = Image.open(img_path)
if is_training:
# random resized crop
img = random_crop(img, train_image_size)
# random horizontal flip
if np.random.randint(2):
img = img.transpose(Image.FLIP_LEFT_RIGHT)
else:
# resize
img = img.resize((test_image_size, test_image_size), Image.BILINEAR)
# center crop
img = crop_image(img, train_image_size)
if img.mode != 'RGB':
img = img.convert('RGB')
img = np.array(img).astype(np.float32)
# normalize with ImageNet channel mean and std
img_float = img / 255.0
img = (img_float - IMAGENET_MEAN) / IMAGENET_STD
img = np.transpose(img, (2, 0, 1))
return img
def reader_creator_filepath(data_dir, sub_name, is_training):
file_list = os.path.join(data_dir, sub_name)
image_file = 'train' if is_training else 'val'
dataset_path = os.path.join(data_dir, image_file)
print(dataset_path)
def reader():
with open(file_list) as flist:
lines = [line.strip() for line in flist]
if is_training:
np.random.shuffle(lines)
for line in lines:
img_path, label = line.split()
#img_path = img_path.replace("JPEG", "jpeg")
img_path_ = os.path.join(dataset_path, img_path)
img = preprocess(img_path_, is_training)
yield img, int(label)
return reader
def train(args):
"""
ImageNet training set creator.
It returns a reader creator; each sample in the reader is a preprocessed
image array and an integer label in [0, 999].
:return: Training reader creator
:rtype: callable
"""
return reader_creator_filepath(args.data, 'train.txt', True)
def test(args):
"""
ImageNet validation set creator.
It returns a reader creator; each sample in the reader is a preprocessed
image array and an integer label in [0, 999].
:return: Test reader creator.
:rtype: callable
"""
return reader_creator_filepath(args.data, 'val.txt', False)
CUDA_VISIBLE_DEVICES=0 python -u train_mixup.py \
--batch_size=80 \
--auxiliary \
--weight_decay=0.0003 \
--learning_rate=0.025 \
--lrc_loss_lambda=0.7 \
--cutout
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
#
# Based on:
# --------------------------------------------------------
# DARTS
# Copyright (c) 2018, Hanxiao Liu.
# Licensed under the Apache License, Version 2.0;
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from learning_rate import cosine_with_warmup_decay
import numpy as np
import argparse
from model import NetworkImageNet as Network
import reader_imagenet as reader
import sys
import os
import time
import logging
import genotypes
import paddle
import paddle.fluid as fluid
import shutil
import utils
import math
parser = argparse.ArgumentParser("imagenet")
parser.add_argument(
'--data',
type=str,
default='./dataset/imagenet/',
help='location of the data corpus')
parser.add_argument('--batch_size', type=int, default=64, help='batch size')
parser.add_argument(
'--pretrained_model', type=str, default='/save_models/599', help='pretrained model to load')
parser.add_argument('--model_id', type=int, default=2, help='model id')
parser.add_argument(
'--learning_rate', type=float, default=0.025, help='init learning rate')
parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
parser.add_argument(
'--weight_decay', type=float, default=4e-5, help='weight decay')
parser.add_argument(
'--report_freq', type=float, default=10, help='report frequency')
parser.add_argument(
'--epochs', type=int, default=90, help='num of training epochs')
parser.add_argument(
'--init_channels', type=int, default=96, help='num of init channels')
parser.add_argument(
'--layers', type=int, default=20, help='total number of layers')
parser.add_argument(
'--save_model_path',
type=str,
default='save_models',
help='path to save the model')
parser.add_argument(
'--auxiliary',
action='store_true',
default=False,
help='use auxiliary tower')
parser.add_argument(
'--auxiliary_weight',
type=float,
default=0.4,
help='weight for auxiliary loss')
parser.add_argument(
'--drop_path_prob', type=float, default=0.4, help='drop path probability')
parser.add_argument(
'--arch', type=str, default='DARTS', help='which architecture to use')
parser.add_argument(
'--grad_clip', type=float, default=5, help='gradient clipping')
parser.add_argument(
'--warmup_epochs',
default=5,
type=float,
help='warm up to learning rate')
parser.add_argument('--lr_min', type=float, default=0.0001,
help='minimum learning rate for a single GPU')
args = parser.parse_args()
ImageNet_CLASSES = 1000
dataset_train_size = 1281167
image_size = 224
genotypes.DARTS = genotypes.MY_DARTS_list[args.model_id]
def main():
image_shape = [3, image_size, image_size]
devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
devices_num = len(devices.split(","))
logging.info("args = %s", args)
genotype = eval("genotypes.%s" % args.arch)
model = Network(args.init_channels, ImageNet_CLASSES, args.layers,
args.auxiliary, genotype)
steps_one_epoch = math.ceil(dataset_train_size / (devices_num * args.batch_size))
train(model, args, image_shape, steps_one_epoch, devices_num)
def build_program(main_prog, startup_prog, args, is_train, model, im_shape,
steps_one_epoch, num_gpu):
out = []
with fluid.program_guard(main_prog, startup_prog):
py_reader = model.build_input(im_shape, is_train)
if is_train:
with fluid.unique_name.guard():
loss = model.train_model(py_reader, args.init_channels,
args.auxiliary, args.auxiliary_weight)
optimizer = fluid.optimizer.Momentum(
learning_rate=cosine_with_warmup_decay(\
args.learning_rate, args.lr_min, steps_one_epoch,\
args.warmup_epochs, args.epochs, num_gpu),
regularization=fluid.regularizer.L2Decay(\
args.weight_decay),
momentum=args.momentum)
optimizer.minimize(loss)
out = [py_reader, loss]
else:
with fluid.unique_name.guard():
prob, acc_1, acc_5 = model.test_model(py_reader,
args.init_channels)
out = [py_reader, prob, acc_1, acc_5]
return out
def train(model, args, im_shape, steps_one_epoch, num_gpu):
train_startup_prog = fluid.Program()
test_startup_prog = fluid.Program()
train_prog = fluid.Program()
test_prog = fluid.Program()
train_py_reader, loss_train = build_program(train_prog, train_startup_prog,
args, True, model, im_shape,
steps_one_epoch, num_gpu)
test_py_reader, prob, acc_1, acc_5 = build_program(
test_prog, test_startup_prog, args, False, model, im_shape,
steps_one_epoch, num_gpu)
test_prog = test_prog.clone(for_test=True)
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(train_startup_prog)
exe.run(test_startup_prog)
#if args.pretrained_model:
# def if_exist(var):
# return os.path.exists(os.path.join(args.pretrained_model, var.name))
# fluid.io.load_vars(exe, args.pretrained_model, main_program=train_prog, predicate=if_exist)
exec_strategy = fluid.ExecutionStrategy()
exec_strategy.num_threads = 1
train_exe = fluid.ParallelExecutor(
main_program=train_prog,
use_cuda=True,
loss_name=loss_train.name,
exec_strategy=exec_strategy)
train_batch_size = args.batch_size
test_batch_size = 256
train_reader = paddle.batch(
reader.train(args), batch_size=train_batch_size, drop_last=True)
test_reader = paddle.batch(reader.test(args), batch_size=test_batch_size)
train_py_reader.decorate_paddle_reader(train_reader)
test_py_reader.decorate_paddle_reader(test_reader)
fluid.clip.set_gradient_clip(fluid.clip.GradientClipByGlobalNorm(args.grad_clip), program=train_prog)
train_fetch_list = [loss_train]
fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list))
def save_model(postfix, main_prog):
model_path = os.path.join(args.save_model_path, postfix)
if os.path.isdir(model_path):
shutil.rmtree(model_path)
fluid.io.save_persistables(exe, model_path, main_program=main_prog)
def test(epoch_id):
test_fetch_list = [prob, acc_1, acc_5]
#objs = utils.AvgrageMeter()
#prob = []
top1 = utils.AvgrageMeter()
top5 = utils.AvgrageMeter()
test_py_reader.start()
test_start_time = time.time()
step_id = 0
try:
while True:
prev_test_start_time = test_start_time
test_start_time = time.time()
prob_v, acc_1_v, acc_5_v = exe.run(
test_prog, fetch_list=test_fetch_list)
top1.update(np.array(acc_1_v), np.array(prob_v).shape[0])
top5.update(np.array(acc_5_v), np.array(prob_v).shape[0])
if step_id % args.report_freq == 0:
print("Epoch {}, Step {}, acc_1 {}, acc_5 {}, time {}".
format(epoch_id, step_id,
np.array(acc_1_v),
np.array(acc_5_v), test_start_time -
prev_test_start_time))
step_id += 1
except fluid.core.EOFException:
test_py_reader.reset()
print("Epoch {0}, top1 {1}, top5 {2}".format(epoch_id, top1.avg,
top5.avg))
epoch_start_time = time.time()
for epoch_id in range(args.epochs):
model.drop_path_prob = args.drop_path_prob * epoch_id / args.epochs
train_py_reader.start()
epoch_end_time = time.time()
if epoch_id > 0:
print("Epoch {}, total time {}".format(epoch_id - 1, epoch_end_time
- epoch_start_time))
epoch_start_time = epoch_end_time
start_time = time.time()
step_id = 0
try:
while True:
prev_start_time = start_time
start_time = time.time()
loss_v, = train_exe.run(
fetch_list=[v.name for v in train_fetch_list])
print("Epoch {}, Step {}, loss {}, time {}".format(epoch_id, step_id, \
np.array(loss_v).mean(), start_time-prev_start_time))
step_id += 1
sys.stdout.flush()
#os._exit(1)
except fluid.core.EOFException:
train_py_reader.reset()
if epoch_id % 50 == 0 or epoch_id == args.epochs - 1:
save_model(str(epoch_id), train_prog)
test(epoch_id)
if __name__ == '__main__':
main()
......@@ -26,7 +26,7 @@ from learning_rate import cosine_decay
import numpy as np
import argparse
from model import NetworkCIFAR as Network
import reader
import reader_cifar as reader
import sys
import os
import time
......@@ -35,7 +35,7 @@ import genotypes
import paddle.fluid as fluid
import shutil
import utils
import cPickle as cp
import math
parser = argparse.ArgumentParser("cifar")
parser.add_argument(
......@@ -44,6 +44,9 @@ parser.add_argument(
default='./dataset/cifar/cifar-10-batches-py/',
help='location of the data corpus')
parser.add_argument('--batch_size', type=int, default=96, help='batch size')
parser.add_argument(
'--pretrained_model', type=str, default='/save_models/599', help='pretrained model to load')
parser.add_argument('--model_id', type=int, help='model id')
parser.add_argument(
'--learning_rate', type=float, default=0.025, help='init learning rate')
parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
......@@ -58,7 +61,7 @@ parser.add_argument(
parser.add_argument(
'--layers', type=int, default=20, help='total number of layers')
parser.add_argument(
'--model_path',
'--save_model_path',
type=str,
default='saved_models',
help='path to save the model')
......@@ -78,7 +81,6 @@ parser.add_argument(
'--cutout_length', type=int, default=16, help='cutout length')
parser.add_argument(
'--drop_path_prob', type=float, default=0.2, help='drop path probability')
parser.add_argument('--save', type=str, default='EXP', help='experiment name')
parser.add_argument(
'--arch', type=str, default='DARTS', help='which architecture to use')
parser.add_argument(
......@@ -100,9 +102,9 @@ parser.add_argument(
args = parser.parse_args()
CIFAR_CLASSES = 10
dataset_train_size = 50000
dataset_train_size = 50000.
image_size = 32
genotypes.DARTS = genotypes.MY_DARTS_list[args.model_id]
def main():
image_shape = [3, image_size, image_size]
......@@ -112,7 +114,8 @@ def main():
genotype = eval("genotypes.%s" % args.arch)
model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
args.auxiliary, genotype)
steps_one_epoch = dataset_train_size / (devices_num * args.batch_size)
steps_one_epoch = math.ceil(dataset_train_size / (devices_num * args.batch_size))
train(model, args, image_shape, steps_one_epoch)
......@@ -120,12 +123,12 @@ def build_program(main_prog, startup_prog, args, is_train, model, im_shape,
steps_one_epoch):
out = []
with fluid.program_guard(main_prog, startup_prog):
py_reader = model.build_input(im_shape, args.batch_size, is_train)
py_reader = model.build_input(im_shape, is_train)
if is_train:
with fluid.unique_name.guard():
loss = model.train_model(py_reader, args.init_channels,
args.auxiliary, args.auxiliary_weight,
args.batch_size, args.lrc_loss_lambda)
args.lrc_loss_lambda)
optimizer = fluid.optimizer.Momentum(
learning_rate=cosine_decay(args.learning_rate, \
args.epochs, steps_one_epoch),
......@@ -136,9 +139,9 @@ def build_program(main_prog, startup_prog, args, is_train, model, im_shape,
out = [py_reader, loss]
else:
with fluid.unique_name.guard():
loss, acc_1, acc_5 = model.test_model(py_reader,
prob, acc_1, acc_5 = model.test_model(py_reader,
args.init_channels)
out = [py_reader, loss, acc_1, acc_5]
out = [py_reader, prob, acc_1, acc_5]
return out
......@@ -152,7 +155,7 @@ def train(model, args, im_shape, steps_one_epoch):
args, True, model, im_shape,
steps_one_epoch)
test_py_reader, loss_test, acc_1, acc_5 = build_program(
test_py_reader, prob, acc_1, acc_5 = build_program(
test_prog, test_startup_prog, args, False, model, im_shape,
steps_one_epoch)
......@@ -163,6 +166,13 @@ def train(model, args, im_shape, steps_one_epoch):
exe.run(train_startup_prog)
exe.run(test_startup_prog)
#if args.pretrained_model:
# def if_exist(var):
# return os.path.exists(os.path.join(args.pretrained_model, var.name))
# fluid.io.load_vars(exe, args.pretrained_model, main_program=train_prog, predicate=if_exist)
exec_strategy = fluid.ExecutionStrategy()
exec_strategy.num_threads = 1
train_exe = fluid.ParallelExecutor(
......@@ -170,23 +180,27 @@ def train(model, args, im_shape, steps_one_epoch):
use_cuda=True,
loss_name=loss_train.name,
exec_strategy=exec_strategy)
train_reader = reader.train10(args)
test_reader = reader.test10(args)
train_py_reader.decorate_paddle_reader(train_reader)
test_py_reader.decorate_paddle_reader(test_reader)
fluid.clip.set_gradient_clip(fluid.clip.GradientClipByNorm(args.grad_clip))
fluid.memory_optimize(fluid.default_main_program())
fluid.clip.set_gradient_clip(fluid.clip.GradientClipByGlobalNorm(args.grad_clip), program=train_prog)
train_fetch_list = [loss_train]
fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list))
def save_model(postfix, main_prog):
model_path = os.path.join(args.model_path, postfix)
model_path = os.path.join(args.save_model_path, postfix)
if os.path.isdir(model_path):
shutil.rmtree(model_path)
fluid.io.save_persistables(exe, model_path, main_program=main_prog)
def test(epoch_id):
test_fetch_list = [loss_test, acc_1, acc_5]
objs = utils.AvgrageMeter()
test_fetch_list = [prob, acc_1, acc_5]
#objs = utils.AvgrageMeter()
#prob = []
top1 = utils.AvgrageMeter()
top5 = utils.AvgrageMeter()
test_py_reader.start()
......@@ -196,11 +210,10 @@ def train(model, args, im_shape, steps_one_epoch):
while True:
prev_test_start_time = test_start_time
test_start_time = time.time()
loss_test_v, acc_1_v, acc_5_v = exe.run(
prob_v, acc_1_v, acc_5_v = exe.run(
test_prog, fetch_list=test_fetch_list)
objs.update(np.array(loss_test_v), args.batch_size)
top1.update(np.array(acc_1_v), args.batch_size)
top5.update(np.array(acc_5_v), args.batch_size)
top1.update(np.array(acc_1_v), np.array(prob_v).shape[0])
top5.update(np.array(acc_5_v), np.array(prob_v).shape[0])
if step_id % args.report_freq == 0:
print("Epoch {}, Step {}, acc_1 {}, acc_5 {}, time {}".
format(epoch_id, step_id,
......@@ -213,7 +226,6 @@ def train(model, args, im_shape, steps_one_epoch):
print("Epoch {0}, top1 {1}, top5 {2}".format(epoch_id, top1.avg,
top5.avg))
train_fetch_list = [loss_train]
epoch_start_time = time.time()
for epoch_id in range(args.epochs):
model.drop_path_prob = args.drop_path_prob * epoch_id / args.epochs
......
......@@ -34,6 +34,10 @@ def mixup_data(x, y, batch_size, alpha=1.0):
lam = 1.
index = np.random.permutation(batch_size)
#
#lam = 0.5
#index = np.arange(batch_size-1, -1, -1)
#
mixed_x = lam * x + (1 - lam) * x[index, :]
y_a, y_b = y, y[index]
return mixed_x.astype('float32'), y_a.astype('int64'),\
......
import numpy as np
import cPickle as cp
import sys, os
#model_path = 'final_paddle-results'
model_path = 'paddle-results'
fl = os.listdir(model_path)
labels = np.load('labels.npz')['arr_0']
pred = np.zeros((10000, 10))
fl.sort()
i = 0
weight = 1
for f in fl:
print(f)
if i == 1: weight = 1.2
if i == 2: weight = 0.8
if i == 3: weight = 1.3
if i == 4: weight = 1.1
if i == 5: weight = 0.9
pred += weight * cp.load(open(os.path.join(model_path, f), 'rb'))
print(np.mean(np.argmax(pred, axis=1) == labels))
i += 1