Commit a2f4ad99 authored by: R root

add_more_config_for_lrc

Parent 71db55ab
......@@ -113,4 +113,10 @@ MY_DARTS = Genotype(
('skip_connect', 2), ('skip_connect', 3)],
reduce_concat=range(2, 6))
DARTS = MY_DARTS
MY_DARTS_list = [
Genotype(normal=[('sep_conv_3x3', 0), ('skip_connect', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('sep_conv_3x3', 2)],normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2),('max_pool_3x3', 0), ('skip_connect', 3), ('avg_pool_3x3', 1), ('skip_connect', 2), ('skip_connect', 3)], reduce_concat=range(2, 6)),
Genotype(normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('dil_conv_3x3', 2), ('skip_connect', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('skip_connect', 1)],normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2),('dil_conv_3x3', 0), ('skip_connect', 3), ('skip_connect', 2), ('skip_connect', 3), ('skip_connect',2)], reduce_concat=range(2, 6)),
Genotype(normal=[('sep_conv_3x3', 0), ('skip_connect', 1), ('skip_connect', 0), ('dil_conv_5x5', 1), ('skip_connect', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), ('sep_conv_3x3', 1)], normal_concat=range(2, 6), reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('skip_connect', 2), ('max_pool_3x3', 0), ('skip_connect', 2), ('skip_connect', 2), ('skip_connect', 3)], reduce_concat=range(2, 6))
]
DARTS = MY_DARTS_list[0]
......@@ -38,6 +38,41 @@ def cosine_decay(learning_rate, num_epoch, steps_one_epoch):
with init_on_cpu():
decayed_lr = learning_rate * \
(ops.cos((global_step / steps_one_epoch) \
(ops.cos(fluid.layers.floor(global_step / steps_one_epoch) \
* math.pi / num_epoch) + 1)/2
return decayed_lr
def cosine_with_warmup_decay(learning_rate, lr_min, steps_one_epoch,
warmup_epochs, total_epoch, num_gpu):
global_step = _decay_step_counter()
epoch_idx = fluid.layers.floor(global_step / steps_one_epoch)
lr = fluid.layers.create_global_var(
shape=[1],
value=0.0,
dtype='float32',
persistable=True,
name="learning_rate")
warmup_epoch_var = fluid.layers.fill_constant(
shape=[1], dtype='float32', value=float(warmup_epochs), force_cpu=True)
num_gpu_var = fluid.layers.fill_constant(
shape=[1], dtype='float32', value=float(num_gpu), force_cpu=True)
batch_idx = global_step - steps_one_epoch * epoch_idx
with fluid.layers.control_flow.Switch() as switch:
with switch.case(epoch_idx < warmup_epoch_var):
epoch_ = (batch_idx + 1) / steps_one_epoch
factor = 1 / num_gpu_var * (epoch_ * (num_gpu_var - 1) / warmup_epoch_var + 1)
decayed_lr = learning_rate * factor * num_gpu_var
fluid.layers.assign(decayed_lr, lr)
epoch_ = (batch_idx + 1) / steps_one_epoch
m = epoch_ / total_epoch
frac = (1 + ops.cos(math.pi * m)) / 2
cosine_lr = (lr_min + (learning_rate - lr_min) * frac) * num_gpu_var
with switch.default():
fluid.layers.assign(cosine_lr, lr)
return lr
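A minimal plain-Python sketch of the piecewise schedule that the Switch block above builds, mirroring the formulas as written (illustrative only, not part of this commit; warmup_cosine_lr is a hypothetical helper name):

import math

def warmup_cosine_lr(global_step, learning_rate, lr_min, steps_one_epoch,
                     warmup_epochs, total_epoch, num_gpu):
    # Position of the current step inside its epoch, as in the graph version above.
    epoch_idx = global_step // steps_one_epoch
    batch_idx = global_step - steps_one_epoch * epoch_idx
    epoch_ = (batch_idx + 1) / float(steps_one_epoch)
    if epoch_idx < warmup_epochs:
        # Linear warmup from learning_rate towards num_gpu * learning_rate.
        factor = (epoch_ * (num_gpu - 1) / float(warmup_epochs) + 1) / float(num_gpu)
        return learning_rate * factor * num_gpu
    # Cosine interpolation between learning_rate and lr_min, scaled by num_gpu.
    frac = (1 + math.cos(math.pi * epoch_ / float(total_epoch))) / 2
    return (lr_min + (learning_rate - lr_min) * frac) * num_gpu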
......@@ -97,7 +97,9 @@ class Cell():
def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'):
relu_a = fluid.layers.relu(input)
relu_a = fluid.layers.relu(input, inplace=True)
#relu_a.persistable = True
#print(relu_a)
pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', 3)
conv2d_a = fluid.layers.conv2d(
pool_a,
......@@ -141,6 +143,8 @@ def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'):
initializer=Constant(0.), name=bn_b_name + '.bias'),
moving_mean_name=bn_b_name + '.running_mean',
moving_variance_name=bn_b_name + '.running_var')
#bn_b.persistable = True
#print(bn_b)
fc_name = aux_name + '.classifier'
fc = fluid.layers.fc(bn_b,
num_classes,
......@@ -174,11 +178,12 @@ def StemConv(input, C_out, kernel_size, padding):
return bn_a
class NetworkCIFAR(object):
def __init__(self, C, class_num, layers, auxiliary, genotype):
self.class_num = class_num
self._layers = layers
self._auxiliary = auxiliary
self.class_num = class_num
stem_multiplier = 3
self.drop_path_prob = 0
......@@ -201,36 +206,12 @@ class NetworkCIFAR(object):
if i == 2 * layers // 3:
C_to_auxiliary = C_prev
def forward(self, init_channel, is_train):
self.training = is_train
self.logits_aux = None
num_channel = init_channel * 3
s0 = StemConv(self.image, num_channel, kernel_size=3, padding=1)
s1 = s0
for i, cell in enumerate(self.cells):
name = 'cells.' + str(i) + '.'
s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train,
name)
if i == int(2 * self._layers // 3):
if self._auxiliary and self.training:
self.logits_aux = AuxiliaryHeadCIFAR(s1, self.class_num)
out = fluid.layers.adaptive_pool2d(s1, (1, 1), "avg")
self.logits = fluid.layers.fc(out,
size=self.class_num,
param_attr=ParamAttr(
initializer=Normal(scale=1e-3),
name='classifier.weight'),
bias_attr=ParamAttr(
initializer=Constant(0.),
name='classifier.bias'))
return self.logits, self.logits_aux
def build_input(self, image_shape, batch_size, is_train):
def build_input(self, image_shape, is_train):
if is_train:
py_reader = fluid.layers.py_reader(
capacity=64,
shapes=[[-1] + image_shape, [-1, 1], [-1, 1], [-1, 1], [-1, 1],
[-1, 1], [-1, batch_size, self.class_num - 1]],
[-1, 1], [50, -1, self.class_num - 1]],
lod_levels=[0, 0, 0, 0, 0, 0, 0],
dtypes=[
"float32", "int64", "int64", "float32", "int32", "int32",
......@@ -248,14 +229,52 @@ class NetworkCIFAR(object):
name='test_reader')
return py_reader
def train_model(self, py_reader, init_channels, aux, aux_w, batch_size,
loss_lambda):
def forward(self, init_channel, is_train):
self.training = is_train
self.logits_aux = None
num_channel = init_channel * 3
s0 = s1 = StemConv(self.image, num_channel, kernel_size=3, padding=1)
#s0.persistable = True
#print(s0)
for i, cell in enumerate(self.cells):
#s1.persistable = True
#print(s1)
name = 'cells.' + str(i) + '.'
s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train,
name)
if i == int(2 * self._layers // 3):
if self._auxiliary and self.training:
#s1.persistable = True
#print(s1)
self.logits_aux = AuxiliaryHeadCIFAR(s1, self.class_num)
#self.logits_aux.persistable = True
#print(self.logits_aux)
out = fluid.layers.adaptive_pool2d(s1, (1, 1), "avg")
#out.persistable = True
#print(out)
self.logits = fluid.layers.fc(out,
size=self.class_num,
param_attr=ParamAttr(
initializer=Normal(scale=1e-3),
name='classifier.weight'),
bias_attr=ParamAttr(
initializer=Constant(0.),
name='classifier.bias'))
#self.logits.persistable = True
#print(self.logits)
#print(self.logits_aux)
return self.logits, self.logits_aux
def train_model(self, py_reader, init_channels, aux, aux_w, loss_lambda):
self.image, self.ya, self.yb, self.lam, self.label_reshape,\
self.non_label_reshape, self.rad_var = fluid.layers.read_file(py_reader)
self.logits, self.logits_aux = self.forward(init_channels, True)
self.mixup_loss = self.mixup_loss(aux, aux_w)
self.lrc_loss = self.lrc_loss(batch_size)
return self.mixup_loss + loss_lambda * self.lrc_loss
#self.lrc_loss = self.lrc_loss()
#return self.mixup_loss + loss_lambda * self.lrc_loss
return self.mixup_loss
def test_model(self, py_reader, init_channels):
self.image, self.ya = fluid.layers.read_file(py_reader)
......@@ -264,12 +283,13 @@ class NetworkCIFAR(object):
loss = fluid.layers.cross_entropy(prob, self.ya)
acc_1 = fluid.layers.accuracy(self.logits, self.ya, k=1)
acc_5 = fluid.layers.accuracy(self.logits, self.ya, k=5)
return loss, acc_1, acc_5
return prob, acc_1, acc_5
def mixup_loss(self, auxiliary, auxiliary_weight):
prob = fluid.layers.softmax(self.logits, use_cudnn=False)
loss_a = fluid.layers.cross_entropy(prob, self.ya)
loss_b = fluid.layers.cross_entropy(prob, self.yb)
loss_a_mean = fluid.layers.reduce_mean(loss_a)
loss_b_mean = fluid.layers.reduce_mean(loss_b)
loss = self.lam * loss_a_mean + (1 - self.lam) * loss_b_mean
......@@ -281,9 +301,10 @@ class NetworkCIFAR(object):
loss_b_aux_mean = fluid.layers.reduce_mean(loss_b_aux)
loss_aux = self.lam * loss_a_aux_mean + (1 - self.lam
) * loss_b_aux_mean
#print(loss_aux)
return loss + auxiliary_weight * loss_aux
def lrc_loss(self, batch_size):
def lrc_loss(self):
y_diff_reshape = fluid.layers.reshape(self.logits, shape=(-1, 1))
label_reshape = fluid.layers.squeeze(self.label_reshape, axes=[1])
non_label_reshape = fluid.layers.squeeze(
......@@ -296,18 +317,247 @@ class NetworkCIFAR(object):
y_diff_non_label_reshape = fluid.layers.gather(y_diff_reshape,
non_label_reshape)
y_diff_label = fluid.layers.reshape(
y_diff_label_reshape, shape=(-1, batch_size, 1))
y_diff_label_reshape, shape=(1, -1, 1))
y_diff_non_label = fluid.layers.reshape(
y_diff_non_label_reshape,
shape=(-1, batch_size, self.class_num - 1))
shape=(1, -1, self.class_num - 1))
y_diff_ = y_diff_non_label - y_diff_label
y_diff_ = fluid.layers.transpose(y_diff_, perm=[1, 2, 0])
rad_var_trans = fluid.layers.transpose(self.rad_var, perm=[1, 2, 0])
rad_y_diff_trans = rad_var_trans * y_diff_
lrc_loss_sum = fluid.layers.reduce_sum(rad_y_diff_trans, dim=[0, 1])
lrc_loss_ = fluid.layers.abs(lrc_loss_sum) / (batch_size *
(self.class_num - 1))
shape_nbc = fluid.layers.shape(rad_y_diff_trans)
shape_nb = fluid.layers.slice(shape_nbc, axes=[0], starts=[0], ends=[2])
num = fluid.layers.reduce_prod(shape_nb)
num.stop_gradient = True
lrc_loss_ = fluid.layers.abs(lrc_loss_sum) / num
lrc_loss_mean = fluid.layers.reduce_mean(lrc_loss_)
return lrc_loss_mean
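For readability, a rough NumPy restatement of the LRC term computed above (illustrative only, not part of this commit), assuming logits of shape (B, C), integer labels of shape (B,), and rad holding S independent +/-1 Rademacher draws of shape (S, B, C-1); lrc_loss_numpy is a hypothetical name:

import numpy as np

def lrc_loss_numpy(logits, labels, rad):
    # logits: (B, C) raw scores; labels: (B,) int class ids; rad: (S, B, C-1) +/-1 samples.
    B, C = logits.shape
    y_label = logits[np.arange(B), labels]            # score of the true class, (B,)
    mask = np.ones_like(logits, dtype=bool)
    mask[np.arange(B), labels] = False
    y_non_label = logits[mask].reshape(B, C - 1)      # scores of the other classes, (B, C-1)
    y_diff = y_non_label - y_label[:, None]           # margin terms, (B, C-1)
    # |sum over the batch and non-label classes of rad * y_diff|, normalized by B*(C-1),
    # then averaged over the S Rademacher draws.
    per_draw = np.abs((rad * y_diff[None]).sum(axis=(1, 2))) / (B * (C - 1))
    return per_draw.mean()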
def AuxiliaryHeadImageNet(input, num_classes, aux_name='auxiliary_head'):
relu_a = fluid.layers.relu(input, inplace=True)
#relu_a.persistable = True
#print(relu_a)
pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', pool_stride=2)
conv2d_a = fluid.layers.conv2d(
pool_a,
128,
1,
name=aux_name + '.features.2',
param_attr=ParamAttr(
initializer=Xavier(
uniform=False, fan_in=0),
name=aux_name + '.features.2.weight'),
bias_attr=False)
bn_a_name = aux_name + '.features.3'
bn_a = fluid.layers.batch_norm(
conv2d_a,
act='relu',
name=bn_a_name,
param_attr=ParamAttr(
initializer=Constant(1.), name=bn_a_name + '.weight'),
bias_attr=ParamAttr(
initializer=Constant(0.), name=bn_a_name + '.bias'),
moving_mean_name=bn_a_name + '.running_mean',
moving_variance_name=bn_a_name + '.running_var')
conv2d_b = fluid.layers.conv2d(
bn_a,
768,
2,
act='relu',
name=aux_name + '.features.5',
param_attr=ParamAttr(
initializer=Xavier(
uniform=False, fan_in=0),
name=aux_name + '.features.5.weight'),
bias_attr=False)
#bn_b.persistable = True
#print(bn_b)
fc_name = aux_name + '.classifier'
fc = fluid.layers.fc(conv2d_b,
num_classes,
name=fc_name,
param_attr=ParamAttr(
initializer=Normal(scale=1e-3),
name=fc_name + '.weight'),
bias_attr=ParamAttr(
initializer=Constant(0.), name=fc_name + '.bias'))
return fc
def Stem0Conv(input, C_out):
conv_a = fluid.layers.conv2d(
input,
C_out // 2,
3,
stride=2,
padding=1,
param_attr=ParamAttr(
initializer=Xavier(
uniform=False, fan_in=0), name='stem0.0.weight'),
bias_attr=False)
bn_a = fluid.layers.batch_norm(
conv_a,
param_attr=ParamAttr(
initializer=Constant(1.), name='stem0.1.weight'),
bias_attr=ParamAttr(
initializer=Constant(0.), name='stem0.1.bias'),
moving_mean_name='stem0.1.running_mean',
moving_variance_name='stem0.1.running_var')
relu_a = fluid.layers.relu(bn_a, inplace=True)
conv_b = fluid.layers.conv2d(
relu_a,
C_out,
3,
padding=1,
param_attr=ParamAttr(
initializer=Xavier(
uniform=False, fan_in=0), name='stem0.3.weight'),
bias_attr=False)
bn_b = fluid.layers.batch_norm(
conv_b,
param_attr=ParamAttr(
initializer=Constant(1.), name='stem0.4.weight'),
bias_attr=ParamAttr(
initializer=Constant(0.), name='stem0.4.bias'),
moving_mean_name='stem0.4.running_mean',
moving_variance_name='stem0.4.running_var')
return bn_b
def Stem1Conv(input, C_out):
relu_a = fluid.layers.relu(input, inplace=True)
conv_a = fluid.layers.conv2d(
relu_a,
C_out,
3,
stride=2,
padding=1,
param_attr=ParamAttr(
initializer=Xavier(
uniform=False, fan_in=0), name='stem1.1.weight'),
bias_attr=False)
bn_a = fluid.layers.batch_norm(
conv_a,
param_attr=ParamAttr(
initializer=Constant(1.), name='stem1.2.weight'),
bias_attr=ParamAttr(
initializer=Constant(0.), name='stem1.2.bias'),
moving_mean_name='stem1.2.running_mean',
moving_variance_name='stem1.2.running_var')
return bn_a
class NetworkImageNet(object):
def __init__(self, C, class_num, layers, auxiliary, genotype):
self.class_num = class_num
self._layers = layers
self._auxiliary = auxiliary
self.drop_path_prob = 0
C_prev_prev, C_prev, C_curr = C, C, C
self.cells = []
reduction_prev = True
for i in range(layers):
if i in [layers // 3, 2 * layers // 3]:
C_curr *= 2
reduction = True
else:
reduction = False
cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction,
reduction_prev)
reduction_prev = reduction
self.cells += [cell]
C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
if i == 2 * layers // 3:
C_to_auxiliary = C_prev
self.stem0 = functools.partial(Stem0Conv, C_out=C)
self.stem1 = functools.partial(Stem1Conv, C_out=C)
def build_input(self, image_shape, is_train):
if is_train:
py_reader = fluid.layers.py_reader(
capacity=64,
shapes=[[-1] + image_shape, [-1, 1]],
lod_levels=[0, 0],
dtypes=[
"float32", "int64"],
use_double_buffer=True,
name='train_reader')
else:
py_reader = fluid.layers.py_reader(
capacity=64,
shapes=[[-1] + image_shape, [-1, 1]],
lod_levels=[0, 0],
dtypes=["float32", "int64"],
use_double_buffer=True,
name='test_reader')
return py_reader
def forward(self, init_channel, is_train):
self.training = is_train
self.logits_aux = None
num_channel = init_channel * 3
s0 = self.stem0(self.image)
s1 = self.stem1(s0)
for i, cell in enumerate(self.cells):
#s1.persistable = True
#print(s1)
name = 'cells.' + str(i) + '.'
s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train,
name)
if i == int(2 * self._layers // 3):
if self._auxiliary and self.training:
#s1.persistable = True
#print(s1)
self.logits_aux = AuxiliaryHeadImageNet(s1, self.class_num)
#self.logits_aux.persistable = True
#print(self.logits_aux)
out = fluid.layers.pool2d(s1, 7, "avg")
#out.persistable = True
#print(out)
self.logits = fluid.layers.fc(out,
size=self.class_num,
param_attr=ParamAttr(
initializer=Normal(scale=1e-3),
name='classifier.weight'),
bias_attr=ParamAttr(
initializer=Constant(0.),
name='classifier.bias'))
#self.logits.persistable = True
#print(self.logits)
#print(self.logits_aux)
return self.logits, self.logits_aux
def calc_loss(self, auxiliary, auxiliary_weight):
prob = fluid.layers.softmax(self.logits, use_cudnn=False)
loss = fluid.layers.cross_entropy(prob, self.label)
loss_mean = fluid.layers.reduce_mean(loss)
#if auxiliary:
# prob_aux = fluid.layers.softmax(self.logits_aux, use_cudnn=False)
# loss_aux = fluid.layers.cross_entropy(prob_aux, self.label)
# loss_aux_mean = fluid.layers.reduce_mean(loss_aux)
prob_aux = fluid.layers.softmax(self.logits_aux, use_cudnn=False)
loss_aux = fluid.layers.cross_entropy(prob_aux, self.label)
loss_aux_mean = fluid.layers.reduce_mean(loss_aux)
return loss_mean + auxiliary_weight * loss_aux_mean
def train_model(self, py_reader, init_channels, aux, aux_w):
self.image, self.label = fluid.layers.read_file(py_reader)
self.logits, self.logits_aux = self.forward(init_channels, True)
self.loss = self.calc_loss(aux, aux_w)
return self.loss
def test_model(self, py_reader, init_channels):
self.image, self.label = fluid.layers.read_file(py_reader)
self.logits, _ = self.forward(init_channels, False)
prob = fluid.layers.softmax(self.logits, use_cudnn=False)
loss = fluid.layers.cross_entropy(prob, self.label)
acc_1 = fluid.layers.accuracy(self.logits, self.label, k=1)
acc_5 = fluid.layers.accuracy(self.logits, self.label, k=5)
return prob, acc_1, acc_5
......@@ -312,7 +312,7 @@ def FactorizedReduce(input, C_out, name='', affine=True):
bias_attr=False)
h_end = relu_a.shape[2]
w_end = relu_a.shape[3]
slice_a = fluid.layers.slice(relu_a, [2, 3], [1, 1], [h_end, w_end])
slice_a = fluid.layers.slice(input=relu_a, axes=[2, 3], starts=[1, 1], ends=[h_end, w_end])
conv2d_b = fluid.layers.conv2d(
slice_a,
C_out // 2,
......
......@@ -31,7 +31,10 @@ from PIL import Image
from PIL import ImageOps
import numpy as np
import cPickle
try:
import cPickle as pickle
except ImportError:
import pickle
import random
import utils
import paddle.fluid as fluid
......@@ -46,10 +49,9 @@ image_size = 32
image_depth = 3
half_length = 8
CIFAR_MEAN = [0.4914, 0.4822, 0.4465]
CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124]
CIFAR_STD = [0.24703233, 0.24348505, 0.26158768]
def generate_reshape_label(label, batch_size, CIFAR_CLASSES=10):
reshape_label = np.zeros((batch_size, 1), dtype='int32')
reshape_non_label = np.zeros(
......@@ -82,10 +84,11 @@ def generate_bernoulli_number(batch_size, CIFAR_CLASSES=10):
def preprocess(sample, is_training, args):
image_array = sample.reshape(3, image_size, image_size)
rgb_array = np.transpose(image_array, (1, 2, 0))
img = Image.fromarray(rgb_array, 'RGB')
if is_training:
# pad and random crop
img = ImageOps.expand(img, (4, 4, 4, 4), fill=0) # pad to 40 * 40 * 3
......@@ -94,13 +97,13 @@ def preprocess(sample, is_training, args):
left_top[1] + image_size))
if np.random.randint(2):
img = img.transpose(Image.FLIP_LEFT_RIGHT)
img = np.array(img).astype(np.float32)
# per_image_standardization
img_float = img / 255.0
img = (img_float - CIFAR_MEAN) / CIFAR_STD
if is_training and args.cutout:
center = np.random.randint(image_size, size=2)
offset_width = max(0, center[0] - half_length)
......@@ -111,7 +114,7 @@ def preprocess(sample, is_training, args):
for i in range(offset_height, target_height):
for j in range(offset_width, target_width):
img[i][j][:] = 0.0
img = np.transpose(img, (2, 0, 1))
return img
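Side note: the per-pixel cutout loop above is equivalent to a single slice assignment on the H x W x C array (sketch only, reusing the offsets computed above):

# vectorized cutout, same effect as the nested loops
img[offset_height:target_height, offset_width:target_width, :] = 0.0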
......@@ -123,13 +126,15 @@ def reader_creator_filepath(filename, sub_name, is_training, args):
datasets = []
for name in names:
print("Reading file " + name)
batch = cPickle.load(open(filename + name, 'rb'))
batch = pickle.load(open(filename + name, 'rb'))
data = batch['data']
labels = batch.get('labels', batch.get('fine_labels', None))
assert labels is not None
dataset = zip(data, labels)
datasets.extend(dataset)
random.shuffle(datasets)
if is_training:
random.shuffle(datasets)
def read_batch(datasets, args):
for sample, label in datasets:
......@@ -145,6 +150,10 @@ def reader_creator_filepath(filename, sub_name, is_training, args):
if len(batch_data) == args.batch_size:
batch_data = np.array(batch_data, dtype='float32')
batch_label = np.array(batch_label, dtype='int64')
#
# batch_data = pickle.load(open('input.pkl'))
# batch_label = pickle.load(open('target.pkl')).reshape(-1,1)
#
if is_training:
flatten_label, flatten_non_label = \
generate_reshape_label(batch_label, args.batch_size)
......@@ -160,6 +169,24 @@ def reader_creator_filepath(filename, sub_name, is_training, args):
yield batch_out
batch_data = []
batch_label = []
if len(batch_data) != 0:
batch_data = np.array(batch_data, dtype='float32')
batch_label = np.array(batch_label, dtype='int64')
if is_training:
flatten_label, flatten_non_label = \
generate_reshape_label(batch_label, len(batch_data))
rad_var = generate_bernoulli_number(len(batch_data))
mixed_x, y_a, y_b, lam = utils.mixup_data(
batch_data, batch_label, len(batch_data),
args.mix_alpha)
batch_out = [[mixed_x, y_a, y_b, lam, flatten_label, \
flatten_non_label, rad_var]]
yield batch_out
else:
batch_out = [[batch_data, batch_label]]
yield batch_out
batch_data = []
batch_label = []
return reader
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Based on:
# --------------------------------------------------------
# DARTS
# Copyright (c) 2018, Hanxiao Liu.
# Licensed under the Apache License, Version 2.0;
# --------------------------------------------------------
from PIL import Image
from PIL import ImageOps
import numpy as np
try:
import cPickle as pickle
except ImportError:
import pickle
import random
import utils
import paddle.fluid as fluid
import time
import os
import functools
import paddle.reader
import math
__all__ = ['train', 'test']
train_image_size = 224
test_image_size = 256
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]):
aspect_ratio = math.sqrt(np.random.uniform(*ratio))
w = 1. * aspect_ratio
h = 1. / aspect_ratio
bound = min((float(img.size[0]) / img.size[1]) / (w**2),
(float(img.size[1]) / img.size[0]) / (h**2))
scale_max = min(scale[1], bound)
scale_min = min(scale[0], bound)
target_area = img.size[0] * img.size[1] * np.random.uniform(scale_min,
scale_max)
target_size = math.sqrt(target_area)
w = int(target_size * w)
h = int(target_size * h)
i = np.random.randint(0, img.size[0] - w + 1)
j = np.random.randint(0, img.size[1] - h + 1)
img = img.crop((i, j, i + w, j + h))
img = img.resize((size, size), Image.BILINEAR)
return img
def crop_image(img, target_size, center=True):
width, height = img.size
size = target_size
if center:
w_start = (width - size) / 2
h_start = (height - size) / 2
else:
w_start = np.random.randint(0, width - size + 1)
h_start = np.random.randint(0, height - size + 1)
w_end = w_start + size
h_end = h_start + size
img = img.crop((w_start, h_start, w_end, h_end))
return img
def preprocess(img_path, is_training):
img = Image.open(img_path)
if is_training:
# random resized crop
img = random_crop(img, train_image_size)
# random horizontal flip
if np.random.randint(2):
img = img.transpose(Image.FLIP_LEFT_RIGHT)
else:
# resize
img = img.resize((test_image_size, test_image_size), Image.BILINEAR)
# center crop
img = crop_image(img, train_image_size)
if img.mode != 'RGB':
img = img.convert('RGB')
img = np.array(img).astype(np.float32)
# normalize with ImageNet channel mean and std
img_float = img / 255.0
img = (img_float - IMAGENET_MEAN) / IMAGENET_STD
img = np.transpose(img, (2, 0, 1))
return img
def reader_creator_filepath(data_dir, sub_name, is_training):
file_list = os.path.join(data_dir, sub_name)
image_file = 'train' if is_training else 'val'
dataset_path = os.path.join(data_dir, image_file)
print(dataset_path)
def reader():
with open(file_list) as flist:
lines = [line.strip() for line in flist]
if is_training:
np.random.shuffle(lines)
for line in lines:
img_path, label = line.split()
#img_path = img_path.replace("JPEG", "jpeg")
img_path_ = os.path.join(dataset_path, img_path)
img = preprocess(img_path_, is_training)
yield img, int(label)
return reader
def train(args):
"""
ImageNet training set creator.
It returns a reader creator; each sample in the reader is a preprocessed
image array and an integer label in [0, 999].
:return: Training reader creator
:rtype: callable
"""
return reader_creator_filepath(args.data, 'train.txt', True)
def test(args):
"""
ImageNet validation set creator.
It returns a reader creator; each sample in the reader is a preprocessed
image array and an integer label in [0, 999].
:return: Test reader creator.
:rtype: callable
"""
return reader_creator_filepath(args.data, 'val.txt', False)
CUDA_VISIBLE_DEVICES=0 python -u train_mixup.py \
--batch_size=80 \
--auxiliary \
--weight_decay=0.0003 \
--learning_rate=0.025 \
--lrc_loss_lambda=0.7 \
--cutout
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
#
# Based on:
# --------------------------------------------------------
# DARTS
# Copyright (c) 2018, Hanxiao Liu.
# Licensed under the Apache License, Version 2.0;
# --------------------------------------------------------
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from learning_rate import cosine_with_warmup_decay
import numpy as np
import argparse
from model import NetworkImageNet as Network
import reader_imagenet as reader
import sys
import os
import time
import logging
import genotypes
import paddle
import paddle.fluid as fluid
import shutil
import utils
import math
parser = argparse.ArgumentParser("imagenet")
parser.add_argument(
'--data',
type=str,
default='./dataset/imagenet/',
help='location of the data corpus')
parser.add_argument('--batch_size', type=int, default=64, help='batch size')
parser.add_argument(
'--pretrained_model', type=str, default='/save_models/599', help='pretrained model to load')
parser.add_argument('--model_id', type=int, default=2, help='model id')
parser.add_argument(
'--learning_rate', type=float, default=0.025, help='init learning rate')
parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
parser.add_argument(
'--weight_decay', type=float, default=4e-5, help='weight decay')
parser.add_argument(
'--report_freq', type=float, default=10, help='report frequency')
parser.add_argument(
'--epochs', type=int, default=90, help='num of training epochs')
parser.add_argument(
'--init_channels', type=int, default=96, help='num of init channels')
parser.add_argument(
'--layers', type=int, default=20, help='total number of layers')
parser.add_argument(
'--save_model_path',
type=str,
default='save_models',
help='path to save the model')
parser.add_argument(
'--auxiliary',
action='store_true',
default=False,
help='use auxiliary tower')
parser.add_argument(
'--auxiliary_weight',
type=float,
default=0.4,
help='weight for auxiliary loss')
parser.add_argument(
'--drop_path_prob', type=float, default=0.4, help='drop path probability')
parser.add_argument(
'--arch', type=str, default='DARTS', help='which architecture to use')
parser.add_argument(
'--grad_clip', type=float, default=5, help='gradient clipping')
parser.add_argument(
'--warmup_epochs',
default=5,
type=float,
help='warm up to learning rate')
parser.add_argument('--lr_min', type=float, default=0.0001,
help='minimum learning rate for a single GPU')
args = parser.parse_args()
ImageNet_CLASSES = 1000
dataset_train_size = 1281167
image_size = 224
genotypes.DARTS = genotypes.MY_DARTS_list[args.model_id]
def main():
image_shape = [3, image_size, image_size]
devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
devices_num = len(devices.split(","))
logging.info("args = %s", args)
genotype = eval("genotypes.%s" % args.arch)
model = Network(args.init_channels, ImageNet_CLASSES, args.layers,
args.auxiliary, genotype)
steps_one_epoch = math.ceil(dataset_train_size / (devices_num * args.batch_size))
train(model, args, image_shape, steps_one_epoch, devices_num)
def build_program(main_prog, startup_prog, args, is_train, model, im_shape,
steps_one_epoch, num_gpu):
out = []
with fluid.program_guard(main_prog, startup_prog):
py_reader = model.build_input(im_shape, is_train)
if is_train:
with fluid.unique_name.guard():
loss = model.train_model(py_reader, args.init_channels,
args.auxiliary, args.auxiliary_weight)
optimizer = fluid.optimizer.Momentum(
learning_rate=cosine_with_warmup_decay(\
args.learning_rate, args.lr_min, steps_one_epoch,\
args.warmup_epochs, args.epochs, num_gpu),
regularization=fluid.regularizer.L2Decay(\
args.weight_decay),
momentum=args.momentum)
optimizer.minimize(loss)
out = [py_reader, loss]
else:
with fluid.unique_name.guard():
prob, acc_1, acc_5 = model.test_model(py_reader,
args.init_channels)
out = [py_reader, prob, acc_1, acc_5]
return out
def train(model, args, im_shape, steps_one_epoch, num_gpu):
train_startup_prog = fluid.Program()
test_startup_prog = fluid.Program()
train_prog = fluid.Program()
test_prog = fluid.Program()
train_py_reader, loss_train = build_program(train_prog, train_startup_prog,
args, True, model, im_shape,
steps_one_epoch, num_gpu)
test_py_reader, prob, acc_1, acc_5 = build_program(
test_prog, test_startup_prog, args, False, model, im_shape,
steps_one_epoch, num_gpu)
test_prog = test_prog.clone(for_test=True)
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(train_startup_prog)
exe.run(test_startup_prog)
#if args.pretrained_model:
# def if_exist(var):
# return os.path.exists(os.path.join(args.pretrained_model, var.name))
# fluid.io.load_vars(exe, args.pretrained_model, main_program=train_prog, predicate=if_exist)
exec_strategy = fluid.ExecutionStrategy()
exec_strategy.num_threads = 1
train_exe = fluid.ParallelExecutor(
main_program=train_prog,
use_cuda=True,
loss_name=loss_train.name,
exec_strategy=exec_strategy)
train_batch_size = args.batch_size
test_batch_size = 256
train_reader = paddle.batch(
reader.train(args), batch_size=train_batch_size, drop_last=True)
test_reader = paddle.batch(reader.test(args), batch_size=test_batch_size)
train_py_reader.decorate_paddle_reader(train_reader)
test_py_reader.decorate_paddle_reader(test_reader)
fluid.clip.set_gradient_clip(fluid.clip.GradientClipByGlobalNorm(args.grad_clip), program=train_prog)
train_fetch_list = [loss_train]
fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list))
def save_model(postfix, main_prog):
model_path = os.path.join(args.save_model_path, postfix)
if os.path.isdir(model_path):
shutil.rmtree(model_path)
fluid.io.save_persistables(exe, model_path, main_program=main_prog)
def test(epoch_id):
test_fetch_list = [prob, acc_1, acc_5]
#objs = utils.AvgrageMeter()
#prob = []
top1 = utils.AvgrageMeter()
top5 = utils.AvgrageMeter()
test_py_reader.start()
test_start_time = time.time()
step_id = 0
try:
while True:
prev_test_start_time = test_start_time
test_start_time = time.time()
prob_v, acc_1_v, acc_5_v = exe.run(
test_prog, fetch_list=test_fetch_list)
top1.update(np.array(acc_1_v), np.array(prob_v).shape[0])
top5.update(np.array(acc_5_v), np.array(prob_v).shape[0])
if step_id % args.report_freq == 0:
print("Epoch {}, Step {}, acc_1 {}, acc_5 {}, time {}".
format(epoch_id, step_id,
np.array(acc_1_v),
np.array(acc_5_v), test_start_time -
prev_test_start_time))
step_id += 1
except fluid.core.EOFException:
test_py_reader.reset()
print("Epoch {0}, top1 {1}, top5 {2}".format(epoch_id, top1.avg,
top5.avg))
epoch_start_time = time.time()
for epoch_id in range(args.epochs):
model.drop_path_prob = args.drop_path_prob * epoch_id / args.epochs
train_py_reader.start()
epoch_end_time = time.time()
if epoch_id > 0:
print("Epoch {}, total time {}".format(epoch_id - 1, epoch_end_time
- epoch_start_time))
epoch_start_time = epoch_end_time
start_time = time.time()
step_id = 0
try:
while True:
prev_start_time = start_time
start_time = time.time()
loss_v, = train_exe.run(
fetch_list=[v.name for v in train_fetch_list])
print("Epoch {}, Step {}, loss {}, time {}".format(epoch_id, step_id, \
np.array(loss_v).mean(), start_time-prev_start_time))
step_id += 1
sys.stdout.flush()
#os._exit(1)
except fluid.core.EOFException:
train_py_reader.reset()
if epoch_id % 50 == 0 or epoch_id == args.epochs - 1:
save_model(str(epoch_id), train_prog)
test(epoch_id)
if __name__ == '__main__':
main()
......@@ -26,7 +26,7 @@ from learning_rate import cosine_decay
import numpy as np
import argparse
from model import NetworkCIFAR as Network
import reader
import reader_cifar as reader
import sys
import os
import time
......@@ -35,7 +35,7 @@ import genotypes
import paddle.fluid as fluid
import shutil
import utils
import cPickle as cp
import math
parser = argparse.ArgumentParser("cifar")
parser.add_argument(
......@@ -44,6 +44,9 @@ parser.add_argument(
default='./dataset/cifar/cifar-10-batches-py/',
help='location of the data corpus')
parser.add_argument('--batch_size', type=int, default=96, help='batch size')
parser.add_argument(
'--pretrained_model', type=str, default='/save_models/599', help='pretrained model to load')
parser.add_argument('--model_id', type=int, help='model id')
parser.add_argument(
'--learning_rate', type=float, default=0.025, help='init learning rate')
parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
......@@ -58,7 +61,7 @@ parser.add_argument(
parser.add_argument(
'--layers', type=int, default=20, help='total number of layers')
parser.add_argument(
'--model_path',
'--save_model_path',
type=str,
default='saved_models',
help='path to save the model')
......@@ -78,7 +81,6 @@ parser.add_argument(
'--cutout_length', type=int, default=16, help='cutout length')
parser.add_argument(
'--drop_path_prob', type=float, default=0.2, help='drop path probability')
parser.add_argument('--save', type=str, default='EXP', help='experiment name')
parser.add_argument(
'--arch', type=str, default='DARTS', help='which architecture to use')
parser.add_argument(
......@@ -100,9 +102,9 @@ parser.add_argument(
args = parser.parse_args()
CIFAR_CLASSES = 10
dataset_train_size = 50000
dataset_train_size = 50000.
image_size = 32
genotypes.DARTS = genotypes.MY_DARTS_list[args.model_id]
def main():
image_shape = [3, image_size, image_size]
......@@ -112,7 +114,8 @@ def main():
genotype = eval("genotypes.%s" % args.arch)
model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
args.auxiliary, genotype)
steps_one_epoch = dataset_train_size / (devices_num * args.batch_size)
steps_one_epoch = math.ceil(dataset_train_size / (devices_num * args.batch_size))
train(model, args, image_shape, steps_one_epoch)
......@@ -120,12 +123,12 @@ def build_program(main_prog, startup_prog, args, is_train, model, im_shape,
steps_one_epoch):
out = []
with fluid.program_guard(main_prog, startup_prog):
py_reader = model.build_input(im_shape, args.batch_size, is_train)
py_reader = model.build_input(im_shape, is_train)
if is_train:
with fluid.unique_name.guard():
loss = model.train_model(py_reader, args.init_channels,
args.auxiliary, args.auxiliary_weight,
args.batch_size, args.lrc_loss_lambda)
args.lrc_loss_lambda)
optimizer = fluid.optimizer.Momentum(
learning_rate=cosine_decay(args.learning_rate, \
args.epochs, steps_one_epoch),
......@@ -136,9 +139,9 @@ def build_program(main_prog, startup_prog, args, is_train, model, im_shape,
out = [py_reader, loss]
else:
with fluid.unique_name.guard():
loss, acc_1, acc_5 = model.test_model(py_reader,
prob, acc_1, acc_5 = model.test_model(py_reader,
args.init_channels)
out = [py_reader, loss, acc_1, acc_5]
out = [py_reader, prob, acc_1, acc_5]
return out
......@@ -152,7 +155,7 @@ def train(model, args, im_shape, steps_one_epoch):
args, True, model, im_shape,
steps_one_epoch)
test_py_reader, loss_test, acc_1, acc_5 = build_program(
test_py_reader, prob, acc_1, acc_5 = build_program(
test_prog, test_startup_prog, args, False, model, im_shape,
steps_one_epoch)
......@@ -163,6 +166,13 @@ def train(model, args, im_shape, steps_one_epoch):
exe.run(train_startup_prog)
exe.run(test_startup_prog)
#if args.pretrained_model:
# def if_exist(var):
# return os.path.exists(os.path.join(args.pretrained_model, var.name))
# fluid.io.load_vars(exe, args.pretrained_model, main_program=train_prog, predicate=if_exist)
exec_strategy = fluid.ExecutionStrategy()
exec_strategy.num_threads = 1
train_exe = fluid.ParallelExecutor(
......@@ -170,23 +180,27 @@ def train(model, args, im_shape, steps_one_epoch):
use_cuda=True,
loss_name=loss_train.name,
exec_strategy=exec_strategy)
train_reader = reader.train10(args)
test_reader = reader.test10(args)
train_py_reader.decorate_paddle_reader(train_reader)
test_py_reader.decorate_paddle_reader(test_reader)
fluid.clip.set_gradient_clip(fluid.clip.GradientClipByNorm(args.grad_clip))
fluid.memory_optimize(fluid.default_main_program())
fluid.clip.set_gradient_clip(fluid.clip.GradientClipByGlobalNorm(args.grad_clip), program=train_prog)
train_fetch_list = [loss_train]
fluid.memory_optimize(train_prog, skip_opt_set=set(train_fetch_list))
def save_model(postfix, main_prog):
model_path = os.path.join(args.model_path, postfix)
model_path = os.path.join(args.save_model_path, postfix)
if os.path.isdir(model_path):
shutil.rmtree(model_path)
fluid.io.save_persistables(exe, model_path, main_program=main_prog)
def test(epoch_id):
test_fetch_list = [loss_test, acc_1, acc_5]
objs = utils.AvgrageMeter()
test_fetch_list = [prob, acc_1, acc_5]
#objs = utils.AvgrageMeter()
#prob = []
top1 = utils.AvgrageMeter()
top5 = utils.AvgrageMeter()
test_py_reader.start()
......@@ -196,11 +210,10 @@ def train(model, args, im_shape, steps_one_epoch):
while True:
prev_test_start_time = test_start_time
test_start_time = time.time()
loss_test_v, acc_1_v, acc_5_v = exe.run(
prob_v, acc_1_v, acc_5_v = exe.run(
test_prog, fetch_list=test_fetch_list)
objs.update(np.array(loss_test_v), args.batch_size)
top1.update(np.array(acc_1_v), args.batch_size)
top5.update(np.array(acc_5_v), args.batch_size)
top1.update(np.array(acc_1_v), np.array(prob_v).shape[0])
top5.update(np.array(acc_5_v), np.array(prob_v).shape[0])
if step_id % args.report_freq == 0:
print("Epoch {}, Step {}, acc_1 {}, acc_5 {}, time {}".
format(epoch_id, step_id,
......@@ -213,7 +226,6 @@ def train(model, args, im_shape, steps_one_epoch):
print("Epoch {0}, top1 {1}, top5 {2}".format(epoch_id, top1.avg,
top5.avg))
train_fetch_list = [loss_train]
epoch_start_time = time.time()
for epoch_id in range(args.epochs):
model.drop_path_prob = args.drop_path_prob * epoch_id / args.epochs
......
......@@ -34,6 +34,10 @@ def mixup_data(x, y, batch_size, alpha=1.0):
lam = 1.
index = np.random.permutation(batch_size)
#
#lam = 0.5
#index = np.arange(batch_size-1, -1, -1)
#
mixed_x = lam * x + (1 - lam) * x[index, :]
y_a, y_b = y, y[index]
return mixed_x.astype('float32'), y_a.astype('int64'),\
......
import numpy as np
import cPickle as cp
import sys, os
#model_path = 'final_paddle-results'
model_path = 'paddle-results'
fl = os.listdir(model_path)
labels = np.load('labels.npz')['arr_0']
pred = np.zeros((10000, 10))
fl.sort()
i = 0
weight = 1
for f in fl:
print(f)
if i == 1: weight = 1.2
if i == 2: weight = 0.8
if i == 3: weight = 1.3
if i == 4: weight = 1.1
if i == 5: weight = 0.9
pred += weight * cp.load(open(os.path.join(model_path, f), 'rb'))
print(np.mean(np.argmax(pred, axis=1) == labels))
i += 1