# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Based on:
# --------------------------------------------------------
# DARTS
# Copyright (c) 2018, Hanxiao Liu.
# Licensed under the Apache License, Version 2.0;
# --------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import logging
import math
import os
import shutil
import sys
import time

import numpy as np
import paddle.fluid as fluid

import genotypes
import reader_cifar as reader
import utils
from learning_rate import cosine_decay
from model import NetworkCIFAR as Network

parser = argparse.ArgumentParser("cifar")
# yapf: disable
parser.add_argument('--data', type=str, default='./dataset/cifar/cifar-10-batches-py/', help='location of the data corpus')
parser.add_argument('--batch_size', type=int, default=96, help='batch size')
parser.add_argument('--pretrained_model', type=str, default=None, help='pretrained model to load')
parser.add_argument('--model_id', type=int, help='model id')
parser.add_argument('--learning_rate', type=float, default=0.025, help='init learning rate')
parser.add_argument('--momentum', type=float, default=0.9, help='momentum')
parser.add_argument('--weight_decay', type=float, default=3e-4, help='weight decay')
parser.add_argument('--report_freq', type=float, default=50, help='report frequency')
parser.add_argument('--epochs', type=int, default=600, help='num of training epochs')
parser.add_argument('--init_channels', type=int, default=36, help='num of init channels')
parser.add_argument('--layers', type=int, default=20, help='total number of layers')
parser.add_argument('--save_model_path', type=str, default='saved_models', help='path to save the model')
parser.add_argument('--auxiliary', action='store_true', default=False, help='use auxiliary tower')
parser.add_argument('--auxiliary_weight', type=float, default=0.4, help='weight for auxiliary loss')
parser.add_argument('--cutout', action='store_true', default=False, help='use cutout')
parser.add_argument('--cutout_length', type=int, default=16, help='cutout length')
parser.add_argument('--drop_path_prob', type=float, default=0.2, help='drop path probability')
parser.add_argument('--arch', type=str, default='DARTS', help='which architecture to use')
parser.add_argument('--grad_clip', type=float, default=5, help='gradient clipping')
parser.add_argument('--lr_exp_decay', action='store_true', default=False, help='use exponential_decay learning_rate')
parser.add_argument('--mix_alpha', type=float, default=0.5, help='mixup alpha')
parser.add_argument('--lrc_loss_lambda', default=0, type=float, help='lrc_loss_lambda')
# yapf: enable

args = parser.parse_args()
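# Example launch command (illustrative only; the script filename, GPU id and
# flag values below are assumptions, not taken from the original source):
#
#   CUDA_VISIBLE_DEVICES=0 python train_mixup.py \
#       --model_id 0 --auxiliary --cutout \
#       --data ./dataset/cifar/cifar-10-batches-py/ --batch_size 96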
CIFAR_CLASSES = 10
dataset_train_size = 50000.
image_size = 32
# Override the default DARTS genotype with the one selected by --model_id.
genotypes.DARTS = genotypes.MY_DARTS_list[args.model_id]


def main():
    image_shape = [3, image_size, image_size]
    devices = os.getenv("CUDA_VISIBLE_DEVICES") or ""
    devices_num = len(devices.split(","))
    logging.info("args = %s", args)
    genotype = eval("genotypes.%s" % args.arch)
    model = Network(args.init_channels, CIFAR_CLASSES, args.layers,
                    args.auxiliary, genotype)
    steps_one_epoch = math.ceil(dataset_train_size /
                                (devices_num * args.batch_size))
    train(model, args, image_shape, steps_one_epoch)


def build_program(main_prog, startup_prog, args, is_train, model, im_shape,
                  steps_one_epoch):
    # Build either the training program (loss + Momentum optimizer with
    # cosine-decay learning rate) or the evaluation program (prob, top-1, top-5).
    out = []
    with fluid.program_guard(main_prog, startup_prog):
        py_reader = model.build_input(im_shape, is_train)
        if is_train:
            with fluid.unique_name.guard():
                loss = model.train_model(py_reader, args.init_channels,
                                         args.auxiliary, args.auxiliary_weight,
                                         args.lrc_loss_lambda)
                optimizer = fluid.optimizer.Momentum(
                    learning_rate=cosine_decay(args.learning_rate, args.epochs,
                                               steps_one_epoch),
                    regularization=fluid.regularizer.L2Decay(
                        args.weight_decay),
                    momentum=args.momentum)
                optimizer.minimize(loss)
                out = [py_reader, loss]
        else:
            with fluid.unique_name.guard():
                prob, acc_1, acc_5 = model.test_model(py_reader,
                                                      args.init_channels)
                out = [py_reader, prob, acc_1, acc_5]
    return out


def train(model, args, im_shape, steps_one_epoch):
    startup_prog = fluid.Program()
    train_prog = fluid.Program()
    test_prog = fluid.Program()

    train_py_reader, loss_train = build_program(
        train_prog, startup_prog, args, True, model, im_shape, steps_one_epoch)
    test_py_reader, prob, acc_1, acc_5 = build_program(
        test_prog, startup_prog, args, False, model, im_shape, steps_one_epoch)
    test_prog = test_prog.clone(for_test=True)

    place = fluid.CUDAPlace(0)
    exe = fluid.Executor(place)
    exe.run(startup_prog)

    if args.pretrained_model:

        def if_exist(var):
            return os.path.exists(
                os.path.join(args.pretrained_model, var.name))

        fluid.io.load_vars(
            exe,
            args.pretrained_model,
            main_program=train_prog,
            predicate=if_exist)

    exec_strategy = fluid.ExecutionStrategy()
    exec_strategy.num_threads = 1
    build_strategy = fluid.BuildStrategy()
    build_strategy.memory_optimize = False
    build_strategy.enable_inplace = True
    compile_program = fluid.compiler.CompiledProgram(
        train_prog).with_data_parallel(
            loss_name=loss_train.name,
            build_strategy=build_strategy,
            exec_strategy=exec_strategy)

    train_reader = reader.train10(args)
    test_reader = reader.test10(args)
    train_py_reader.decorate_paddle_reader(train_reader)
    test_py_reader.decorate_paddle_reader(test_reader)

    fluid.clip.set_gradient_clip(
        fluid.clip.GradientClipByGlobalNorm(args.grad_clip),
        program=train_prog)

    train_fetch_list = [loss_train]

    def save_model(postfix, main_prog):
        model_path = os.path.join(args.save_model_path, postfix)
        if os.path.isdir(model_path):
            shutil.rmtree(model_path)
        fluid.io.save_persistables(exe, model_path, main_program=main_prog)

    def test(epoch_id):
        test_fetch_list = [prob, acc_1, acc_5]
        top1 = utils.AvgrageMeter()
        top5 = utils.AvgrageMeter()
        test_py_reader.start()
        test_start_time = time.time()
        step_id = 0
        try:
            while True:
                prev_test_start_time = test_start_time
                test_start_time = time.time()
                prob_v, acc_1_v, acc_5_v = exe.run(
                    test_prog, fetch_list=test_fetch_list)
                top1.update(np.array(acc_1_v), np.array(prob_v).shape[0])
                top5.update(np.array(acc_5_v), np.array(prob_v).shape[0])
                if step_id % args.report_freq == 0:
                    print("Epoch {}, Step {}, acc_1 {}, acc_5 {}, time {}".
                          format(epoch_id, step_id,
                                 np.array(acc_1_v),
                                 np.array(acc_5_v),
                                 test_start_time - prev_test_start_time))
                step_id += 1
        except fluid.core.EOFException:
            test_py_reader.reset()
        print("Epoch {0}, top1 {1}, top5 {2}".format(epoch_id, top1.avg,
                                                     top5.avg))

    epoch_start_time = time.time()
    for epoch_id in range(args.epochs):
        # Linearly ramp up the drop-path probability over the training run.
        model.drop_path_prob = args.drop_path_prob * epoch_id / args.epochs
        train_py_reader.start()
        epoch_end_time = time.time()
        if epoch_id > 0:
            print("Epoch {}, total time {}".format(
                epoch_id - 1, epoch_end_time - epoch_start_time))
        epoch_start_time = epoch_end_time
        start_time = time.time()
        step_id = 0
        try:
            while True:
                prev_start_time = start_time
                start_time = time.time()
                loss_v, = exe.run(
                    compile_program,
                    fetch_list=[v.name for v in train_fetch_list])
                print("Epoch {}, Step {}, loss {}, time {}".format(
                    epoch_id, step_id,
                    np.array(loss_v).mean(), start_time - prev_start_time))
                step_id += 1
                sys.stdout.flush()
        except fluid.core.EOFException:
            train_py_reader.reset()
        # Checkpoint every 50 epochs and at the end of training, then evaluate.
        if epoch_id % 50 == 0:
            save_model(str(epoch_id), train_prog)
        if epoch_id == args.epochs - 1:
            save_model('final', train_prog)
        test(epoch_id)


if __name__ == '__main__':
    main()