Commit 1cf2e6d4 authored by danleifeng, committed by lilong12

edit mixed precision user interface (#20)

* edit fp16 user interface
* edit fp16 doc
Parent f0978c37
@@ -10,25 +10,42 @@ PLSC supports mixed precision training. Using mixed precision training can improve the training speed
```python
from plsc import Entry

def main():
    ins = Entry()
    ins.set_mixed_precision(True)
    ins.train()

if __name__ == "__main__":
    main()
```
The `set_mixed_precision` function is described as follows:
| API | Description |
| --- | --- |
| set_mixed_precision | Configure mixed precision training |
## Parameter description

The `set_mixed_precision` function takes 7 parameters. `use_fp16` is required and determines whether mixed precision training is enabled; the other 6 parameters all have default values. They are described in the table below, followed by a usage sketch:
| Parameter | Type | Default | Description |
| --- | --- | --- | --- |
| use_fp16 | bool | None, must be set by the user | Whether to enable mixed precision training; set to True to enable it |
| init_loss_scaling | float | 1.0 | Initial loss scaling value. This value may affect the accuracy of mixed precision training; the default is recommended |
| incr_every_n_steps | int | 2000 | If no FP16 overflow occurs for `incr_every_n_steps` consecutive iterations, loss_scaling is increased by a factor of `incr_ratio`; the default is recommended |
| decr_every_n_nan_or_inf | int | 2 | If FP16 overflow occurs in `decr_every_n_nan_or_inf` accumulated iterations, loss_scaling is scaled down by a factor of `decr_ratio`; the default is recommended |
| incr_ratio | float | 2.0 | Factor by which loss_scaling is increased; the default is recommended |
| decr_ratio | float | 0.5 | Factor by which loss_scaling is decreased; the default is recommended |
| use_dynamic_loss_scaling | bool | True | Whether to use dynamic loss scaling. Only when it is enabled are `incr_every_n_steps`, `decr_every_n_nan_or_inf`, `incr_ratio` and `decr_ratio` used. Enabling it improves the stability and accuracy of mixed precision training; the default is recommended |
| amp_lists | AutoMixedPrecisionLists | None | Auto mixed precision lists object that specifies which operators are computed in FP16; the default is recommended |
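For example, you can enable mixed precision training and pass the optional parameters explicitly (a minimal sketch; the values shown simply restate the defaults from the table above, and in most cases the defaults are recommended):

```python
from plsc import Entry

def main():
    ins = Entry()
    # Only use_fp16 is required; every other argument falls back to the
    # defaults listed in the table above.
    ins.set_mixed_precision(
        use_fp16=True,
        init_loss_scaling=1.0,
        incr_every_n_steps=2000,
        decr_every_n_nan_or_inf=2,
        incr_ratio=2.0,
        decr_ratio=0.5,
        use_dynamic_loss_scaling=True,
        amp_lists=None)
    ins.train()

if __name__ == "__main__":
    main()
```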
For more information about mixed precision training, see:

- Paper: [Mixed Precision Training](https://arxiv.org/abs/1710.03740)
- NVIDIA introduction: [Training With Mixed Precision](https://docs.nvidia.com/deeplearning/sdk/mixed-precision-training/index.html)
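As a rough illustration of how `incr_every_n_steps`, `decr_every_n_nan_or_inf`, `incr_ratio` and `decr_ratio` drive dynamic loss scaling, the update rule behaves approximately as follows (a simplified sketch of the behaviour described in the table above, not Paddle's actual implementation):

```python
def update_loss_scaling(state, found_nan_or_inf,
                        incr_every_n_steps=2000, decr_every_n_nan_or_inf=2,
                        incr_ratio=2.0, decr_ratio=0.5):
    """state: dict with keys 'loss_scaling', 'good_steps', 'bad_steps'."""
    if found_nan_or_inf:
        # An FP16 overflow occurred in this iteration.
        state['good_steps'] = 0
        state['bad_steps'] += 1
        if state['bad_steps'] == decr_every_n_nan_or_inf:
            # Overflows accumulated: shrink loss_scaling by decr_ratio.
            state['loss_scaling'] *= decr_ratio
            state['bad_steps'] = 0
    else:
        state['bad_steps'] = 0
        state['good_steps'] += 1
        if state['good_steps'] == incr_every_n_steps:
            # No overflow for incr_every_n_steps iterations: grow loss_scaling.
            state['loss_scaling'] *= incr_ratio
            state['good_steps'] = 0
    return state
```

In short, loss_scaling grows cautiously after long stretches of stable FP16 steps and shrinks quickly as soon as overflows appear.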
## Training performance

Configuration: NVIDIA Tesla V100 GPUs, single machine with 8 cards
| Model / Throughput | FP32 training | Mixed precision training | Speedup |
| --- | --- | --- | --- |
| ResNet50 | 2567.96 images/s | 3643.11 images/s | 1.42 |
Note: the models above were all trained with loss_type set to 'dist_arcface'.
@@ -48,7 +48,7 @@ logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%d %b %Y %H:%M:%S')
logger = logging.getLogger(__name__)


class Entry(object):
    """
@@ -100,7 +100,6 @@ class Entry(object):
        self.fs_dir = None
        self.use_fp16 = False
        self.fp16_user_dict = None
        self.val_targets = self.config.val_targets
@@ -149,16 +148,30 @@ class Entry(object):
        self.global_train_batch_size = batch_size * self.num_trainers
        logger.info("Set train batch size to {}.".format(batch_size))

    def set_mixed_precision(self,
                            use_fp16,
                            init_loss_scaling=1.0,
                            incr_every_n_steps=2000,
                            decr_every_n_nan_or_inf=2,
                            incr_ratio=2.0,
                            decr_ratio=0.5,
                            use_dynamic_loss_scaling=True,
                            amp_lists=None):
        """
        Whether to use mixed precision training.
        """
        self.use_fp16 = use_fp16
        self.fp16_user_dict = dict()
        self.fp16_user_dict['init_loss_scaling'] = init_loss_scaling
        self.fp16_user_dict['incr_every_n_steps'] = incr_every_n_steps
        self.fp16_user_dict['decr_every_n_nan_or_inf'] = decr_every_n_nan_or_inf
        self.fp16_user_dict['incr_ratio'] = incr_ratio
        self.fp16_user_dict['decr_ratio'] = decr_ratio
        self.fp16_user_dict['use_dynamic_loss_scaling'] = use_dynamic_loss_scaling
        self.fp16_user_dict['amp_lists'] = amp_lists
        logger.info("Use mixed precision training: {}.".format(use_fp16))
        for key in self.fp16_user_dict:
            logger.info("Set init {} to {}.".format(key, self.fp16_user_dict[key]))

    def set_test_batch_size(self, batch_size):
        self.test_batch_size = batch_size
@@ -168,7 +181,7 @@ class Entry(object):
    def set_hdfs_info(self, fs_name, fs_ugi, directory):
        """
        Set the info to download from or upload to hdfs filesystems.
        If the information is provided, we will download pretrained
        model from hdfs at the beginning and upload pretrained models
        to hdfs at the end automatically.
        """
@@ -281,13 +294,13 @@ class Entry(object):
        if not self.optimizer:
            bd = [step for step in self.lr_steps]
            start_lr = self.lr

            global_batch_size = self.global_train_batch_size
            train_image_num = self.train_image_num
            images_per_trainer = int(math.ceil(
                train_image_num * 1.0 / self.num_trainers))
            steps_per_pass = int(math.ceil(
                images_per_trainer * 1.0 / self.train_batch_size))
            logger.info("Steps per epoch: %d" % steps_per_pass)
            warmup_steps = steps_per_pass * self.warmup_epochs
            batch_denom = 1024
@@ -300,12 +313,12 @@ class Entry(object):
                    values=lr), warmup_steps, start_lr, base_lr)
            else:
                lr_val = fluid.layers.piecewise_decay(boundaries=bd, values=lr)

            optimizer = fluid.optimizer.Momentum(
                learning_rate=lr_val, momentum=0.9,
                regularization=fluid.regularizer.L2Decay(5e-4))
            self.optimizer = optimizer

        if self.loss_type in ["dist_softmax", "dist_arcface"]:
            self.optimizer = DistributedClassificationOptimizer(
                self.optimizer, global_batch_size, use_fp16=self.use_fp16,
@@ -313,7 +326,15 @@ class Entry(object):
                fp16_user_dict=self.fp16_user_dict)
        elif self.use_fp16:
            self.optimizer = fluid.contrib.mixed_precision.decorate(
                optimizer=optimizer,
                init_loss_scaling=self.fp16_user_dict['init_loss_scaling'],
                incr_every_n_steps=self.fp16_user_dict['incr_every_n_steps'],
                decr_every_n_nan_or_inf=self.fp16_user_dict['decr_every_n_nan_or_inf'],
                incr_ratio=self.fp16_user_dict['incr_ratio'],
                decr_ratio=self.fp16_user_dict['decr_ratio'],
                use_dynamic_loss_scaling=self.fp16_user_dict['use_dynamic_loss_scaling'],
                amp_lists=self.fp16_user_dict['amp_lists'])
        return self.optimizer

    def build_program(self,
@@ -428,7 +449,7 @@ class Entry(object):
            stdout=sys.stdout,
            stderr=subprocess.STDOUT)
        process.wait()

        for file in os.listdir(local_dir):
            if "dist@" in file and "@rank@" in file:
                file = os.path.join(local_dir, file)
@@ -442,10 +463,10 @@ class Entry(object):
    def _append_broadcast_ops(self, program):
        """
        Before test, we broadcast batchnorm-related parameters to all
        other trainers from trainer-0.
        """
        bn_vars = [var for var in program.list_vars()
                   if 'batch_norm' in var.name and var.persistable]
        block = program.current_block()
        for var in bn_vars:
@@ -475,7 +496,7 @@ class Entry(object):
                shutil.rmtree(checkpoint_dir)
            os.makedirs(checkpoint_dir)

        # sync all trainers to avoid loading checkpoints before
        # parameters are downloaded
        file_name = os.path.join(checkpoint_dir, '.lock')
        if self.trainer_id == 0:
@@ -483,14 +504,14 @@ class Entry(object):
            with open(file_name, 'w') as f:
                pass
            time.sleep(10)
            os.remove(file_name)
        else:
            while True:
                if not os.path.exists(file_name):
                    time.sleep(1)
                else:
                    break

        # Preprocess distributed parameters.
        file_name = os.path.join(checkpoint_dir, '.lock')
        distributed = self.loss_type in ["dist_softmax", "dist_arcface"]
@@ -499,7 +520,7 @@ class Entry(object):
            with open(file_name, 'w') as f:
                pass
            time.sleep(10)
            os.remove(file_name)
        elif load_for_train and distributed:
            # wait trainer_id (0) to complete
            while True:
@@ -600,7 +621,7 @@ class Entry(object):
        feeder = fluid.DataFeeder(place=place,
            feed_list=['image', 'label'], program=main_program)
        fetch_list = [emb.name]
        for data in predict_reader():
            emb = exe.run(main_program, feed=feeder.feed(data),
@@ -674,33 +695,33 @@ class Entry(object):
                    embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
                embeddings[start:start+real_test_batch_size, :] = _embeddings[:, :]

            beg = parallel_test_steps * real_test_batch_size
            while beg < data.shape[0]:
                end = min(beg + self.test_batch_size, data.shape[0])
                count = end - beg
                _data = []
                for k in xrange(end - self.test_batch_size, end):
                    _data.append((data[k], 0))
                [_embeddings] = exe.run(test_program,
                    fetch_list=fetch_list, feed=feeder.feed(_data),
                    use_program_cache=True)
                _embeddings = _embeddings[0:self.test_batch_size, :]
                embeddings[beg:end, :] = _embeddings[(self.test_batch_size-count):, :]
                beg = end
            embeddings_list.append(embeddings)

        xnorm = 0.0
        xnorm_cnt = 0
        for embed in embeddings_list:
            xnorm += np.sqrt((embed * embed).sum(axis=1)).sum(axis=0)
            xnorm_cnt += embed.shape[0]
        xnorm /= xnorm_cnt

        embeddings = embeddings_list[0] + embeddings_list[1]
        embeddings = sklearn.preprocessing.normalize(embeddings)
        _, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=10)
        acc, std = np.mean(accuracy), np.std(accuracy)
        print('[%s][%d]XNorm: %f' % (test_name_list[i], pass_id, xnorm))
        print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (test_name_list[i], pass_id, acc, std))
        sys.stdout.flush()
@@ -712,7 +733,7 @@ class Entry(object):
        trainer_id = self.trainer_id
        num_trainers = self.num_trainers
        role = role_maker.PaddleCloudRoleMaker(is_collective=True)
        fleet.init(role)
        strategy = DistributedStrategy()
@@ -720,7 +741,7 @@ class Entry(object):
        strategy.collective_mode = "grad_allreduce"
        self.fleet = fleet
        self.strategy = strategy

        train_emb, train_loss, train_acc1, train_acc5, optimizer = \
            self.build_program(True, False)
        if self.with_test:
@@ -730,10 +751,10 @@ class Entry(object):
                self.dataset_dir, self.val_targets)
            test_program = self.test_program
            self._append_broadcast_ops(test_program)

        global_lr = optimizer._global_learning_rate(
            program=self.train_program)

        origin_prog = fleet._origin_program
        train_prog = fleet.main_program
        if trainer_id == 0:
@@ -745,25 +766,25 @@ class Entry(object):
                program_to_code(origin_prog, fout, True)
            with open('test.program', 'w') as fout:
                program_to_code(test_program, fout, True)

        gpu_id = int(os.getenv("FLAGS_selected_gpus", 0))
        place = fluid.CUDAPlace(gpu_id)
        exe = fluid.Executor(place)
        exe.run(self.startup_program)

        if self.with_test:
            test_feeder = fluid.DataFeeder(place=place,
                feed_list=['image', 'label'], program=test_program)
            fetch_list_test = [test_emb.name]
            real_test_batch_size = self.global_test_batch_size

        if self.checkpoint_dir:
            load_checkpoint = True
        else:
            load_checkpoint = False
        if load_checkpoint:
            self.load_checkpoint(executor=exe, main_program=origin_prog)

        if self.train_reader is None:
            train_reader = paddle.batch(reader.arc_train(
                self.dataset_dir, self.num_classes),
@@ -773,13 +794,13 @@ class Entry(object):
        feeder = fluid.DataFeeder(place=place,
            feed_list=['image', 'label'], program=origin_prog)

        if self.calc_train_acc:
            fetch_list = [train_loss.name, global_lr.name,
                          train_acc1.name, train_acc5.name]
        else:
            fetch_list = [train_loss.name, global_lr.name]

        local_time = 0.0
        nsamples = 0
        inspect_steps = 200
@@ -818,7 +839,7 @@ class Entry(object):
                    local_time = 0
                    nsamples = 0
                    local_train_info = [[], [], [], []]

            train_loss = np.array(train_info[0]).mean()
            print("End pass {0}, train_loss {1}".format(pass_id, train_loss))
            sys.stdout.flush()
@@ -850,45 +871,45 @@ class Entry(object):
                    embeddings = np.zeros((data.shape[0], _embeddings.shape[1]))
                embeddings[start:start+real_test_batch_size, :] = _embeddings[:, :]

            beg = parallel_test_steps * real_test_batch_size
            while beg < data.shape[0]:
                end = min(beg + self.test_batch_size, data.shape[0])
                count = end - beg
                _data = []
                for k in xrange(end - self.test_batch_size, end):
                    _data.append((data[k], 0))
                [_embeddings] = exe.run(test_program,
                    fetch_list=fetch_list_test, feed=test_feeder.feed(_data),
                    use_program_cache=True)
                _embeddings = _embeddings[0:self.test_batch_size, :]
                embeddings[beg:end, :] = _embeddings[(self.test_batch_size-count):, :]
                beg = end
            embeddings_list.append(embeddings)

        xnorm = 0.0
        xnorm_cnt = 0
        for embed in embeddings_list:
            xnorm += np.sqrt((embed * embed).sum(axis=1)).sum(axis=0)
            xnorm_cnt += embed.shape[0]
        xnorm /= xnorm_cnt

        embeddings = embeddings_list[0] + embeddings_list[1]
        embeddings = sklearn.preprocessing.normalize(embeddings)
        _, _, accuracy, val, val_std, far = evaluate(embeddings, issame_list, nrof_folds=10)
        acc, std = np.mean(accuracy), np.std(accuracy)
        print('[%s][%d]XNorm: %f' % (test_name_list[i], pass_id, xnorm))
        print('[%s][%d]Accuracy-Flip: %1.5f+-%1.5f' % (test_name_list[i], pass_id, acc, std))
        sys.stdout.flush()

        test_end = time.time()
        print("test time: {}".format(test_end - test_start))

        # save model
        if self.model_save_dir:
            model_save_dir = os.path.join(
                self.model_save_dir, str(pass_id))
            if not os.path.exists(model_save_dir):
                # there may be more than one process trying
                # to create the directory
                try:
                    os.makedirs(model_save_dir)
@@ -921,7 +942,7 @@ class Entry(object):
        # upload model
        if self.model_save_dir and self.fs_name and trainer_id == 0:
            self.put_files_to_hdfs(self.model_save_dir)


if __name__ == '__main__':
    ins = Entry()
......
@@ -41,13 +41,13 @@ class DistributedClassificationOptimizer(Optimizer):
    def init_fp16_params(self, loss_type, fp16_user_dict):
        # set default values for fp16_params_dict
        fp16_params_dict = dict()
        fp16_params_dict['init_loss_scaling'] = 1.0
        fp16_params_dict['incr_every_n_steps'] = 1000
        fp16_params_dict['decr_every_n_nan_or_inf'] = 2
        fp16_params_dict['incr_ratio'] = 2.0
        fp16_params_dict['decr_ratio'] = 0.5
        fp16_params_dict['use_dynamic_loss_scaling'] = True
        fp16_params_dict['amp_lists'] = None
        if fp16_user_dict is not None:
            # update fp16_params_dict with user-provided values
            for key in fp16_user_dict:
@@ -56,8 +56,9 @@ class DistributedClassificationOptimizer(Optimizer):
            else:
                logging.warning("Can't find name '%s' in our fp16_params_dict. "
                                "Please check your dict key. You can set fp16 params only "
                                "in [init_loss_scaling, incr_every_n_steps, "
                                "decr_every_n_nan_or_inf, incr_ratio, decr_ratio, "
                                "use_dynamic_loss_scaling, amp_lists]" % (key))

        self._amp_lists = fp16_params_dict['amp_lists']
        if self._amp_lists is None:
......