提交 f7668155 编写于 作者: W wangxiao1021

Merge branch 'master' of https://github.com/PaddlePaddle/hapi

......@@ -103,7 +103,7 @@ def main():
batch_size=config.batch_size,
line_processor=mnli_line_processor)
dev_dataloader = BertDataLoader(
test_dataloader = BertDataLoader(
"./data/glue_data/MNLI/dev_matched.tsv",
tokenizer, ["contradiction", "entailment", "neutral"],
max_seq_length=config.max_seq_len,
......
......@@ -105,7 +105,7 @@ def main():
mode="leveldb",
phase="train")
dev_dataloader = BertDataLoader(
test_dataloader = BertDataLoader(
"./data/glue_data/MNLI/dev_matched.tsv",
tokenizer, ["contradiction", "entailment", "neutral"],
max_seq_length=config.max_seq_len,
......
......@@ -85,8 +85,9 @@ CUDA_VISIBLE_DEVICES=0,1,2,3 python -m paddle.distributed.launch main.py --arch
| [vgg16](https://paddle-hapi.bj.bcebos.com/models/vgg16.pdparams) | 71.92 | 90.65 |
| [mobilenet_v1](https://paddle-hapi.bj.bcebos.com/models/mobilenet_v1_x1.0.pdparams) | 71.16 | 89.89 |
| [mobilenet_v2](https://paddle-hapi.bj.bcebos.com/models/mobilenet_v2_x1.0.pdparams) | 72.30 | 90.74 |
| [darknet53](https://paddle-hapi.bj.bcebos.com/models/darknet53.pdparams) | 78.43 | 94.24 |
上述模型的复现参数请参考scripts下的脚本。
上述部分模型的复现参数请参考scripts下的脚本。需要注意的是darknet要使用image size为256的输入来预测, 即```--image-size 256```
## 参考文献
......
......@@ -24,7 +24,11 @@ from paddle import fluid
class ImageNetDataset(DatasetFolder):
def __init__(self, path, mode='train'):
def __init__(self,
path,
mode='train',
image_size=224,
resize_short_size=256):
super(ImageNetDataset, self).__init__(path)
self.mode = mode
......@@ -32,13 +36,14 @@ class ImageNetDataset(DatasetFolder):
mean=[123.675, 116.28, 103.53], std=[58.395, 57.120, 57.375])
if self.mode == 'train':
self.transform = transforms.Compose([
transforms.RandomResizedCrop(224),
transforms.RandomResizedCrop(image_size),
transforms.RandomHorizontalFlip(),
transforms.Permute(mode='CHW'), normalize
])
else:
self.transform = transforms.Compose([
transforms.Resize(256), transforms.CenterCrop(224),
transforms.Resize(resize_short_size),
transforms.CenterCrop(image_size),
transforms.Permute(mode='CHW'), normalize
])
......@@ -46,7 +51,7 @@ class ImageNetDataset(DatasetFolder):
img_path, label = self.samples[idx]
img = cv2.imread(img_path).astype(np.float32)
label = np.array([label])
return self.transform(img, label)
return self.transform(img), label
def __len__(self):
return len(self.samples)
......@@ -18,8 +18,6 @@ from __future__ import print_function
import argparse
import contextlib
import os
import sys
sys.path.append('../')
import time
import math
......@@ -89,8 +87,16 @@ def main():
labels = [Input([None, 1], 'int64', name='label')]
train_dataset = ImageNetDataset(
os.path.join(FLAGS.data, 'train'), mode='train')
val_dataset = ImageNetDataset(os.path.join(FLAGS.data, 'val'), mode='val')
os.path.join(FLAGS.data, 'train'),
mode='train',
image_size=FLAGS.image_size,
resize_short_size=FLAGS.resize_short_size)
val_dataset = ImageNetDataset(
os.path.join(FLAGS.data, 'val'),
mode='val',
image_size=FLAGS.image_size,
resize_short_size=FLAGS.resize_short_size)
optim = make_optimizer(
np.ceil(
......@@ -176,6 +182,13 @@ if __name__ == '__main__':
parser.add_argument(
"--weight-decay", default=1e-4, type=float, help="weight decay")
parser.add_argument("--momentum", default=0.9, type=float, help="momentum")
parser.add_argument(
"--image-size", default=224, type=int, help="intput image size")
parser.add_argument(
"--resize-short-size",
default=256,
type=int,
help="short size of keeping ratio resize")
FLAGS = parser.parse_args()
assert FLAGS.data, "error: must provide data path"
main()
简介
--------
本OCR任务是识别图片单行的字母信息,基于attention的seq2seq结构。 运行本目录下的程序示例需要使用PaddlePaddle develop最新版本。
## 代码结构
```
.
|-- data.py # 数据读取
|-- eval.py # 评估脚本
|-- images # 测试图片
|-- predict.py # 预测脚本
|-- seq2seq_attn.py # 模型
|-- train.py # 训练脚本
`-- utility.py # 公共模块
```
## 训练/评估/预测流程
- 设置GPU环境:
```
export CUDA_VISIBLE_DEVICES=0
```
- 训练
```
python train.py
```
更多参数可以通过`--help`查看。
- 动静切换(默认使用静态图,通过`--dynamic=True`切换为动态图)
```
python train.py --dynamic=True
```
- 评估
```
python eval.py --init_model=checkpoint/final
```
- 预测
注意:预测脚本目前不支持动态图模式,请使用`--dynamic=False`。
```
python predict.py --init_model=checkpoint/final --image_path=images/ --dynamic=False --beam_size=3
```
预测结果如下:
```
Image 1: images/112_chubbiness_13557.jpg
0: chubbines
1: chubbiness
2: chubbinesS
Image 2: images/177_Interfiled_40185.jpg
0: Interflied
1: Interfiled
2: InterfIled
Image 3: images/325_dame_19109.jpg
0: da
1: damo
2: dame
Image 4: images/368_fixtures_29232.jpg
0: firtures
1: Firtures
2: fixtures
```
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from os import path
import random
import traceback
import copy
import math
import tarfile
from PIL import Image
import logging
logger = logging.getLogger(__name__)
import paddle
from paddle import fluid
from paddle.fluid.dygraph.parallel import ParallelEnv
# MD5 checksum and URL handed to paddle.dataset.common.download() by
# download_data().
DATA_MD5 = "7256b1d5420d8c3e74815196e58cdad5"
DATA_URL = "http://paddle-ocr-data.bj.bcebos.com/data.tar.gz"
# Cache module name and archive file name passed to the same download call.
CACHE_DIR_NAME = "attention_data"
SAVED_FILE_NAME = "data.tar.gz"
# Directory (sibling of the archive) that download_data() returns.
DATA_DIR_NAME = "data"
# Image folders and annotation list files joined onto the data root by
# train() and test().
TRAIN_DATA_DIR_NAME = "train_images"
TEST_DATA_DIR_NAME = "test_images"
TRAIN_LIST_FILE_NAME = "train.list"
TEST_LIST_FILE_NAME = "test.list"
class Resize(object):
    """Batch transform that resizes every image of a batch to one size.

    The target width is the width of the first image in the batch and the
    target height is ``height``. Each sample is expected to be a mutable
    sequence whose first element exposes ``size`` and ``resize`` (as PIL
    images do); samples are updated in place.

    Args:
        height: output height applied to every image.
    """

    def __init__(self, height=48):
        self.interp = Image.NEAREST  # Image.ANTIALIAS
        self.height = height

    def __call__(self, samples):
        """Resize ``samples[i][0]`` for every sample and return ``samples``."""
        # Width comes from the first sample only, so the whole batch ends up
        # with identical dimensions.
        batch_width = samples[0][0].size[0]
        for sample in samples:
            sample[0] = sample[0].resize((batch_width, self.height),
                                         self.interp)
        return samples
class Normalize(object):
    """Batch transform converting images to float32 arrays centred on zero.

    For every sample the image is converted to a float32 array, a leading
    axis is added, the values are optionally divided by 255 and then 127.5
    is subtracted.

    NOTE(review): ``mean`` and ``std`` are validated and stored but are not
    applied; the subtraction uses the constant 127.5 and there is no
    division by ``std``. ``channel_first`` is stored but not read here.

    Args:
        mean: list, stored on the instance (currently unused by __call__).
        std: list, stored on the instance (currently unused by __call__).
        scale: bool, divide pixel values by 255.0 before the subtraction.
        channel_first: stored on the instance.

    Raises:
        TypeError: if ``mean``/``std`` are not lists or ``scale`` is not bool.
    """

    def __init__(self,
                 mean=[127.5],
                 std=[1.0],
                 scale=False,
                 channel_first=True):
        # The default lists are never mutated, so sharing them is harmless.
        self.mean = mean
        self.std = std
        self.scale = scale
        self.channel_first = channel_first
        if not (isinstance(self.mean, list) and isinstance(self.std, list) and
                isinstance(self.scale, bool)):
            raise TypeError("{}: input type is invalid.".format(self))

    def __call__(self, samples):
        """Normalize ``samples[i][0]`` in place and return ``samples``."""
        for sample in samples:
            # np.array() copies, so the in-place subtraction below never
            # touches the caller's original image object.
            im = np.array(sample[0]).astype(np.float32, copy=False)
            im = im[np.newaxis, ...]
            if self.scale:
                im = im / 255.0
            im -= 127.5
            sample[0] = im
        return samples
class PadTarget(object):
    """Batch transform that pads label sequences and builds decoder targets.

    For each sample ``[image, label]`` (``label`` being a list of ints) the
    label is right-padded with ``EOS`` to the longest label in the batch and
    the sample becomes ``[image, label_in, label_out, mask]`` where

    * ``label_in``  = ``[SOS] + padded`` (int64),
    * ``label_out`` = ``padded + [EOS]`` (int64),
    * ``mask`` is float32 with ones on the first ``len(label) + 1`` slots.

    Args:
        SOS: id prepended to the decoder input.
        EOS: id used for padding and appended to the decoder target.
    """

    def __init__(self, SOS=0, EOS=1):
        self.SOS = SOS
        self.EOS = EOS

    def __call__(self, samples):
        """Pad every sample in place and return ``samples``."""
        longest = np.max(
            np.array(
                [len(s[1]) for s in samples], dtype="int64"))
        for sample in samples:
            label = sample[1]
            missing = longest - len(label)
            padded = (label + [self.EOS] * missing) if missing > 0 else label
            sample[1] = np.array([self.SOS] + padded, dtype='int64')
            # label_out
            sample.append(np.array(padded + [self.EOS], dtype='int64'))
            # mask: one slot per real label token plus the closing EOS
            weights = np.zeros((longest + 1)).astype('float32')
            weights[:len(label) + 1] = 1.0
            sample.append(np.array(weights, dtype='float32'))
        return samples
class BatchSampler(fluid.io.BatchSampler):
    """Rank-aware batch sampler over ``dataset._sample_infos``.

    Every rank walks the same (padded) list of sample infos and keeps the
    slices that belong to its ``local_rank``. Each yielded batch is a list of
    ``info.idx`` values.

    Args:
        dataset: object exposing ``__len__`` and ``_sample_infos`` whose items
            have ``idx`` and ``w`` attributes.
        batch_size: number of samples per batch on each rank.
        shuffle: shuffle samples (or batches) on every iteration.
        drop_last: drop the trailing incomplete global batch.
        seed: value passed to ``np.random.seed``.
    """

    def __init__(self,
                 dataset,
                 batch_size,
                 shuffle=False,
                 drop_last=True,
                 seed=None):
        self._dataset = dataset
        self._batch_size = batch_size
        self._shuffle = shuffle
        self._drop_last = drop_last
        # NOTE: this is the numpy.random module itself, so seeding here
        # re-seeds numpy's global generator.
        self._random = np.random
        self._random.seed(seed)
        self._nranks = ParallelEnv().nranks
        self._local_rank = ParallelEnv().local_rank
        self._device_id = ParallelEnv().dev_id
        # Samples per rank, rounded up so every rank gets the same amount.
        self._num_samples = int(
            math.ceil(len(self._dataset) * 1.0 / self._nranks))
        self._total_size = self._num_samples * self._nranks
        self._epoch = 0

    def __iter__(self):
        infos = copy.copy(self._dataset._sample_infos)
        skip_num = 0
        if self._shuffle:
            if self._batch_size == 1:
                self._random.RandomState(self._epoch).shuffle(infos)
            else:  # partial shuffle
                # Sort by width, then rotate by a random offset.
                # NOTE(review): the offset comes from the unseeded ``random``
                # module, so it may differ between ranks - confirm intended.
                infos = sorted(infos, key=lambda x: x.w)
                skip_num = random.randint(1, 100)

        infos = infos[skip_num:] + infos[:skip_num]
        # Pad from the front so the list length equals _total_size.
        infos += infos[:(self._total_size - len(infos))]
        last_size = self._total_size % (self._batch_size * self._nranks)
        batches = []
        for i in range(self._local_rank * self._batch_size,
                       len(infos) - last_size,
                       self._batch_size * self._nranks):
            batches.append(infos[i:i + self._batch_size])

        if (not self._drop_last) and last_size != 0:
            # Split the leftover samples evenly between ranks.
            last_local_size = last_size // self._nranks
            last_infos = infos[len(infos) - last_size:]
            start = self._local_rank * last_local_size
            batches.append(last_infos[start:start + last_local_size])

        if self._shuffle:
            self._random.RandomState(self._epoch).shuffle(batches)
            self._epoch += 1

        for batch in batches:
            batch_indices = [info.idx for info in batch]
            yield batch_indices

    def __len__(self):
        """Return the number of batches ``__iter__`` yields on this rank."""
        # One "global batch" is consumed by all ranks together, so the
        # per-rank count is based on batch_size * nranks. The result is
        # always an int (math.ceil returns a float on Python 2).
        global_batch = self._batch_size * self._nranks
        full_batches = self._total_size // global_batch
        if self._drop_last or self._total_size % global_batch == 0:
            return full_batches
        return full_batches + 1
class SampleInfo(object):
    """Plain record describing one annotated image.

    Attributes are exactly the constructor arguments: ``idx``, ``h``, ``w``,
    ``im_name`` and ``labels``.
    """

    def __init__(self, idx, h, w, im_name, labels):
        self.idx, self.h, self.w = idx, h, w
        self.im_name, self.labels = im_name, labels
class OCRDataset(paddle.io.Dataset):
    """Dataset of grayscale images described by an annotation list file.

    Each line of ``anno_file`` holds four space-separated fields:
    ``width height image_name comma_separated_int_labels``.

    Args:
        image_dir: directory the image names are resolved against.
        anno_file: path of the annotation list file.
    """

    def __init__(self, image_dir, anno_file):
        self.image_dir = image_dir
        self.anno_file = anno_file
        self._sample_infos = []
        with open(anno_file, 'r') as anno:
            for index, row in enumerate(anno):
                width, height, name, codes = row.strip().split(' ')
                self._sample_infos.append(
                    SampleInfo(index,
                               int(height),
                               int(width), name,
                               [int(code) for code in codes.split(',')]))

    def __getitem__(self, idx):
        """Return ``[image, labels]`` with the image opened in mode 'L'."""
        record = self._sample_infos[idx]
        picture = Image.open(path.join(self.image_dir, record.im_name))
        return [picture.convert('L'), record.labels]

    def __len__(self):
        return len(self._sample_infos)
def train(
        root_dir=None,
        images_dir=None,
        anno_file=None,
        shuffle=True, ):
    """Build the training OCRDataset.

    Args:
        root_dir: data root; when None the data is fetched with
            download_data().
        images_dir: image directory relative to ``root_dir``; defaults to
            TRAIN_DATA_DIR_NAME.
        anno_file: annotation list file relative to ``root_dir``; defaults to
            TRAIN_LIST_FILE_NAME.
        shuffle: accepted for interface compatibility; not used here.

    Returns:
        An OCRDataset over the resolved image directory and list file.
    """
    if root_dir is None:
        root_dir = download_data()
    if images_dir is None:
        images_dir = TRAIN_DATA_DIR_NAME
    # Join the resolved value (not the constant) so that a caller-supplied
    # images_dir is honoured instead of being silently overwritten.
    images_dir = path.join(root_dir, images_dir)
    if anno_file is None:
        anno_file = TRAIN_LIST_FILE_NAME
    anno_file = path.join(root_dir, anno_file)
    return OCRDataset(images_dir, anno_file)
def test(
        root_dir=None,
        images_dir=None,
        anno_file=None,
        shuffle=True, ):
    """Build the test OCRDataset.

    Args:
        root_dir: data root; when None the data is fetched with
            download_data().
        images_dir: image directory relative to ``root_dir``; defaults to
            TEST_DATA_DIR_NAME.
        anno_file: annotation list file relative to ``root_dir``; defaults to
            TEST_LIST_FILE_NAME.
        shuffle: accepted for interface compatibility; not used here.

    Returns:
        An OCRDataset over the resolved image directory and list file.
    """
    if root_dir is None:
        root_dir = download_data()
    if images_dir is None:
        images_dir = TEST_DATA_DIR_NAME
    # Join the resolved value (not the constant) so that a caller-supplied
    # images_dir is honoured instead of being silently overwritten.
    images_dir = path.join(root_dir, images_dir)
    if anno_file is None:
        anno_file = TEST_LIST_FILE_NAME
    anno_file = path.join(root_dir, anno_file)
    return OCRDataset(images_dir, anno_file)
def download_data():
    '''Download train and test data.

    Fetches the archive with paddle.dataset.common.download() and, unless
    the data directory already exists next to it, extracts the archive into
    the archive's own directory.

    Returns:
        Path of the extracted data directory.
    '''
    tar_file = paddle.dataset.common.download(
        DATA_URL, CACHE_DIR_NAME, DATA_MD5, save_name=SAVED_FILE_NAME)
    archive_dir = path.dirname(tar_file)
    data_dir = path.join(archive_dir, DATA_DIR_NAME)
    if not path.isdir(data_dir):
        # The context manager closes the archive even if extraction fails.
        with tarfile.open(tar_file, "r:gz") as archive:
            archive.extractall(path=archive_dir)
    return data_dir
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import argparse
import functools
import paddle.fluid.profiler as profiler
import paddle.fluid as fluid
from hapi.model import Input, set_device
from hapi.vision.transforms import BatchCompose
from utility import add_arguments, print_arguments
from utility import SeqAccuracy, LoggerCallBack, SeqBeamAccuracy
from utility import postprocess
from seq2seq_attn import Seq2SeqAttModel, Seq2SeqAttInferModel, WeightCrossEntropy
import data
# Command-line options of the evaluation script.
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 32, "Minibatch size.")
add_arg('test_images', str, None, "The directory of images to be used for test.")
# Help text corrected: this option names the test list, not a training list.
add_arg('test_list', str, None, "The list file of images to be used for test.")
add_arg('init_model', str, 'checkpoint/final', "The init model file of directory.")
add_arg('use_gpu', bool, True, "Whether use GPU to train.")
add_arg('encoder_size', int, 200, "Encoder size.")
add_arg('decoder_size', int, 128, "Decoder size.")
add_arg('embedding_dim', int, 128, "Word vector dim.")
add_arg('num_classes', int, 95, "Number classes.")
add_arg('beam_size', int, 0, "If set beam size, will use beam search.")
add_arg('dynamic', bool, False, "Whether to use dygraph.")
# yapf: enable
def main(FLAGS):
    """Evaluate a trained Seq2SeqAttModel on the test set.

    Builds the model, loads ``FLAGS.init_model`` and runs ``model.evaluate``
    with the SeqAccuracy metric.

    NOTE(review): ``FLAGS.test_images`` and ``FLAGS.test_list`` are parsed
    but not used here; ``data.test()`` is called with its defaults.

    Args:
        FLAGS: parsed command-line namespace of this script.
    """
    device = set_device("gpu" if FLAGS.use_gpu else "cpu")
    if FLAGS.dynamic:
        fluid.enable_dygraph(device)

    model = Seq2SeqAttModel(
        encoder_size=FLAGS.encoder_size,
        decoder_size=FLAGS.decoder_size,
        emb_dim=FLAGS.embedding_dim,
        num_classes=FLAGS.num_classes)

    # Model inputs (image + decoder input) and labels (target + mask).
    pixel = Input([None, 1, 48, 384], "float32", name="pixel")
    label_in = Input([None, None], "int64", name="label_in")
    label_out = Input([None, None], "int64", name="label_out")
    mask = Input([None, None], "float32", name="mask")

    model.prepare(
        loss_function=WeightCrossEntropy(),
        metrics=SeqAccuracy(),
        inputs=[pixel, label_in],
        labels=[label_out, mask],
        device=device)
    model.load(FLAGS.init_model)

    eval_set = data.test()
    collate = BatchCompose(
        [data.Resize(), data.Normalize(), data.PadTarget()])
    sampler = data.BatchSampler(
        eval_set,
        batch_size=FLAGS.batch_size,
        drop_last=False,
        shuffle=False)
    loader = fluid.io.DataLoader(
        eval_set,
        batch_sampler=sampler,
        places=device,
        num_workers=0,
        return_list=True,
        collate_fn=collate)

    model.evaluate(
        eval_data=loader,
        callbacks=[LoggerCallBack(10, 2, FLAGS.batch_size)])
def beam_search(FLAGS):
    """Evaluate with beam-search decoding using Seq2SeqAttInferModel.

    Mirrors ``main`` but builds the inference model with
    ``FLAGS.beam_size``, uses no loss function and reports SeqBeamAccuracy.

    Args:
        FLAGS: parsed command-line namespace of this script.
    """
    device = set_device("gpu" if FLAGS.use_gpu else "cpu")
    if FLAGS.dynamic:
        fluid.enable_dygraph(device)

    model = Seq2SeqAttInferModel(
        encoder_size=FLAGS.encoder_size,
        decoder_size=FLAGS.decoder_size,
        emb_dim=FLAGS.embedding_dim,
        num_classes=FLAGS.num_classes,
        beam_size=FLAGS.beam_size)

    # Model inputs (image + decoder input) and labels (target + mask).
    pixel = Input([None, 1, 48, 384], "float32", name="pixel")
    label_in = Input([None, None], "int64", name="label_in")
    label_out = Input([None, None], "int64", name="label_out")
    mask = Input([None, None], "float32", name="mask")

    model.prepare(
        loss_function=None,
        metrics=SeqBeamAccuracy(),
        inputs=[pixel, label_in],
        labels=[label_out, mask],
        device=device)
    model.load(FLAGS.init_model)

    eval_set = data.test()
    collate = BatchCompose(
        [data.Resize(), data.Normalize(), data.PadTarget()])
    sampler = data.BatchSampler(
        eval_set,
        batch_size=FLAGS.batch_size,
        drop_last=False,
        shuffle=False)
    loader = fluid.io.DataLoader(
        eval_set,
        batch_sampler=sampler,
        places=device,
        num_workers=0,
        return_list=True,
        collate_fn=collate)

    model.evaluate(
        eval_data=loader,
        callbacks=[LoggerCallBack(10, 2, FLAGS.batch_size)])
if __name__ == '__main__':
    FLAGS = parser.parse_args()
    print_arguments(FLAGS)
    # A non-zero beam size selects beam-search evaluation.
    (beam_search if FLAGS.beam_size else main)(FLAGS)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import sys
import random
import numpy as np
import argparse
import functools
from PIL import Image
import paddle.fluid.profiler as profiler
import paddle.fluid as fluid
from hapi.model import Input, set_device
from hapi.datasets.folder import ImageFolder
from hapi.vision.transforms import BatchCompose
from utility import add_arguments, print_arguments
from utility import postprocess, index2word
from seq2seq_attn import Seq2SeqAttInferModel, WeightCrossEntropy
import data
# Command-line options of the prediction script.
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 1, "Minibatch size.")
add_arg('image_path', str, None, "The directory of images to be used for test.")
add_arg('init_model', str, None, "The init model file of directory.")
add_arg('use_gpu', bool, True, "Whether use GPU to train.")
# model hyper parameters
add_arg('encoder_size', int, 200, "Encoder size.")
add_arg('decoder_size', int, 128, "Decoder size.")
add_arg('embedding_dim', int, 128, "Word vector dim.")
add_arg('num_classes', int, 95, "Number classes.")
add_arg('beam_size', int, 3, "Beam size for beam search.")
add_arg('dynamic', bool, False, "Whether to use dygraph.")
# yapf: enable
def main(FLAGS):
    """Run beam-search prediction on every image under ``FLAGS.image_path``.

    Prints, for each image, its entry from the dataset's ``samples`` list
    followed by one decoded string per beam.

    NOTE(review): ``FLAGS.batch_size`` is parsed but not passed to the
    DataLoader built here.

    Args:
        FLAGS: parsed command-line namespace of this script.
    """
    device = set_device("gpu" if FLAGS.use_gpu else "cpu")
    if FLAGS.dynamic:
        fluid.enable_dygraph(device)

    model = Seq2SeqAttInferModel(
        encoder_size=FLAGS.encoder_size,
        decoder_size=FLAGS.decoder_size,
        emb_dim=FLAGS.embedding_dim,
        num_classes=FLAGS.num_classes,
        beam_size=FLAGS.beam_size)
    model.prepare(
        inputs=[Input(
            [None, 1, 48, 384], "float32", name="pixel")],
        device=device)
    model.load(FLAGS.init_model)

    def load_gray(image_file):
        # Images are read as single-channel ('L') pictures.
        return Image.open(image_file).convert('L')

    image_set = ImageFolder(FLAGS.image_path, loader=load_gray)
    collate = BatchCompose([data.Resize(), data.Normalize()])
    loader = fluid.io.DataLoader(
        image_set,
        places=device,
        num_workers=0,
        return_list=True,
        collate_fn=collate)

    image_files = image_set.samples
    seen = 0
    for batch in loader:
        (image, ) = batch
        if not FLAGS.dynamic:
            image = image[0]
        pred = model.test_batch([image])[0]
        # Give 2-D output a trailing axis so it can be transposed like the
        # 3-D case below.
        if len(pred.shape) == 2:
            pred = pred[:, :, np.newaxis]
        pred = np.transpose(pred, [0, 2, 1])
        for beams in pred:
            image_file = image_files[seen]
            seen += 1
            print('Image {}: {}'.format(seen, image_file))
            for beam_idx, beam in enumerate(beams):
                text = "".join(index2word(postprocess(beam)))
                print('{}: {}'.format(beam_idx, text))
# Script entry point: parse the options, echo them, then run prediction.
if __name__ == '__main__':
    FLAGS = parser.parse_args()
    print_arguments(FLAGS)
    main(FLAGS)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid.layers import BeamSearchDecoder
from hapi.text import RNNCell, RNN, DynamicDecode
from hapi.model import Model, Loss
class ConvBNPool(fluid.dygraph.Layer):
    """Two 3x3 conv + batch-norm stages with optional 2x2 max pooling.

    Args:
        in_ch: input channels of the first convolution.
        out_ch: output channels of both convolutions.
        act: activation passed to both batch-norm layers.
        is_test: accepted for interface compatibility; not used here.
        pool: whether a max-pooling layer follows the second stage.
        use_cudnn: forwarded to the convolution and pooling layers.
    """

    def __init__(self,
                 in_ch,
                 out_ch,
                 act="relu",
                 is_test=False,
                 pool=True,
                 use_cudnn=True):
        super(ConvBNPool, self).__init__()
        self.pool = pool

        filter_size = 3
        # Normal(0, std) initializers; std is derived from the kernel area
        # times in_ch for the first conv and times out_ch for the second.
        std_first = (2.0 / (filter_size**2 * in_ch))**0.5
        first_attr = fluid.ParamAttr(
            initializer=fluid.initializer.Normal(0.0, std_first))
        std_second = (2.0 / (filter_size**2 * out_ch))**0.5
        second_attr = fluid.ParamAttr(
            initializer=fluid.initializer.Normal(0.0, std_second))

        self.conv0 = fluid.dygraph.Conv2D(
            in_ch,
            out_ch,
            filter_size,
            padding=1,
            param_attr=first_attr,
            bias_attr=False,
            act=None,
            use_cudnn=use_cudnn)
        self.bn0 = fluid.dygraph.BatchNorm(out_ch, act=act)
        self.conv1 = fluid.dygraph.Conv2D(
            out_ch,
            out_ch,
            filter_size=filter_size,
            padding=1,
            param_attr=second_attr,
            bias_attr=False,
            act=None,
            use_cudnn=use_cudnn)
        self.bn1 = fluid.dygraph.BatchNorm(out_ch, act=act)

        if self.pool:
            # The boolean flag is replaced by the pooling layer itself.
            self.pool = fluid.dygraph.Pool2D(
                pool_size=2,
                pool_type='max',
                pool_stride=2,
                use_cudnn=use_cudnn,
                ceil_mode=True)

    def forward(self, inputs):
        """Apply conv0 -> bn0 -> conv1 -> bn1 and, if enabled, pooling."""
        features = self.bn0(self.conv0(inputs))
        features = self.bn1(self.conv1(features))
        if self.pool:
            features = self.pool(features)
        return features
class CNN(fluid.dygraph.Layer):
    """Convolutional backbone made of four ConvBNPool stages.

    Channel widths are in_ch -> 16 -> 32 -> 64 -> 128; the last stage has no
    pooling.

    Args:
        in_ch: number of input image channels.
        is_test: accepted for interface compatibility; not used here.
    """

    def __init__(self, in_ch=1, is_test=False):
        super(CNN, self).__init__()
        self.conv_bn1 = ConvBNPool(in_ch, 16)
        self.conv_bn2 = ConvBNPool(16, 32)
        self.conv_bn3 = ConvBNPool(32, 64)
        self.conv_bn4 = ConvBNPool(64, 128, pool=False)

    def forward(self, inputs):
        """Run the four stages in order and return the final feature map."""
        features = inputs
        for stage in (self.conv_bn1, self.conv_bn2, self.conv_bn3,
                      self.conv_bn4):
            features = stage(features)
        return features
class GRUCell(RNNCell):
    """GRU cell built from a bias-free Linear projection and a GRUUnit.

    Args:
        input_size: size of the step input.
        hidden_size: size of the hidden state.
        param_attr: parameter attribute shared by the projection and the
            GRU unit.
        bias_attr: bias attribute of the GRU unit.
        gate_activation: gate activation of the GRU unit.
        candidate_activation: candidate activation of the GRU unit.
        origin_mode: forwarded to the GRU unit.
    """

    def __init__(self,
                 input_size,
                 hidden_size,
                 param_attr=None,
                 bias_attr=None,
                 gate_activation='sigmoid',
                 candidate_activation='tanh',
                 origin_mode=False):
        super(GRUCell, self).__init__()
        self.hidden_size = hidden_size
        gate_width = hidden_size * 3

        self.fc_layer = fluid.dygraph.Linear(
            input_size, gate_width, param_attr=param_attr, bias_attr=False)
        self.gru_unit = fluid.dygraph.GRUUnit(
            gate_width,
            param_attr=param_attr,
            bias_attr=bias_attr,
            activation=candidate_activation,
            gate_activation=gate_activation,
            origin_mode=origin_mode)

    def forward(self, inputs, states):
        """Return ``(hidden, hidden)``: step output and new state coincide."""
        projected = self.fc_layer(inputs)
        new_hidden = self.gru_unit(projected, states)[0]
        return new_hidden, new_hidden

    @property
    def state_shape(self):
        return [self.hidden_size]
class Encoder(fluid.dygraph.Layer):
    """CNN backbone followed by a forward and a backward GRU.

    Args:
        in_channel: number of input image channels.
        rnn_hidden_size: hidden size of each GRU direction.
        decoder_size: output size of the projection applied to the
            concatenated GRU outputs.
        is_test: forwarded to the CNN backbone.
    """

    def __init__(
            self,
            in_channel=1,
            rnn_hidden_size=200,
            decoder_size=128,
            is_test=False, ):
        super(Encoder, self).__init__()
        self.rnn_hidden_size = rnn_hidden_size

        self.backbone = CNN(in_ch=in_channel, is_test=is_test)

        weight_attr = fluid.ParamAttr(
            initializer=fluid.initializer.Normal(0.0, 0.02))
        offset_attr = fluid.ParamAttr(
            initializer=fluid.initializer.Normal(0.0, 0.02), learning_rate=2.0)
        # Per-step feature size fed to the GRUs.
        # NOTE(review): 128 * 6 presumably is backbone channels times
        # feature-map height for 48-pixel-high inputs - confirm.
        step_size = 128 * 6

        self.gru_fwd = RNN(cell=GRUCell(
            input_size=step_size,
            hidden_size=rnn_hidden_size,
            param_attr=weight_attr,
            bias_attr=offset_attr,
            candidate_activation='relu'),
                           is_reverse=False,
                           time_major=False)
        self.gru_bwd = RNN(cell=GRUCell(
            input_size=step_size,
            hidden_size=rnn_hidden_size,
            param_attr=weight_attr,
            bias_attr=offset_attr,
            candidate_activation='relu'),
                           is_reverse=True,
                           time_major=False)
        self.encoded_proj_fc = fluid.dygraph.Linear(
            rnn_hidden_size * 2, decoder_size, bias_attr=False)

    def forward(self, inputs):
        """Return ``(gru_bwd, encoded_vector, encoded_proj)``."""
        feature_map = self.backbone(inputs)
        feature_map = fluid.layers.transpose(feature_map, perm=[0, 3, 1, 2])
        # After the transpose the last two axes are merged into one feature
        # vector per position of axis 1.
        _, _, dim_c, dim_h = feature_map.shape
        sequence = fluid.layers.reshape(feature_map, [0, -1, dim_c * dim_h])

        forward_out, _ = self.gru_fwd(sequence)
        backward_out, _ = self.gru_bwd(sequence)

        encoded_vector = fluid.layers.concat(
            input=[forward_out, backward_out], axis=2)
        encoded_proj = self.encoded_proj_fc(encoded_vector)
        return backward_out, encoded_vector, encoded_proj
class Attention(fluid.dygraph.Layer):
    """Additive attention over the encoder outputs.

    Reference: Neural Machine Translation by Jointly Learning to Align and
    Translate. https://arxiv.org/abs/1409.0473

    Args:
        decoder_size: size of the decoder state (and of ``encoder_proj``).
    """

    def __init__(self, decoder_size):
        super(Attention, self).__init__()
        self.fc1 = fluid.dygraph.Linear(
            decoder_size, decoder_size, bias_attr=False)
        self.fc2 = fluid.dygraph.Linear(decoder_size, 1, bias_attr=False)

    def forward(self, encoder_vec, encoder_proj, decoder_state):
        """Return the context: encoder_vec weighted by attention scores."""
        # Alignment model: project the decoder state, add it to every
        # projected encoder position, squash with tanh and score with fc2.
        query = fluid.layers.unsqueeze(self.fc1(decoder_state), [1])
        energy = fluid.layers.tanh(
            fluid.layers.elementwise_add(encoder_proj, query))
        weights = fluid.layers.squeeze(self.fc2(energy), [2])
        weights = fluid.layers.softmax(weights)

        weighted = fluid.layers.elementwise_mul(
            x=encoder_vec, y=weights, axis=0)
        return fluid.layers.reduce_sum(weighted, dim=1)
class DecoderCell(RNNCell):
    """Decoder step: attention context + current word fed to a GRU cell.

    Args:
        encoder_size: hidden size of one encoder GRU direction.
        decoder_size: hidden size of the decoder GRU.
    """

    def __init__(self, encoder_size=200, decoder_size=128):
        super(DecoderCell, self).__init__()
        self.attention = Attention(decoder_size)
        self.gru_cell = GRUCell(
            input_size=encoder_size * 2 + decoder_size,
            hidden_size=decoder_size)

    def forward(self, current_word, states, encoder_vec, encoder_proj):
        """Return ``(hidden, hidden)`` for one decoding step."""
        attended = self.attention(encoder_vec, encoder_proj, states)
        step_input = fluid.layers.concat([current_word, attended], axis=1)
        new_hidden = self.gru_cell(step_input, states)[0]
        return new_hidden, new_hidden
class Decoder(fluid.dygraph.Layer):
    """Attention decoder followed by a softmax classification layer.

    Args:
        num_classes: number of character classes; the output layer has
            ``num_classes + 2`` units.
        emb_dim: accepted for interface compatibility; not used here.
        encoder_size: hidden size of one encoder GRU direction.
        decoder_size: hidden size of the decoder.
    """

    def __init__(self, num_classes, emb_dim, encoder_size, decoder_size):
        super(Decoder, self).__init__()
        self.decoder_attention = RNN(DecoderCell(encoder_size, decoder_size))
        self.fc = fluid.dygraph.Linear(
            decoder_size, num_classes + 2, act='softmax')

    def forward(self, target, initial_states, encoder_vec, encoder_proj):
        """Decode ``target`` and return per-step class probabilities."""
        hidden_seq = self.decoder_attention(
            target,
            initial_states=initial_states,
            encoder_vec=encoder_vec,
            encoder_proj=encoder_proj)[0]
        return self.fc(hidden_seq)
class Seq2SeqAttModel(Model):
    """Attention-based seq2seq OCR model used for training and evaluation.

    Args:
        in_channle: number of input image channels (name kept as is for
            compatibility with existing callers).
        encoder_size: hidden size of one encoder GRU direction.
        decoder_size: hidden size of the decoder.
        emb_dim: embedding size of the target tokens.
        num_classes: number of character classes; embedding and output
            layers use ``num_classes + 2`` entries.
    """

    def __init__(
            self,
            in_channle=1,
            encoder_size=200,
            decoder_size=128,
            emb_dim=128,
            num_classes=None, ):
        super(Seq2SeqAttModel, self).__init__()
        self.encoder = Encoder(in_channle, encoder_size, decoder_size)
        # Maps the backward GRU output to the initial decoder state.
        self.fc = fluid.dygraph.Linear(
            input_dim=encoder_size,
            output_dim=decoder_size,
            bias_attr=False,
            act='relu')
        self.embedding = fluid.dygraph.Embedding(
            [num_classes + 2, emb_dim], dtype='float32')
        self.decoder = Decoder(num_classes, emb_dim, encoder_size,
                               decoder_size)

    def forward(self, inputs, target):
        """Encode ``inputs`` and decode ``target``; return the prediction."""
        backward_out, encoded_vector, encoded_proj = self.encoder(inputs)
        # The decoder is booted from index 0 along axis 1 of the backward
        # GRU output.
        boot_state = self.fc(backward_out[:, 0])
        embedded_target = self.embedding(target)
        return self.decoder(embedded_target, boot_state, encoded_vector,
                            encoded_proj)
class Seq2SeqAttInferModel(Seq2SeqAttModel):
    """Inference variant of Seq2SeqAttModel decoding with beam search.

    Args:
        in_channle, encoder_size, decoder_size, emb_dim, num_classes:
            forwarded to Seq2SeqAttModel.
        beam_size: beam width given to BeamSearchDecoder.
        bos_id: start token id.
        eos_id: end token id.
        max_out_len: maximum number of decoding steps.
    """

    def __init__(
            self,
            in_channle=1,
            encoder_size=200,
            decoder_size=128,
            emb_dim=128,
            num_classes=None,
            beam_size=0,
            bos_id=0,
            eos_id=1,
            max_out_len=20, ):
        super(Seq2SeqAttInferModel, self).__init__(
            in_channle, encoder_size, decoder_size, emb_dim, num_classes)
        self.beam_size = beam_size
        # Dynamic decoder for inference; it reuses the trained decoder cell,
        # embedding and output layer.
        beam_decoder = BeamSearchDecoder(
            self.decoder.decoder_attention.cell,
            start_token=bos_id,
            end_token=eos_id,
            beam_size=beam_size,
            embedding_fn=self.embedding,
            output_fn=self.decoder.fc)
        self.infer_decoder = DynamicDecode(
            beam_decoder, max_step_num=max_out_len, is_test=True)

    def forward(self, inputs, *args):
        """Encode ``inputs`` and return the dynamic decoder's first output."""
        backward_out, encoded_vector, encoded_proj = self.encoder(inputs)
        boot_state = self.fc(backward_out[:, 0])

        if self.beam_size:
            # Tile the batch dimension with beam_size
            tile = BeamSearchDecoder.tile_beam_merge_with_batch
            encoded_vector = tile(encoded_vector, self.beam_size)
            encoded_proj = tile(encoded_proj, self.beam_size)

        # Dynamic decoding with beam search
        decoded = self.infer_decoder(
            inits=boot_state,
            encoder_vec=encoded_vector,
            encoder_proj=encoded_proj)[0]
        return decoded
class WeightCrossEntropy(Loss):
    """Cross entropy weighted by a mask and summed over all elements."""

    def __init__(self):
        super(WeightCrossEntropy, self).__init__(average=False)

    def forward(self, outputs, labels):
        """Return the masked, summed cross entropy.

        Args:
            outputs: model outputs; only the first one is used.
            labels: pair ``(label, mask)``.
        """
        label, mask = labels
        per_token = layers.cross_entropy(outputs[0], label=label)
        masked = layers.elementwise_mul(per_token, mask, axis=0)
        return layers.reduce_sum(masked)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import os
import sys
import random
import numpy as np
import argparse
import functools
import paddle.fluid.profiler as profiler
import paddle.fluid as fluid
from hapi.model import Input, set_device
from hapi.vision.transforms import BatchCompose
from utility import add_arguments, print_arguments
from utility import SeqAccuracy, LoggerCallBack
from seq2seq_attn import Seq2SeqAttModel, WeightCrossEntropy
import data
# Command-line options of the training script.
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('batch_size', int, 32, "Minibatch size.")
add_arg('epoch', int, 30, "Epoch number.")
add_arg('num_workers', int, 0, "workers number.")
add_arg('lr', float, 0.001, "Learning rate.")
add_arg('lr_decay_strategy', str, "", "Learning rate decay strategy.")
add_arg('checkpoint_path', str, "checkpoint", "The directory the model to be saved to.")
add_arg('train_images', str, None, "The directory of images to be used for training.")
add_arg('train_list', str, None, "The list file of images to be used for training.")
add_arg('test_images', str, None, "The directory of images to be used for test.")
# Help text corrected: this option names the test list, not a training list.
add_arg('test_list', str, None, "The list file of images to be used for test.")
add_arg('resume_path', str, None, "The init model file of directory.")
add_arg('use_gpu', bool, True, "Whether use GPU to train.")
# model hyper parameters
add_arg('encoder_size', int, 200, "Encoder size.")
add_arg('decoder_size', int, 128, "Decoder size.")
add_arg('embedding_dim', int, 128, "Word vector dim.")
add_arg('num_classes', int, 95, "Number classes.")
add_arg('gradient_clip', float, 5.0, "Gradient clip value.")
add_arg('dynamic', bool, False, "Whether to use dygraph.")
# yapf: enable
def main(FLAGS):
    """Train Seq2SeqAttModel and evaluate it on the test set every epoch.

    NOTE(review): ``FLAGS.train_images``, ``FLAGS.train_list``,
    ``FLAGS.test_images``, ``FLAGS.test_list`` and ``FLAGS.resume_path`` are
    parsed but not used here; the datasets come from ``data.train()`` and
    ``data.test()`` with their defaults.

    Args:
        FLAGS: parsed command-line namespace of this script.
    """
    device = set_device("gpu" if FLAGS.use_gpu else "cpu")
    if FLAGS.dynamic:
        fluid.enable_dygraph(device)

    model = Seq2SeqAttModel(
        encoder_size=FLAGS.encoder_size,
        decoder_size=FLAGS.decoder_size,
        emb_dim=FLAGS.embedding_dim,
        num_classes=FLAGS.num_classes)

    base_lr = FLAGS.lr
    learning_rate = base_lr
    if FLAGS.lr_decay_strategy == "piecewise_decay":
        # Drop the rate by 10x at steps 200000 and 250000.
        learning_rate = fluid.layers.piecewise_decay(
            [200000, 250000], [base_lr, base_lr * 0.1, base_lr * 0.01])

    optimizer = fluid.optimizer.Adam(
        learning_rate=learning_rate,
        parameter_list=model.parameters(),
        grad_clip=fluid.clip.GradientClipByGlobalNorm(FLAGS.gradient_clip))

    # Model inputs (image + decoder input) and labels (target + mask).
    pixel = Input([None, 1, 48, 384], "float32", name="pixel")
    label_in = Input([None, None], "int64", name="label_in")
    label_out = Input([None, None], "int64", name="label_out")
    mask = Input([None, None], "float32", name="mask")

    model.prepare(
        optimizer,
        WeightCrossEntropy(),
        SeqAccuracy(),
        inputs=[pixel, label_in],
        labels=[label_out, mask])

    train_set = data.train()
    train_collate = BatchCompose(
        [data.Resize(), data.Normalize(), data.PadTarget()])
    train_batches = data.BatchSampler(
        train_set, batch_size=FLAGS.batch_size, shuffle=True)
    train_loader = fluid.io.DataLoader(
        train_set,
        batch_sampler=train_batches,
        places=device,
        num_workers=FLAGS.num_workers,
        return_list=True,
        collate_fn=train_collate)

    eval_set = data.test()
    eval_collate = BatchCompose(
        [data.Resize(), data.Normalize(), data.PadTarget()])
    eval_batches = data.BatchSampler(
        eval_set,
        batch_size=FLAGS.batch_size,
        drop_last=False,
        shuffle=False)
    eval_loader = fluid.io.DataLoader(
        eval_set,
        batch_sampler=eval_batches,
        places=device,
        num_workers=0,
        return_list=True,
        collate_fn=eval_collate)

    model.fit(train_data=train_loader,
              eval_data=eval_loader,
              epochs=FLAGS.epoch,
              save_dir=FLAGS.checkpoint_path,
              callbacks=[LoggerCallBack(10, 2, FLAGS.batch_size)])
# Script entry point: parse the options, echo them, then start training.
if __name__ == '__main__':
    FLAGS = parser.parse_args()
    print_arguments(FLAGS)
    main(FLAGS)
"""Contains common utility functions."""
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import distutils.util
import numpy as np
import paddle.fluid as fluid
import six
from hapi.metrics import Metric
from hapi.callbacks import ProgBarLogger
def print_arguments(args):
    """Print every attribute of a parsed-argument namespace.

    One ``name: value`` line is written per argument, ordered by argument
    name, framed by a header line and a footer line.

    Usage:

    .. code-block:: python

        parser = argparse.ArgumentParser()
        parser.add_argument("name", default="Jonh", type=str, help="User name.")
        args = parser.parse_args()
        print_arguments(args)

    :param args: Input argparse.Namespace for printing.
    :type args: argparse.Namespace
    """
    settings = vars(args)
    print("----------- Configuration Arguments -----------")
    # Argument names are unique, so ordering by name alone gives the same
    # order as ordering the (name, value) pairs.
    for name in sorted(settings):
        print("%s: %s" % (name, settings[name]))
    print("------------------------------------------------")
def add_arguments(argname, type, default, help, argparser, **kwargs):
    """Register the optional argument ``--<argname>`` on ``argparser``.

    When ``type`` is ``bool`` the value is parsed with
    ``distutils.util.strtobool``; any other ``type`` is passed through
    unchanged. The help text gets a `` Default: %(default)s.`` suffix.
    Extra keyword arguments are forwarded to ``add_argument``.

    Usage:

    .. code-block:: python

        parser = argparse.ArgumentParser()
        add_arguments("name", str, "Jonh", "User name.", parser)
        args = parser.parse_args()
    """
    if type == bool:
        converter = distutils.util.strtobool
    else:
        converter = type
    flag = "--" + argname
    help_text = help + ' Default: %(default)s.'
    argparser.add_argument(
        flag, default=default, type=converter, help=help_text, **kwargs)
class SeqAccuracy(Metric):
    """Metric that counts a sample as correct only if its whole prefix matches.

    ``total`` holds the number of matching samples and ``count`` the number
    of samples seen since the last ``reset``.
    """

    def __init__(self, name=None, *args, **kwargs):
        super(SeqAccuracy, self).__init__(*args, **kwargs)
        # The reported name is fixed; the ``name`` argument is not consulted.
        self._name = 'seq_acc'
        self.reset()

    def add_metric_op(self, output, label, mask, *args, **kwargs):
        """Reduce the model output to top-1 indices; pass label and mask on."""
        flat_output = fluid.layers.flatten(output, axis=2)
        _, best_ids = fluid.layers.topk(flat_output, 1)
        return best_ids, label, mask

    def update(self, topk, label, mask, *args, **kwargs):
        """Accumulate one batch and return that batch's accuracy.

        For each row the mask is summed along its last axis; the prediction
        and the label are compared over the first ``sum - 2`` positions.
        """
        batch_size = label.shape[0]
        topk = topk.reshape(batch_size, -1)
        seq_len = np.sum(mask, -1)
        hits = 0
        for row in range(batch_size):
            stop = int(seq_len[row] - 1) - 1
            if np.array_equal(topk[row][:stop], label[row][:stop]):
                self.total += 1
                hits += 1
            self.count += 1
        return float(hits) / batch_size

    def reset(self):
        """Clear the running counters."""
        self.total = 0.
        self.count = 0.

    def accumulate(self):
        """Return the fraction of matching samples since the last reset."""
        return float(self.total) / self.count

    def name(self):
        """Return the metric name."""
        return self._name
class LoggerCallBack(ProgBarLogger):
    """Progress logger that divides every logged loss by the batch size.

    ``train_bs`` is used for the training hooks and ``eval_bs`` for the
    evaluation hooks; ``eval_bs`` falls back to ``train_bs`` when it is not
    given (or is falsy).
    """

    def __init__(self, log_freq=1, verbose=2, train_bs=None, eval_bs=None):
        super(LoggerCallBack, self).__init__(log_freq, verbose)
        self.train_bs = train_bs
        self.eval_bs = eval_bs if eval_bs else train_bs

    @staticmethod
    def _per_sample(logs, batch_size):
        """Return ``logs`` with each ``logs['loss']`` entry divided by ``batch_size``."""
        logs = logs or {}
        logs['loss'] = [loss / batch_size for loss in logs['loss']]
        return logs

    def on_train_batch_end(self, step, logs=None):
        scaled = self._per_sample(logs, self.train_bs)
        super(LoggerCallBack, self).on_train_batch_end(step, scaled)

    def on_epoch_end(self, epoch, logs=None):
        scaled = self._per_sample(logs, self.train_bs)
        super(LoggerCallBack, self).on_epoch_end(epoch, scaled)

    def on_eval_batch_end(self, step, logs=None):
        scaled = self._per_sample(logs, self.eval_bs)
        super(LoggerCallBack, self).on_eval_batch_end(step, scaled)

    def on_eval_end(self, logs=None):
        scaled = self._per_sample(logs, self.eval_bs)
        super(LoggerCallBack, self).on_eval_end(scaled)
def index2word(ids):
    """Convert ids to characters: id ``k`` maps to ``chr(k + 33)``."""
    chars = []
    for k in ids:
        chars.append(chr(int(k + 33)))
    return chars
def postprocess(seq, bos_idx=0, eos_idx=1):
    """Cut ``seq`` at its first end marker and drop begin/end markers.

    A numpy array is first converted to a list. Everything after the first
    ``eos_idx`` is discarded (the whole sequence is kept when there is no
    ``eos_idx``), then every ``bos_idx`` and ``eos_idx`` entry is removed.
    """
    if type(seq) is np.ndarray:
        seq = seq.tolist()
    # Position of the first end marker, or the last index when none exists.
    last = next(
        (pos for pos, tok in enumerate(seq) if tok == eos_idx), len(seq) - 1)
    kept = []
    for tok in seq[:last + 1]:
        if tok != bos_idx and tok != eos_idx:
            kept.append(tok)
    return kept
class SeqBeamAccuracy(Metric):
    """Sequence accuracy over beam-search output.

    A sample counts as correct when any of its beams equals the reference
    label after both have been passed through ``postprocess``. ``total``
    holds the number of correct samples and ``count`` the number of samples
    seen since the last ``reset``.
    """

    def __init__(self, name=None, *args, **kwargs):
        super(SeqBeamAccuracy, self).__init__(*args, **kwargs)
        # The reported name is fixed; the ``name`` argument is not consulted.
        self._name = 'seq_acc'
        self.reset()

    def add_metric_op(self, output, label, mask, *args, **kwargs):
        """Pass the model output, label and mask through unchanged."""
        return output, label, mask

    def update(self, preds, labels, masks, *args, **kwargs):
        """Accumulate one batch and return that batch's accuracy.

        ``preds`` gains a trailing axis when it is 2-D and is then transposed
        with ``[0, 2, 1]`` so that iterating one sample yields one beam at a
        time. ``masks`` is accepted for interface compatibility; the
        comparison relies on ``postprocess`` and does not need it.
        """
        if len(preds.shape) == 2:
            preds = preds[:, :, np.newaxis]
        preds = np.transpose(preds, [0, 2, 1])
        batch_size = labels.shape[0]
        hits = 0
        for i in range(batch_size):
            ref = np.array(postprocess(labels[i]))
            # any() stops at the first matching beam.
            matched = any(
                np.array_equal(np.array(postprocess(beam)), ref)
                for beam in preds[i])
            if matched:
                self.total += 1
                hits += 1
            self.count += 1
        return float(hits) / batch_size

    def reset(self):
        """Clear the running counters."""
        self.total = 0.
        self.count = 0.

    def accumulate(self):
        """Return the fraction of correct samples since the last reset."""
        return float(self.total) / self.count

    def name(self):
        """Return the metric name."""
        return self._name
运行本目录下的范例模型需要安装PaddlePaddle Fluid 1.7版。如果您的 PaddlePaddle 安装版本低于此要求,请按照[安装文档](https://www.paddlepaddle.org.cn/#quick-start)中的说明更新 PaddlePaddle 安装版本。
# Sequence to Sequence (Seq2Seq)
以下是本范例模型的简要目录结构及说明:
```
.
├── README.md # 文档,本文件
├── args.py # 训练、预测以及模型参数配置程序
├── reader.py # 数据读入程序
├── download.py # 数据下载程序
├── train.py # 训练主程序
├── predict.py # 预测主程序
├── seq2seq_attn.py # 带注意力机制的翻译模型程序
└── seq2seq_base.py # 无注意力机制的翻译模型程序
```
## 简介
Sequence to Sequence (Seq2Seq),使用编码器-解码器(Encoder-Decoder)结构,用编码器将源序列编码成vector,再用解码器将该vector解码为目标序列。Seq2Seq 广泛应用于机器翻译,自动对话机器人,文档摘要自动生成,图片描述自动生成等任务中。
本目录包含Seq2Seq的一个经典样例:机器翻译,实现了一个base model(不带attention机制),一个带attention机制的翻译模型。Seq2Seq翻译模型,模拟了人类在进行翻译类任务时的行为:先解析源语言,理解其含义,再根据该含义来写出目标语言的语句。更多关于机器翻译的具体原理和数学表达式,我们推荐参考飞桨官网[机器翻译案例](https://www.paddlepaddle.org.cn/documentation/docs/zh/user_guides/nlp_case/machine_translation/README.cn.html)。
## 模型概览
本模型中,在编码器方面,我们采用了基于LSTM的多层的RNN encoder;在解码器方面,我们使用了带注意力(Attention)机制的RNN decoder,并同时提供了一个不带注意力机制的解码器实现作为对比。在预测时我们使用柱搜索(beam search)算法来生成翻译的目标语句。
## 数据介绍
本教程使用[IWSLT'15 English-Vietnamese data](https://nlp.stanford.edu/projects/nmt/)数据集中的英语到越南语的数据作为训练语料,tst2012的数据作为开发集,tst2013的数据作为测试集。
### 数据获取
```
python download.py
```
## 模型训练
执行以下命令即可训练带有注意力机制的Seq2Seq机器翻译模型:
```sh
export CUDA_VISIBLE_DEVICES=0
python train.py \
--src_lang en --tar_lang vi \
--attention True \
--num_layers 2 \
--hidden_size 512 \
--src_vocab_size 17191 \
--tar_vocab_size 7709 \
--batch_size 128 \
--dropout 0.2 \
--init_scale 0.1 \
--max_grad_norm 5.0 \
--train_data_prefix data/en-vi/train \
--eval_data_prefix data/en-vi/tst2012 \
--test_data_prefix data/en-vi/tst2013 \
--vocab_prefix data/en-vi/vocab \
--use_gpu True \
--model_path ./attention_models
```
可以通过修改 `attention` 参数为False来训练不带注意力机制的Seq2Seq模型,各参数的具体说明请参阅 `args.py` 。训练程序会在每个epoch训练结束之后,save一次模型。
默认使用动态图模式进行训练,可以通过设置 `eager_run` 参数为False来以静态图模式进行训练,如下:
```sh
export CUDA_VISIBLE_DEVICES=0
python train.py \
--src_lang en --tar_lang vi \
--attention True \
--num_layers 2 \
--hidden_size 512 \
--src_vocab_size 17191 \
--tar_vocab_size 7709 \
--batch_size 128 \
--dropout 0.2 \
--init_scale 0.1 \
--max_grad_norm 5.0 \
--train_data_prefix data/en-vi/train \
--eval_data_prefix data/en-vi/tst2012 \
--test_data_prefix data/en-vi/tst2013 \
--vocab_prefix data/en-vi/vocab \
--use_gpu True \
--model_path ./attention_models \
--eager_run False
```
## 模型预测
训练完成之后,可以使用保存的模型(由 `--reload_model` 指定)对test的数据集(由 `--infer_file` 指定)进行beam search解码,命令如下:
```sh
export CUDA_VISIBLE_DEVICES=0
python predict.py \
--attention True \
--src_lang en --tar_lang vi \
--num_layers 2 \
--hidden_size 512 \
--src_vocab_size 17191 \
--tar_vocab_size 7709 \
--batch_size 128 \
--dropout 0.2 \
--init_scale 0.1 \
--max_grad_norm 5.0 \
--vocab_prefix data/en-vi/vocab \
--infer_file data/en-vi/tst2013.en \
--reload_model attention_models/10 \
--infer_output_file infer_output.txt \
--beam_size 10 \
--use_gpu True
```
各参数的具体说明请参阅 `args.py` ,注意预测时所用模型超参数需和训练时一致。和训练类似,预测时同样可以以静态图模式进行,如下:
```sh
export CUDA_VISIBLE_DEVICES=0
python predict.py \
--attention True \
--src_lang en --tar_lang vi \
--num_layers 2 \
--hidden_size 512 \
--src_vocab_size 17191 \
--tar_vocab_size 7709 \
--batch_size 128 \
--dropout 0.2 \
--init_scale 0.1 \
--max_grad_norm 5.0 \
--vocab_prefix data/en-vi/vocab \
--infer_file data/en-vi/tst2013.en \
--reload_model attention_models/10 \
--infer_output_file infer_output.txt \
--beam_size 10 \
--use_gpu True \
--eager_run False
```
## 效果评价
使用 [*multi-bleu.perl*](https://github.com/moses-smt/mosesdecoder.git) 工具来评价模型预测的翻译质量,使用方法如下:
```sh
mosesdecoder/scripts/generic/multi-bleu.perl tst2013.vi < infer_output.txt
```
每个模型分别训练了10次,单次取第10个epoch保存的模型进行预测,取beam_size=10。效果如下(为了便于观察,对10次结果按照升序进行了排序):
```
> no attention
tst2012 BLEU:
[10.75 10.85 10.9 10.94 10.97 11.01 11.01 11.04 11.13 11.4]
tst2013 BLEU:
[10.71 10.71 10.74 10.76 10.91 10.94 11.02 11.16 11.21 11.44]
> with attention
tst2012 BLEU:
[21.14 22.34 22.54 22.65 22.71 22.71 23.08 23.15 23.3 23.4]
tst2013 BLEU:
[23.41 24.79 25.11 25.12 25.19 25.24 25.39 25.61 25.61 25.63]
```
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import distutils.util
def parse_args():
    """Build the seq2seq command-line parser and parse ``sys.argv``.

    Returns:
        argparse.Namespace: the parsed data, model, training, inference and
        profiling options.

    NOTE(review): ``--attention``, ``--use_gpu`` and ``--eager_run`` are
    converted with ``eval``, so the text given on the command line is
    evaluated as a Python expression. That is acceptable only while the
    command line is trusted; consider a dedicated boolean parser.
    """
    parser = argparse.ArgumentParser(description=__doc__)

    # Data locations and language suffixes.
    parser.add_argument(
        "--train_data_prefix", type=str, help="file prefix for train data")
    parser.add_argument(
        "--eval_data_prefix", type=str, help="file prefix for eval data")
    parser.add_argument(
        "--test_data_prefix", type=str, help="file prefix for test data")
    parser.add_argument(
        "--vocab_prefix", type=str, help="file prefix for vocab")
    parser.add_argument("--src_lang", type=str, help="source language suffix")
    parser.add_argument("--tar_lang", type=str, help="target language suffix")

    # Model and optimizer.
    parser.add_argument(
        "--attention",
        type=eval,
        default=False,
        help="Whether use attention model")
    parser.add_argument(
        "--optimizer",
        type=str,
        default='adam',
        help="optimizer to use, only supprt[sgd|adam]")
    parser.add_argument(
        "--learning_rate",
        type=float,
        default=0.001,
        help="learning rate for optimizer")
    parser.add_argument(
        "--num_layers",
        type=int,
        default=1,
        help="layers number of encoder and decoder")
    parser.add_argument(
        "--hidden_size",
        type=int,
        default=100,
        help="hidden size of encoder and decoder")
    parser.add_argument("--src_vocab_size", type=int, help="source vocab size")
    parser.add_argument("--tar_vocab_size", type=int, help="target vocab size")

    # Training schedule and regularization.
    parser.add_argument(
        "--batch_size", type=int, help="batch size of each step")
    parser.add_argument(
        "--max_epoch", type=int, default=12, help="max epoch for the training")
    parser.add_argument(
        "--max_len",
        type=int,
        default=50,
        help="max length for source and target sentence")
    parser.add_argument(
        "--dropout", type=float, default=0.0, help="drop probability")
    parser.add_argument(
        "--init_scale",
        type=float,
        default=0.0,
        help="init scale for parameter")
    parser.add_argument(
        "--max_grad_norm",
        type=float,
        default=5.0,
        help="max grad norm for global norm clip")
    parser.add_argument(
        "--log_freq",
        type=int,
        default=100,
        help="The frequency to print training logs")
    parser.add_argument(
        "--model_path",
        type=str,
        default='model',
        help="model path for model to save")

    # Inference.
    parser.add_argument(
        "--reload_model", type=str, help="reload model to inference")
    parser.add_argument(
        "--infer_file", type=str, help="file name for inference")
    parser.add_argument(
        "--infer_output_file",
        type=str,
        default='infer_output',
        help="file name for inference output")
    # The help text used to repeat "file name for inference" (copy-paste).
    parser.add_argument(
        "--beam_size",
        type=int,
        default=10,
        help="beam size for beam search decoding")

    # Runtime switches.
    parser.add_argument(
        '--use_gpu',
        type=eval,
        default=False,
        help='Whether using gpu [True|False]')
    parser.add_argument(
        '--eager_run', type=eval, default=False, help='Whether to use dygraph')
    parser.add_argument(
        "--enable_ce",
        action='store_true',
        help="The flag indicating whether to run the task "
        "for continuous evaluation.")
    parser.add_argument(
        "--profile", action='store_true', help="Whether enable the profile.")

    # NOTE: profiler args, used for benchmark
    parser.add_argument(
        "--profiler_path",
        type=str,
        default='./seq2seq.profile',
        help="the profiler output file path. (used for benchmark)")

    args = parser.parse_args()
    return args
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
'''
Script for downloading training data.
'''
import os
import urllib
import sys
if sys.version_info >= (3, 0):
import urllib.request
import zipfile
# Module that provides ``urlretrieve``: ``urllib`` itself by default,
# ``urllib.request`` when running on Python 3.
URLLIB = urllib
if sys.version_info >= (3, 0):
    URLLIB = urllib.request

# Base URL the data files are fetched from.
remote_path = 'https://nlp.stanford.edu/projects/nmt/data/iwslt15.en-vi'
# Local directory layout: <base_path>/en-vi/<filename>.
base_path = 'data'
tar_path = os.path.join(base_path, 'en-vi')
# Files to fetch: train / tst2012 / tst2013 sentence files and the two
# vocabularies, each in an ``.en`` and a ``.vi`` variant.
filenames = [
    'train.en', 'train.vi', 'tst2012.en', 'tst2012.vi', 'tst2013.en',
    'tst2013.vi', 'vocab.en', 'vocab.vi'
]
def main(arguments):
    """Download every file in ``filenames`` from ``remote_path`` into ``tar_path``.

    The target directory (and its parent ``base_path``) is created first when
    missing. ``arguments`` is accepted but not used.
    """
    print("Downloading data......")
    # tar_path lives inside base_path, so creating whichever of the two is
    # missing, parent first, covers every case.
    for directory in (base_path, tar_path):
        if not os.path.exists(directory):
            os.mkdir(directory)
    for name in filenames:
        source_url = remote_path + '/' + name
        destination = os.path.join(tar_path, name)
        URLLIB.urlretrieve(source_url, destination)
    print("Downloaded sucess......")
# Script entry point: run the download and use main()'s return value as the
# process exit status.
if __name__ == '__main__':
    sys.exit(main(sys.argv[1:]))
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import io
import random
from functools import partial
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.layers.utils import flatten
from paddle.fluid.io import DataLoader
from hapi.model import Input, set_device
from args import parse_args
from seq2seq_base import BaseInferModel
from seq2seq_attn import AttentionInferModel
from reader import Seq2SeqDataset, Seq2SeqBatchSampler, SortType, prepare_infer_input
def post_process_seq(seq, bos_idx, eos_idx, output_bos=False,
                     output_eos=False):
    """
    Trim a decoded sequence at its first end marker and filter the markers.

    Everything after the first ``eos_idx`` is dropped (the whole sequence is
    kept when no ``eos_idx`` occurs). ``bos_idx`` entries are kept only when
    ``output_bos`` is set, and ``eos_idx`` entries only when ``output_eos``
    is set. Returns a new list.
    """
    # Position of the first end marker, or the last index when none exists.
    last = next(
        (pos for pos, tok in enumerate(seq) if tok == eos_idx), len(seq) - 1)
    kept = []
    for tok in seq[:last + 1]:
        if not (output_bos or tok != bos_idx):
            continue
        if not (output_eos or tok != eos_idx):
            continue
        kept.append(tok)
    return kept
def do_predict(args):
    """Decode ``args.infer_file`` with a trained model and write the result.

    Builds the inference dataset and loader, constructs the attention or
    base inference model according to ``args.attention``, loads the
    parameters named by ``args.reload_model`` and writes one decoded
    sentence per input sample to ``args.infer_output_file``.

    Side effect: ``args.src_vocab_size`` and ``args.tar_vocab_size`` are
    overwritten with the sizes reported by the dataset.

    Raises:
        AssertionError: if ``args.reload_model`` is not set.
    """
    device = set_device("gpu" if args.use_gpu else "cpu")
    if args.eager_run:
        fluid.enable_dygraph(device)

    # define model inputs
    inputs = [
        Input(
            [None, None], "int64", name="src_word"),
        Input(
            [None], "int64", name="src_length"),
    ]

    # define dataloader
    dataset = Seq2SeqDataset(
        fpattern=args.infer_file,
        src_vocab_fpath=args.vocab_prefix + "." + args.src_lang,
        trg_vocab_fpath=args.vocab_prefix + "." + args.tar_lang,
        token_delimiter=None,
        start_mark="<s>",
        end_mark="</s>",
        unk_mark="<unk>")
    trg_idx2word = Seq2SeqDataset.load_dict(
        dict_path=args.vocab_prefix + "." + args.tar_lang, reverse=True)
    # The model below is built from args.tar_vocab_size, so the target size
    # read from the vocab file must be stored under that name. It used to be
    # stored as args.trg_vocab_size and was therefore never used.
    (args.src_vocab_size, args.tar_vocab_size, bos_id, eos_id,
     unk_id) = dataset.get_vocab_summary()
    batch_sampler = Seq2SeqBatchSampler(
        dataset=dataset, use_token_batch=False, batch_size=args.batch_size)
    data_loader = DataLoader(
        dataset=dataset,
        batch_sampler=batch_sampler,
        places=device,
        feed_list=None
        if fluid.in_dygraph_mode() else [x.forward() for x in inputs],
        collate_fn=partial(
            prepare_infer_input, bos_id=bos_id, eos_id=eos_id, pad_id=eos_id),
        num_workers=0,
        return_list=True)

    model_maker = AttentionInferModel if args.attention else BaseInferModel
    model = model_maker(
        args.src_vocab_size,
        args.tar_vocab_size,
        args.hidden_size,
        args.hidden_size,
        args.num_layers,
        args.dropout,
        bos_id=bos_id,
        eos_id=eos_id,
        beam_size=args.beam_size,
        max_out_len=256)

    model.prepare(inputs=inputs)

    # load the trained model
    assert args.reload_model, (
        "Please set reload_model to load the infer model.")
    model.load(args.reload_model)

    # TODO(guosheng): use model.predict when support variant length
    with io.open(args.infer_output_file, 'w', encoding='utf-8') as f:
        for data in data_loader():
            finished_seq = model.test_batch(inputs=flatten(data))[0]
            # Give 2-D output a trailing axis, then swap the last two axes so
            # that iterating one sample yields one candidate at a time.
            # presumably (batch, time, beam) -> (batch, beam, time) — TODO confirm
            if len(finished_seq.shape) == 2:
                finished_seq = finished_seq[:, :, np.newaxis]
            finished_seq = np.transpose(finished_seq, [0, 2, 1])
            for ins in finished_seq:
                # Only the first candidate of each sample is written.
                for beam in ins:
                    id_list = post_process_seq(beam, bos_id, eos_id)
                    word_list = [trg_idx2word[idx] for idx in id_list]
                    f.write(" ".join(word_list) + "\n")
                    break
# Script entry point: parse the command-line options and run prediction.
if __name__ == "__main__":
    args = parse_args()
    do_predict(args)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import glob
import six
import os
import io
import itertools
from functools import partial
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.dygraph.parallel import ParallelEnv
from paddle.fluid.io import BatchSampler, DataLoader, Dataset
def create_data_loader(args, device, for_train=True):
    """Build the training loader and, if configured, the evaluation loader.

    Returns a two-element list ``[train_loader, eval_loader]``; the second
    entry stays ``None`` when ``args.eval_data_prefix`` is empty. Only the
    first (training) dataset is limited to ``args.max_len`` and only its
    sampler runs in distribute mode.

    Side effect: ``args.src_vocab_size`` and ``args.tar_vocab_size`` are
    overwritten with the sizes reported by the dataset. ``for_train`` is
    accepted but not used.
    """
    data_prefixes = [args.train_data_prefix]
    if args.eval_data_prefix:
        data_prefixes.append(args.eval_data_prefix)

    src_vocab_path = args.vocab_prefix + "." + args.src_lang
    trg_vocab_path = args.vocab_prefix + "." + args.tar_lang

    data_loaders = [None, None]
    for i, data_prefix in enumerate(data_prefixes):
        is_first = i == 0
        dataset = Seq2SeqDataset(
            fpattern=data_prefix + "." + args.src_lang,
            trg_fpattern=data_prefix + "." + args.tar_lang,
            src_vocab_fpath=src_vocab_path,
            trg_vocab_fpath=trg_vocab_path,
            token_delimiter=None,
            start_mark="<s>",
            end_mark="</s>",
            unk_mark="<unk>",
            max_length=args.max_len if is_first else None,
            truncate=True,
            trg_add_bos_eos=True)
        (args.src_vocab_size, args.tar_vocab_size, bos_id, eos_id,
         unk_id) = dataset.get_vocab_summary()
        batch_sampler = Seq2SeqBatchSampler(
            dataset=dataset,
            use_token_batch=False,
            batch_size=args.batch_size,
            pool_size=args.batch_size * 20,
            sort_type=SortType.POOL,
            shuffle=not args.enable_ce,
            distribute_mode=is_first)
        # The end-of-sentence id doubles as the padding id.
        collate = partial(
            prepare_train_input, bos_id=bos_id, eos_id=eos_id, pad_id=eos_id)
        data_loaders[i] = DataLoader(
            dataset=dataset,
            batch_sampler=batch_sampler,
            places=device,
            collate_fn=collate,
            num_workers=0,
            return_list=True)
    return data_loaders
def prepare_train_input(insts, bos_id, eos_id, pad_id):
    """Collate ``(source, target)`` samples into padded training arrays.

    Returns ``(src, src_length, trg_in, trg_length, label)`` where ``trg_in``
    is the padded target without its last column, ``label`` is the padded
    target without its first column plus a trailing axis, and ``trg_length``
    is each target length minus one. ``bos_id`` and ``eos_id`` are accepted
    but not used.
    """
    sources = [inst[0] for inst in insts]
    targets = [inst[1] for inst in insts]
    src, src_length = pad_batch_data(sources, pad_id=pad_id)
    trg, trg_length = pad_batch_data(targets, pad_id=pad_id)
    decoder_input = trg[:, :-1]
    label = trg[:, 1:, np.newaxis]
    return src, src_length, decoder_input, trg_length - 1, label
def prepare_infer_input(insts, bos_id, eos_id, pad_id):
    """Collate source-only samples into ``(padded_ids, lengths)``.

    ``bos_id`` and ``eos_id`` are accepted but not used.
    """
    padded = pad_batch_data(insts, pad_id=pad_id)
    return padded[0], padded[1]
def pad_batch_data(insts, pad_id):
    """
    Right-pad every instance with ``pad_id`` to the longest length in the
    batch. Returns the padded int64 array and the int64 array of the
    original lengths.
    """
    lengths = [len(inst) for inst in insts]
    inst_lens = np.array(lengths, dtype="int64")
    max_len = np.max(inst_lens)
    rows = []
    for inst, length in zip(insts, lengths):
        rows.append(inst + [pad_id] * (max_len - length))
    return np.array(rows, dtype="int64"), inst_lens
class SortType(object):
    """String constants naming the sorting strategies a batch sampler accepts."""

    # Sort all samples by length.
    GLOBAL = "global"
    # Sort samples by length inside fixed-size pools.
    POOL = "pool"
    # Do not sort.
    NONE = "none"
class Converter(object):
    """Callable that turns a sentence into a list of vocabulary ids.

    The sentence is split on ``delimiter``; words missing from ``vocab`` map
    to ``unk``. ``beg`` is prepended when ``add_beg`` is set and ``end`` is
    appended when ``add_end`` is set.
    """

    def __init__(self, vocab, beg, end, unk, delimiter, add_beg, add_end):
        self._vocab = vocab
        self._beg = beg
        self._end = end
        self._unk = unk
        self._delimiter = delimiter
        self._add_beg = add_beg
        self._add_end = add_end

    def __call__(self, sentence):
        ids = [self._beg] if self._add_beg else []
        for word in sentence.split(self._delimiter):
            ids.append(self._vocab.get(word, self._unk))
        if self._add_end:
            ids.append(self._end)
        return ids
class ComposedConverter(object):
    """Apply a list of converters to a list of fields, position by position."""

    def __init__(self, converters):
        self._converters = converters

    def __call__(self, fields):
        # zip() stops at the shorter of the two sequences.
        converted = []
        for field, converter in zip(fields, self._converters):
            converted.append(converter(field))
        return converted
class SentenceBatchCreator(object):
    """Collect items into batches holding a fixed number of items."""

    def __init__(self, batch_size):
        self.batch = []
        self._batch_size = batch_size

    def append(self, info):
        """Add ``info``; return the batch once it is full, otherwise ``None``."""
        self.batch.append(info)
        if len(self.batch) != self._batch_size:
            return None
        # Hand the full batch out and start a fresh one.
        full, self.batch = self.batch, []
        return full
class TokenBatchCreator(object):
    """Collect items into batches bounded by a padded-token budget.

    The cost of a batch is the largest ``max_len`` among its items times the
    number of items; ``batch_size`` is the upper limit for that cost.
    """

    def __init__(self, batch_size):
        self.batch = []
        self.max_len = -1
        self._batch_size = batch_size

    def append(self, info):
        """Add ``info``; return the previous batch when ``info`` does not fit.

        When adding ``info`` would push the cost over the budget, the current
        batch is returned and a new batch is started with ``info``. Otherwise
        ``info`` joins the current batch and ``None`` is returned.
        """
        cur_len = info.max_len
        widest = max(self.max_len, cur_len)
        if widest * (len(self.batch) + 1) <= self._batch_size:
            self.max_len = widest
            self.batch.append(info)
            return None
        finished = self.batch
        self.batch = [info]
        self.max_len = cur_len
        return finished
class SampleInfo(object):
    """Index and field lengths of one sample.

    ``lens`` holds the source length and, for paired data, the target length
    as its second entry.
    """

    def __init__(self, i, lens):
        self.i = i
        self.lens = lens
        self.max_len = lens[0]  # to be consistent with the original reader
        # MinMaxFilter.append reads ``info.min_len``; without this attribute
        # any non-None minimum length raises AttributeError. It mirrors
        # ``max_len`` and uses the source length.
        self.min_len = lens[0]

    def get_ranges(self, min_length=None, max_length=None, truncate=False):
        """Return the ``[start, end]`` slice bounds to keep for each field.

        A field qualifies when its length is at least ``min_length`` and at
        most ``max_length`` (``max_length + 2`` for the target); with
        ``truncate`` an over-long field qualifies too and its end is capped
        at that limit. ``None`` is returned unless every field qualifies.
        """
        ranges = []
        # source
        if (min_length is None or self.lens[0] >= min_length) and (
                max_length is None or self.lens[0] <= max_length or truncate):
            end = max_length if truncate and max_length else self.lens[0]
            ranges.append([0, end])
        # target
        if len(self.lens) == 2:
            if (min_length is None or self.lens[1] >= min_length) and (
                    max_length is None or self.lens[1] <= max_length + 2 or
                    truncate):
                end = max_length + 2 if truncate and max_length else self.lens[
                    1]
                ranges.append([0, end])
        return ranges if len(ranges) == len(self.lens) else None
class MinMaxFilter(object):
    """Batch-creator wrapper that drops samples outside a length window.

    A limit given as ``None`` is not enforced. Accepted samples are forwarded
    to the wrapped creator; rejected ones are ignored.
    """

    def __init__(self, max_len, min_len, underlying_creator):
        self._min_len = min_len
        self._max_len = max_len
        self._creator = underlying_creator

    def append(self, info):
        """Forward ``info`` when it is inside the window; otherwise return ``None``."""
        long_enough = self._min_len is None or info.min_len >= self._min_len
        if not long_enough:
            return None
        short_enough = self._max_len is None or info.max_len <= self._max_len
        if not short_enough:
            return None
        return self._creator.append(info)

    @property
    def batch(self):
        """The wrapped creator's current, possibly partial, batch."""
        return self._creator.batch
class Seq2SeqDataset(Dataset):
    """Dataset of id sequences read from text files.

    Every file matching ``fpattern`` is read line by line. When
    ``trg_fpattern`` is given, source and target sentences come from two
    parallel sets of files; otherwise each line is split into fields on
    ``field_delimiter``. Words are mapped to ids with the source and target
    vocabularies, and samples outside the configured length limits are
    dropped, or truncated when ``truncate`` is set.

    The begin, end and unknown ids are looked up in the source vocabulary
    and reused for the target side.
    """

    def __init__(self,
                 src_vocab_fpath,
                 trg_vocab_fpath,
                 fpattern,
                 field_delimiter="\t",
                 token_delimiter=" ",
                 start_mark="<s>",
                 end_mark="<e>",
                 unk_mark="<unk>",
                 trg_fpattern=None,
                 trg_add_bos_eos=False,
                 byte_data=False,
                 min_length=None,
                 max_length=None,
                 truncate=False):
        if byte_data:
            # The WMT16 bpe data used here seems including bytes can not be
            # decoded by utf8. Thus convert str to bytes, and use byte data
            field_delimiter = field_delimiter.encode("utf8")
            token_delimiter = token_delimiter.encode("utf8")
            start_mark = start_mark.encode("utf8")
            end_mark = end_mark.encode("utf8")
            unk_mark = unk_mark.encode("utf8")
        self._byte_data = byte_data
        self._src_vocab = self.load_dict(src_vocab_fpath, byte_data=byte_data)
        self._trg_vocab = self.load_dict(trg_vocab_fpath, byte_data=byte_data)
        self._bos_idx = self._src_vocab[start_mark]
        self._eos_idx = self._src_vocab[end_mark]
        self._unk_idx = self._src_vocab[unk_mark]
        self._field_delimiter = field_delimiter
        self._token_delimiter = token_delimiter
        self._min_length = min_length
        self._max_length = max_length
        self._truncate = truncate
        self._trg_add_bos_eos = trg_add_bos_eos
        self.load_src_trg_ids(fpattern, trg_fpattern)

    def load_src_trg_ids(self, fpattern, trg_fpattern=None):
        """Read all samples and fill the id lists and the sample infos."""
        src_converter = Converter(
            vocab=self._src_vocab,
            beg=self._bos_idx,
            end=self._eos_idx,
            unk=self._unk_idx,
            delimiter=self._token_delimiter,
            add_beg=False,
            add_end=False)

        trg_converter = Converter(
            vocab=self._trg_vocab,
            beg=self._bos_idx,
            end=self._eos_idx,
            unk=self._unk_idx,
            delimiter=self._token_delimiter,
            add_beg=True if self._trg_add_bos_eos else False,
            add_end=True if self._trg_add_bos_eos else False)

        converters = ComposedConverter([src_converter, trg_converter])

        self._src_seq_ids = []
        self._trg_seq_ids = []
        self._sample_infos = []

        slots = [self._src_seq_ids, self._trg_seq_ids]
        for i, line in enumerate(self._load_lines(fpattern, trg_fpattern)):
            fields = converters(line)
            lens = [len(field) for field in fields]
            sample = SampleInfo(i, lens)
            # None means at least one field failed the length limits, so the
            # sample is skipped.
            field_ranges = sample.get_ranges(self._min_length,
                                             self._max_length, self._truncate)
            if field_ranges:
                for field, field_range, slot in zip(fields, field_ranges,
                                                    slots):
                    slot.append(field[field_range[0]:field_range[1]])
                self._sample_infos.append(sample)

    def _load_lines(self, fpattern, trg_fpattern=None):
        """Yield the list of raw fields for every line of the data files."""
        fpaths = glob.glob(fpattern)
        fpaths = sorted(fpaths)  # TODO: Add custum sort
        assert len(fpaths) > 0, "no matching file to the provided data path"

        (f_mode, f_encoding,
         endl) = ("rb", None, b"\n") if self._byte_data else ("r", "utf8",
                                                              "\n")
        if trg_fpattern is None:
            for fpath in fpaths:
                with io.open(fpath, f_mode, encoding=f_encoding) as f:
                    for line in f:
                        fields = line.strip(endl).split(self._field_delimiter)
                        yield fields
        else:
            # separated source and target language data files
            # assume we can get aligned data by sort the two language files
            # TODO: Need more rigorous check
            trg_fpaths = glob.glob(trg_fpattern)
            trg_fpaths = sorted(trg_fpaths)
            # The message used to say "source language" twice and carried the
            # indentation of a backslash-continued string literal.
            assert len(fpaths) == len(trg_fpaths), (
                "the number of source language data files must equal "
                "that of target language")
            for fpath, trg_fpath in zip(fpaths, trg_fpaths):
                with io.open(fpath, f_mode, encoding=f_encoding) as f:
                    with io.open(
                            trg_fpath, f_mode, encoding=f_encoding) as trg_f:
                        for line in zip(f, trg_f):
                            fields = [field.strip(endl) for field in line]
                            yield fields

    @staticmethod
    def load_dict(dict_path, reverse=False, byte_data=False):
        """Load a vocabulary file that has one word per line.

        Returns a ``word -> line index`` dict, or ``line index -> word`` when
        ``reverse`` is set.
        """
        word_dict = {}
        (f_mode, f_encoding,
         endl) = ("rb", None, b"\n") if byte_data else ("r", "utf8", "\n")
        with io.open(dict_path, f_mode, encoding=f_encoding) as fdict:
            for idx, line in enumerate(fdict):
                if reverse:
                    word_dict[idx] = line.strip(endl)
                else:
                    word_dict[line.strip(endl)] = idx
        return word_dict

    def get_vocab_summary(self):
        """Return ``(src_vocab_size, trg_vocab_size, bos_idx, eos_idx, unk_idx)``."""
        return len(self._src_vocab), len(
            self._trg_vocab), self._bos_idx, self._eos_idx, self._unk_idx

    def __getitem__(self, idx):
        # Paired data yields (source_ids, target_ids); source-only data
        # yields the source ids alone.
        return (self._src_seq_ids[idx], self._trg_seq_ids[idx]
                ) if self._trg_seq_ids else self._src_seq_ids[idx]

    def __len__(self):
        return len(self._sample_infos)
class Seq2SeqBatchSampler(BatchSampler):
    """Batch sampler that yields lists of sample indices for a Seq2SeqDataset.

    Samples can be sorted by length globally or inside pools, shuffled, and
    grouped by sentence count or by token budget. In distribute mode each
    rank receives every ``nranks``-th batch.
    """

    def __init__(self,
                 dataset,
                 batch_size,
                 pool_size=10000,
                 sort_type=SortType.NONE,
                 min_length=None,
                 max_length=None,
                 shuffle=False,
                 shuffle_batch=False,
                 use_token_batch=False,
                 clip_last_batch=False,
                 distribute_mode=True,
                 seed=0):
        # Store every constructor argument as an underscore-prefixed
        # attribute (self._dataset, self._batch_size, ...).
        # NOTE(review): BatchSampler.__init__ is not called here.
        for arg, value in locals().items():
            if arg != "self":
                setattr(self, "_" + arg, value)
        # self._random is the numpy.random module itself, so seeding it
        # reseeds NumPy's global random state.
        self._random = np.random
        self._random.seed(seed)
        # for multi-devices
        self._distribute_mode = distribute_mode
        self._nranks = ParallelEnv().nranks
        self._local_rank = ParallelEnv().local_rank
        self._device_id = ParallelEnv().dev_id

    def __iter__(self):
        """Yield one list of sample indices per batch."""
        # global sort or global shuffle
        if self._sort_type == SortType.GLOBAL:
            infos = sorted(
                self._dataset._sample_infos, key=lambda x: x.max_len)
        else:
            if self._shuffle:
                # Shuffles the dataset's own list in place.
                infos = self._dataset._sample_infos
                self._random.shuffle(infos)
            else:
                infos = self._dataset._sample_infos

            if self._sort_type == SortType.POOL:
                reverse = True
                for i in range(0, len(infos), self._pool_size):
                    # to avoid placing short next to long sentences
                    reverse = False  # not reverse
                    # Each pool is sorted ascending by length; the slice
                    # assignment also writes into the dataset's own list.
                    infos[i:i + self._pool_size] = sorted(
                        infos[i:i + self._pool_size],
                        key=lambda x: x.max_len,
                        reverse=reverse)

        batches = []
        # Sentence batches are created nranks times larger and split per
        # rank further below.
        batch_creator = TokenBatchCreator(
            self.
            _batch_size) if self._use_token_batch else SentenceBatchCreator(
                self._batch_size * self._nranks)
        batch_creator = MinMaxFilter(self._max_length, self._min_length,
                                     batch_creator)

        for info in infos:
            batch = batch_creator.append(info)
            if batch is not None:
                batches.append(batch)

        # Keep the trailing partial batch unless clipping was requested.
        if not self._clip_last_batch and len(batch_creator.batch) != 0:
            batches.append(batch_creator.batch)

        if self._shuffle_batch:
            self._random.shuffle(batches)

        if not self._use_token_batch:
            # when producing batches according to sequence number, to confirm
            # neighbor batches which would be feed and run parallel have similar
            # length (thus similar computational cost) after shuffle, we as take
            # them as a whole when shuffling and split here
            batches = [[
                batch[self._batch_size * i:self._batch_size * (i + 1)]
                for i in range(self._nranks)
            ] for batch in batches]
            batches = list(itertools.chain.from_iterable(batches))

        # for multi-device
        for batch_id, batch in enumerate(batches):
            if not self._distribute_mode or (
                    batch_id % self._nranks == self._local_rank):
                batch_indices = [info.i for info in batch]
                yield batch_indices
        if self._distribute_mode and len(batches) % self._nranks != 0:
            if self._local_rank >= len(batches) % self._nranks:
                # use previous data to pad
                # NOTE(review): batch_indices is unbound here if this rank
                # yielded no batch above.
                yield batch_indices

    def __len__(self):
        """Return the batch count, rounded up, for sentence batching."""
        if not self._use_token_batch:
            batch_number = (
                len(self._dataset) + self._batch_size * self._nranks - 1) // (
                    self._batch_size * self._nranks)
        else:
            # TODO(guosheng): fix the uncertain length
            batch_number = 1
        return batch_number
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid import ParamAttr
from paddle.fluid.initializer import UniformInitializer
from paddle.fluid.dygraph import Embedding, Linear, Layer
from paddle.fluid.layers import BeamSearchDecoder
from hapi.model import Model, Loss
from hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
from seq2seq_base import Encoder
class AttentionLayer(Layer):
    """Dot-product attention over the encoder output, then a projection.

    The encoder output is projected with ``input_proj``; the decoder hidden
    state is scored against it, the softmax-weighted sum is concatenated with
    the hidden state and passed through ``output_proj``.
    """

    def __init__(self, hidden_size, bias=False, init_scale=0.1):
        super(AttentionLayer, self).__init__()

        def uniform_attr():
            # A fresh attribute object per layer, uniform in
            # [-init_scale, init_scale].
            return ParamAttr(initializer=UniformInitializer(
                low=-init_scale, high=init_scale))

        self.input_proj = Linear(
            hidden_size,
            hidden_size,
            param_attr=uniform_attr(),
            bias_attr=bias)
        self.output_proj = Linear(
            hidden_size + hidden_size,
            hidden_size,
            param_attr=uniform_attr(),
            bias_attr=bias)

    def forward(self, hidden, encoder_output, encoder_padding_mask):
        # assumes hidden is (batch, hidden) and encoder_output is
        # (batch, src_len, hidden) — TODO confirm
        projected = self.input_proj(encoder_output)
        query = layers.unsqueeze(hidden, [1])
        scores = layers.matmul(query, projected, transpose_y=True)
        if encoder_padding_mask is not None:
            # The mask is added to the raw scores before the softmax.
            scores = layers.elementwise_add(scores, encoder_padding_mask)
        weights = layers.softmax(scores)
        context = layers.squeeze(layers.matmul(weights, projected), [1])
        combined = layers.concat([context, hidden], 1)
        return self.output_proj(combined)
class DecoderCell(RNNCell):
    """One decoding step: stacked LSTM cells followed by attention.

    The first LSTM cell consumes the step input concatenated with the
    previous attention output (``input_feed``); the attention output of the
    current step is both the step output and the next ``input_feed``.
    """

    def __init__(self,
                 num_layers,
                 input_size,
                 hidden_size,
                 dropout_prob=0.,
                 init_scale=0.1):
        super(DecoderCell, self).__init__()
        self.dropout_prob = dropout_prob
        # use add_sublayer to add multi-layers
        self.lstm_cells = []
        for i in range(num_layers):
            # Only the first layer sees the input concatenated with the feed.
            cell_input_size = (input_size + hidden_size
                               if i == 0 else hidden_size)
            cell = BasicLSTMCell(
                input_size=cell_input_size,
                hidden_size=hidden_size,
                param_attr=ParamAttr(initializer=UniformInitializer(
                    low=-init_scale, high=init_scale)))
            self.lstm_cells.append(self.add_sublayer("lstm_%d" % i, cell))
        self.attention_layer = AttentionLayer(hidden_size)

    def forward(self,
                step_input,
                states,
                encoder_output,
                encoder_padding_mask=None):
        lstm_states, input_feed = states
        layer_input = layers.concat([step_input, input_feed], 1)
        new_lstm_states = []
        for i, lstm_cell in enumerate(self.lstm_cells):
            out, new_state = lstm_cell(layer_input, lstm_states[i])
            if self.dropout_prob > 0:
                layer_input = layers.dropout(
                    out,
                    self.dropout_prob,
                    dropout_implementation='upscale_in_train')
            else:
                layer_input = out
            new_lstm_states.append(new_state)
        out = self.attention_layer(layer_input, encoder_output,
                                   encoder_padding_mask)
        return out, [new_lstm_states, out]
class Decoder(Layer):
    """Attention decoder: embedding, attention RNN and vocabulary projection.

    Args:
        vocab_size: Target vocabulary size (embedding rows, output width).
        embed_dim: Embedding width, also the ``DecoderCell`` input size.
        hidden_size: Hidden width of the decoder cell.
        num_layers: Number of LSTM layers inside the decoder cell.
        dropout_prob: Dropout probability forwarded to the decoder cell.
        init_scale: Parameters are initialised uniformly in
            ``[-init_scale, init_scale]``.
    """

    def __init__(self,
                 vocab_size,
                 embed_dim,
                 hidden_size,
                 num_layers,
                 dropout_prob=0.,
                 init_scale=0.1):
        super(Decoder, self).__init__()

        def uniform_attr():
            # A separate ParamAttr for every parameterised sub-layer.
            return ParamAttr(initializer=UniformInitializer(
                low=-init_scale, high=init_scale))

        self.embedder = Embedding(
            size=[vocab_size, embed_dim], param_attr=uniform_attr())

        attention_cell = DecoderCell(num_layers, embed_dim, hidden_size,
                                     dropout_prob, init_scale)
        self.lstm_attention = RNN(attention_cell,
                                  is_reverse=False,
                                  time_major=False)

        # Bias-free projection from hidden states to vocabulary logits.
        self.output_layer = Linear(
            hidden_size,
            vocab_size,
            param_attr=uniform_attr(),
            bias_attr=False)

    def forward(self, target, decoder_initial_states, encoder_output,
                encoder_padding_mask):
        """Return the output-layer projection of every decoder step.

        ``encoder_output`` and ``encoder_padding_mask`` are handed to the
        RNN as keyword arguments alongside the initial states.
        """
        embedded = self.embedder(target)
        step_outputs, _ = self.lstm_attention(
            embedded,
            initial_states=decoder_initial_states,
            encoder_output=encoder_output,
            encoder_padding_mask=encoder_padding_mask)
        return self.output_layer(step_outputs)
class AttentionModel(Model):
    """Sequence-to-sequence model with an attention decoder (training form).

    Args:
        src_vocab_size: Source vocabulary size, given to the encoder.
        trg_vocab_size: Target vocabulary size, given to the decoder.
        embed_dim: Embedding width for encoder and decoder.
        hidden_size: Hidden width for encoder and decoder.
        num_layers: Number of stacked LSTM layers on each side.
        dropout_prob: Dropout probability for encoder and decoder.
        init_scale: Range of the uniform parameter initialiser.
    """

    def __init__(self,
                 src_vocab_size,
                 trg_vocab_size,
                 embed_dim,
                 hidden_size,
                 num_layers,
                 dropout_prob=0.,
                 init_scale=0.1):
        super(AttentionModel, self).__init__()
        self.hidden_size = hidden_size
        self.encoder = Encoder(src_vocab_size, embed_dim, hidden_size,
                               num_layers, dropout_prob, init_scale)
        self.decoder = Decoder(trg_vocab_size, embed_dim, hidden_size,
                               num_layers, dropout_prob, init_scale)

    def forward(self, src, src_length, trg):
        """Encode ``src`` and decode ``trg`` with attention.

        Returns the decoder's output-layer projection for every target step.
        """
        encoder_output, encoder_final_state = self.encoder(src, src_length)

        # Decoder states follow DecoderCell's layout:
        # [[h, c] * num_layers, input_feed]. The input feed starts from the
        # cell's initial state of width hidden_size.
        input_feed = self.decoder.lstm_attention.cell.get_initial_states(
            batch_ref=encoder_output, shape=[self.hidden_size])
        decoder_initial_states = [encoder_final_state, input_feed]

        # Additive attention mask that keeps attention off the paddings:
        # entries where the sequence mask is 0 become -1e9, the others 0.
        src_mask = layers.sequence_mask(
            src_length,
            maxlen=layers.shape(src)[1],
            dtype=encoder_output.dtype)
        encoder_padding_mask = layers.unsqueeze((src_mask - 1.0) * 1e9, [1])

        # Decoder with attention.
        return self.decoder(trg, decoder_initial_states, encoder_output,
                            encoder_padding_mask)
class AttentionInferModel(AttentionModel):
    """Inference variant of ``AttentionModel`` that decodes with beam search.

    Args:
        src_vocab_size, trg_vocab_size, embed_dim, hidden_size, num_layers,
        dropout_prob: Forwarded unchanged to ``AttentionModel``.
        bos_id: Start token id for the beam search decoder.
        eos_id: End token id for the beam search decoder.
        beam_size: Beam width.
        max_out_len: Maximum number of decoding steps.
    """

    def __init__(self,
                 src_vocab_size,
                 trg_vocab_size,
                 embed_dim,
                 hidden_size,
                 num_layers,
                 dropout_prob=0.,
                 bos_id=0,
                 eos_id=1,
                 beam_size=4,
                 max_out_len=256):
        # Inference-only settings are stored before the base initialiser
        # runs; only the model hyper-parameters are passed on to it.
        self.bos_id = bos_id
        self.eos_id = eos_id
        self.beam_size = beam_size
        self.max_out_len = max_out_len
        super(AttentionInferModel, self).__init__(
            src_vocab_size=src_vocab_size,
            trg_vocab_size=trg_vocab_size,
            embed_dim=embed_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout_prob=dropout_prob)

        # Dynamic decoder for inference. It reuses the training decoder's
        # cell, embedding and output projection.
        beam_decoder = BeamSearchDecoder(
            self.decoder.lstm_attention.cell,
            start_token=bos_id,
            end_token=eos_id,
            beam_size=beam_size,
            embedding_fn=self.decoder.embedder,
            output_fn=self.decoder.output_layer)
        self.beam_search_decoder = DynamicDecode(
            beam_decoder, max_step_num=max_out_len, is_test=True)

    def forward(self, src, src_length):
        """Encode ``src`` and return the first output of beam-search decoding."""
        encoder_output, encoder_final_state = self.encoder(src, src_length)

        # Decoder initial states: encoder final state plus initial input feed.
        input_feed = self.decoder.lstm_attention.cell.get_initial_states(
            batch_ref=encoder_output, shape=[self.hidden_size])
        decoder_initial_states = [encoder_final_state, input_feed]

        # Additive attention mask that keeps attention off the paddings:
        # entries where the sequence mask is 0 become -1e9, the others 0.
        src_mask = layers.sequence_mask(
            src_length,
            maxlen=layers.shape(src)[1],
            dtype=encoder_output.dtype)
        encoder_padding_mask = layers.unsqueeze((src_mask - 1.0) * 1e9, [1])

        # Tile the batch dimension with beam_size.
        encoder_output = BeamSearchDecoder.tile_beam_merge_with_batch(
            encoder_output, self.beam_size)
        encoder_padding_mask = BeamSearchDecoder.tile_beam_merge_with_batch(
            encoder_padding_mask, self.beam_size)

        # Dynamic decoding with beam search; the second return value is
        # discarded.
        rs, _ = self.beam_search_decoder(
            inits=decoder_initial_states,
            encoder_output=encoder_output,
            encoder_padding_mask=encoder_padding_mask)
        return rs
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid import ParamAttr
from paddle.fluid.initializer import UniformInitializer
from paddle.fluid.dygraph import Embedding, Linear, Layer
from paddle.fluid.layers import BeamSearchDecoder
from hapi.model import Model, Loss
from hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
class CrossEntropyCriterion(Loss):
    """Padding-masked softmax cross entropy for sequence outputs."""

    def __init__(self):
        super(CrossEntropyCriterion, self).__init__()

    def forward(self, outputs, labels):
        """Compute the loss from model outputs and labels.

        Args:
            outputs: Model outputs; only ``outputs[0]`` (the logits) is used.
            labels: Pair ``(trg_length, label)``.

        Returns:
            The masked cost averaged over dim 0 and then summed.
        """
        logits = outputs[0]
        trg_length, label = labels

        # Mask built from the target lengths so padded steps add no cost.
        pad_mask = layers.sequence_mask(
            trg_length, maxlen=layers.shape(logits)[1], dtype=logits.dtype)
        token_cost = layers.softmax_with_cross_entropy(
            logits=logits, label=label, soft_label=False)
        masked_cost = layers.elementwise_mul(token_cost, pad_mask, axis=0)

        mean_over_batch = layers.reduce_mean(masked_cost, dim=[0])
        return layers.reduce_sum(mean_over_batch)
class EncoderCell(RNNCell):
    """Stack of ``BasicLSTMCell`` layers with optional dropout in between.

    Args:
        num_layers: Number of stacked LSTM cells.
        input_size: Input width of the first cell; later cells take
            ``hidden_size`` inputs.
        hidden_size: Hidden width of every cell.
        dropout_prob: Dropout applied to each cell's output when > 0.
        init_scale: Parameters are initialised uniformly in
            ``[-init_scale, init_scale]``.
    """

    def __init__(self,
                 num_layers,
                 input_size,
                 hidden_size,
                 dropout_prob=0.,
                 init_scale=0.1):
        super(EncoderCell, self).__init__()
        self.dropout_prob = dropout_prob
        # add_sublayer registers every cell; the list keeps them in order.
        self.lstm_cells = []
        for layer_idx in range(num_layers):
            layer_input_size = input_size if layer_idx == 0 else hidden_size
            initializer = UniformInitializer(low=-init_scale, high=init_scale)
            registered = self.add_sublayer(
                "lstm_%d" % layer_idx,
                BasicLSTMCell(
                    input_size=layer_input_size,
                    hidden_size=hidden_size,
                    param_attr=ParamAttr(initializer=initializer)))
            self.lstm_cells.append(registered)

    def forward(self, step_input, states):
        """Run one step through every layer.

        Args:
            step_input: Input for this step.
            states: Per-layer states, indexed in layer order.

        Returns:
            ``(output, new_states)`` where ``output`` is the last layer's
            (possibly dropped-out) output and ``new_states`` holds one entry
            per layer.
        """
        layer_input = step_input
        new_states = []
        for idx, cell in enumerate(self.lstm_cells):
            cell_out, cell_state = cell(layer_input, states[idx])
            if self.dropout_prob > 0:
                layer_input = layers.dropout(
                    cell_out,
                    self.dropout_prob,
                    dropout_implementation='upscale_in_train')
            else:
                layer_input = cell_out
            new_states.append(cell_state)
        return layer_input, new_states

    @property
    def state_shape(self):
        """List of every stacked cell's ``state_shape``, in layer order."""
        shapes = []
        for cell in self.lstm_cells:
            shapes.append(cell.state_shape)
        return shapes
class Encoder(Layer):
    """Embedding followed by a stacked-LSTM RNN over the source sequence.

    Args:
        vocab_size: Source vocabulary size (embedding rows).
        embed_dim: Embedding width, also the ``EncoderCell`` input size.
        hidden_size: Hidden width of the encoder cell.
        num_layers: Number of LSTM layers inside the encoder cell.
        dropout_prob: Dropout probability forwarded to the encoder cell.
        init_scale: Parameters are initialised uniformly in
            ``[-init_scale, init_scale]``.
    """

    def __init__(self,
                 vocab_size,
                 embed_dim,
                 hidden_size,
                 num_layers,
                 dropout_prob=0.,
                 init_scale=0.1):
        super(Encoder, self).__init__()
        embedding_init = UniformInitializer(low=-init_scale, high=init_scale)
        self.embedder = Embedding(
            size=[vocab_size, embed_dim],
            param_attr=ParamAttr(initializer=embedding_init))

        stacked_cell = EncoderCell(num_layers, embed_dim, hidden_size,
                                   dropout_prob, init_scale)
        self.stack_lstm = RNN(stacked_cell,
                              is_reverse=False,
                              time_major=False)

    def forward(self, sequence, sequence_length):
        """Return ``(encoder_output, encoder_state)`` for the embedded input.

        ``sequence_length`` is passed to the RNN as ``sequence_length``.
        """
        embedded = self.embedder(sequence)
        encoder_output, encoder_state = self.stack_lstm(
            embedded, sequence_length=sequence_length)
        return encoder_output, encoder_state
# The attention-free decoder uses the same stacked-LSTM cell as the encoder.
DecoderCell = EncoderCell


class Decoder(Layer):
    """Attention-free decoder: embedding, stacked-LSTM RNN and projection.

    Args:
        vocab_size: Target vocabulary size (embedding rows, output width).
        embed_dim: Embedding width, also the cell input size.
        hidden_size: Hidden width of the decoder cell.
        num_layers: Number of LSTM layers inside the decoder cell.
        dropout_prob: Dropout probability forwarded to the decoder cell.
        init_scale: Parameters are initialised uniformly in
            ``[-init_scale, init_scale]``.
    """

    def __init__(self,
                 vocab_size,
                 embed_dim,
                 hidden_size,
                 num_layers,
                 dropout_prob=0.,
                 init_scale=0.1):
        super(Decoder, self).__init__()

        def uniform_attr():
            # A separate ParamAttr for every parameterised sub-layer.
            return ParamAttr(initializer=UniformInitializer(
                low=-init_scale, high=init_scale))

        self.embedder = Embedding(
            size=[vocab_size, embed_dim], param_attr=uniform_attr())

        stacked_cell = DecoderCell(num_layers, embed_dim, hidden_size,
                                   dropout_prob, init_scale)
        self.stack_lstm = RNN(stacked_cell,
                              is_reverse=False,
                              time_major=False)

        # Bias-free projection from hidden states to vocabulary logits.
        self.output_layer = Linear(
            hidden_size,
            vocab_size,
            param_attr=uniform_attr(),
            bias_attr=False)

    def forward(self, target, decoder_initial_states):
        """Return the output-layer projection of every decoder step."""
        embedded = self.embedder(target)
        step_outputs, _ = self.stack_lstm(
            embedded, initial_states=decoder_initial_states)
        return self.output_layer(step_outputs)
class BaseModel(Model):
    """Plain encoder-decoder sequence-to-sequence model (training form).

    Args:
        src_vocab_size: Source vocabulary size, given to the encoder.
        trg_vocab_size: Target vocabulary size, given to the decoder.
        embed_dim: Embedding width for encoder and decoder.
        hidden_size: Hidden width for encoder and decoder.
        num_layers: Number of stacked LSTM layers on each side.
        dropout_prob: Dropout probability for encoder and decoder.
        init_scale: Range of the uniform parameter initialiser.
    """

    def __init__(self,
                 src_vocab_size,
                 trg_vocab_size,
                 embed_dim,
                 hidden_size,
                 num_layers,
                 dropout_prob=0.,
                 init_scale=0.1):
        super(BaseModel, self).__init__()
        self.hidden_size = hidden_size
        self.encoder = Encoder(src_vocab_size, embed_dim, hidden_size,
                               num_layers, dropout_prob, init_scale)
        self.decoder = Decoder(trg_vocab_size, embed_dim, hidden_size,
                               num_layers, dropout_prob, init_scale)

    def forward(self, src, src_length, trg):
        """Encode ``src`` and decode ``trg`` from the encoder's final states.

        The encoder outputs themselves are not used here; only its final
        states are passed on as the decoder's initial states.
        """
        _, encoder_final_states = self.encoder(src, src_length)
        return self.decoder(trg, encoder_final_states)
class BaseInferModel(BaseModel):
    """Inference variant of ``BaseModel`` that decodes with beam search.

    Args:
        src_vocab_size, trg_vocab_size, embed_dim, hidden_size, num_layers,
        dropout_prob: Forwarded unchanged to ``BaseModel``.
        bos_id: Start token id for the beam search decoder.
        eos_id: End token id for the beam search decoder.
        beam_size: Beam width.
        max_out_len: Maximum number of decoding steps.
    """

    def __init__(self,
                 src_vocab_size,
                 trg_vocab_size,
                 embed_dim,
                 hidden_size,
                 num_layers,
                 dropout_prob=0.,
                 bos_id=0,
                 eos_id=1,
                 beam_size=4,
                 max_out_len=256):
        # Inference-only settings are stored before the base initialiser
        # runs; only the model hyper-parameters are passed on to it.
        self.bos_id = bos_id
        self.eos_id = eos_id
        self.beam_size = beam_size
        self.max_out_len = max_out_len
        super(BaseInferModel, self).__init__(
            src_vocab_size=src_vocab_size,
            trg_vocab_size=trg_vocab_size,
            embed_dim=embed_dim,
            hidden_size=hidden_size,
            num_layers=num_layers,
            dropout_prob=dropout_prob)

        # Dynamic decoder for inference. It reuses the training decoder's
        # cell, embedding and output projection.
        beam_decoder = BeamSearchDecoder(
            self.decoder.stack_lstm.cell,
            start_token=bos_id,
            end_token=eos_id,
            beam_size=beam_size,
            embedding_fn=self.decoder.embedder,
            output_fn=self.decoder.output_layer)
        self.beam_search_decoder = DynamicDecode(
            beam_decoder, max_step_num=max_out_len, is_test=True)

    def forward(self, src, src_length):
        """Encode ``src`` and return the first output of beam-search decoding."""
        _, encoder_final_states = self.encoder(src, src_length)
        # Dynamic decoding with beam search, seeded with the encoder's final
        # states; the second return value is discarded.
        rs, _ = self.beam_search_decoder(inits=encoder_final_states)
        return rs
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import os
import random
from functools import partial
import numpy as np
import paddle.fluid as fluid
from paddle.fluid.io import DataLoader
from hapi.model import Input, set_device
from args import parse_args
from seq2seq_base import BaseModel, CrossEntropyCriterion
from seq2seq_attn import AttentionModel
from reader import create_data_loader
from utility import PPL, TrainCallback
def do_train(args):
    """Build the seq2seq model selected by ``args`` and run ``model.fit``.

    Reads from ``args``: ``use_gpu``, ``eager_run``, ``enable_ce``,
    ``attention``, ``src_vocab_size``, ``tar_vocab_size``, ``hidden_size``,
    ``num_layers``, ``dropout``, ``max_grad_norm``, ``learning_rate``,
    ``max_epoch``, ``model_path`` and ``log_freq``. ``args`` is also handed
    to ``create_data_loader``.
    """
    device = set_device("gpu" if args.use_gpu else "cpu")
    if args.eager_run:
        fluid.enable_dygraph(device)

    if args.enable_ce:
        # Fixed seeds on the default main and startup programs.
        fluid.default_main_program().random_seed = 102
        fluid.default_startup_program().random_seed = 102

    # Model input and label specifications.
    inputs = [
        Input([None, None], "int64", name="src_word"),
        Input([None], "int64", name="src_length"),
        Input([None, None], "int64", name="trg_word"),
    ]
    labels = [
        Input([None], "int64", name="trg_length"),
        Input([None, None, 1], "int64", name="label"),
    ]

    # Data loaders for training and evaluation.
    train_loader, eval_loader = create_data_loader(args, device)

    # args.hidden_size is passed for both embed_dim and hidden_size.
    model_cls = AttentionModel if args.attention else BaseModel
    model = model_cls(args.src_vocab_size, args.tar_vocab_size,
                      args.hidden_size, args.hidden_size, args.num_layers,
                      args.dropout)

    clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=args.max_grad_norm)
    optimizer = fluid.optimizer.Adam(
        learning_rate=args.learning_rate,
        parameter_list=model.parameters(),
        grad_clip=clip)

    ppl_metric = PPL(reset_freq=100)  # ppl for every 100 batches
    model.prepare(
        optimizer,
        CrossEntropyCriterion(),
        ppl_metric,
        inputs=inputs,
        labels=labels)

    fit_options = dict(
        train_data=train_loader,
        eval_data=eval_loader,
        epochs=args.max_epoch,
        eval_freq=1,
        save_freq=1,
        save_dir=args.model_path,
        callbacks=[TrainCallback(ppl_metric, args.log_freq)])
    model.fit(**fit_options)


if __name__ == "__main__":
    do_train(parse_args())
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math
import paddle.fluid as fluid
from hapi.metrics import Metric
from hapi.callbacks import ProgBarLogger
class TrainCallback(ProgBarLogger):
    """Progress logger that reports perplexity in place of the raw loss.

    Args:
        ppl: ``PPL`` metric used to accumulate the loss and compute the
            perplexity.
        log_freq: Forwarded to ``ProgBarLogger``.
        verbose: Forwarded to ``ProgBarLogger``.
    """

    def __init__(self, ppl, log_freq, verbose=2):
        super(TrainCallback, self).__init__(log_freq, verbose)
        self.ppl = ppl

    def on_train_begin(self, logs=None):
        super(TrainCallback, self).on_train_begin(logs)
        # Overwrite the metric list so only "ppl" is printed, not the loss.
        self.train_metrics = ["ppl"]

    def on_epoch_begin(self, epoch=None, logs=None):
        super(TrainCallback, self).on_epoch_begin(epoch, logs)
        self.ppl.reset()

    def on_train_batch_end(self, step, logs=None):
        """Store the running perplexity in ``logs`` and reset periodically."""
        batch_loss = logs["loss"][0]
        logs["ppl"] = self.ppl.cal_acc_ppl(batch_loss, logs["batch_size"])
        # Start a fresh accumulation window every reset_freq steps.
        window_finished = step > 0 and step % self.ppl.reset_freq == 0
        if window_finished:
            self.ppl.reset()
        super(TrainCallback, self).on_train_batch_end(step, logs)

    def on_eval_begin(self, logs=None):
        super(TrainCallback, self).on_eval_begin(logs)
        self.eval_metrics = ["ppl"]
        self.ppl.reset()

    def on_eval_batch_end(self, step, logs=None):
        """Store the running evaluation perplexity in ``logs``."""
        batch_loss = logs["loss"][0]
        logs["ppl"] = self.ppl.cal_acc_ppl(batch_loss, logs["batch_size"])
        super(TrainCallback, self).on_eval_batch_end(step, logs)
class PPL(Metric):
    """Perplexity metric accumulated over a window of batches.

    The metric itself only counts words (through ``add_metric_op`` and
    ``update``). The loss is accumulated separately through
    ``cal_acc_ppl``, which also returns the current perplexity.

    Args:
        reset_freq: Stored for callers, which use it to decide how often to
            call ``reset``.
        name: Metric name; defaults to ``"ppl"``.
    """

    def __init__(self, reset_freq=100, name=None):
        super(PPL, self).__init__()
        self._name = name or "ppl"
        self.reset_freq = reset_freq
        self.reset()

    def add_metric_op(self, pred, seq_length, label):
        """Return the sum of ``seq_length``; ``pred`` and ``label`` are unused."""
        word_num = fluid.layers.reduce_sum(seq_length)
        return word_num

    def update(self, word_num):
        """Add ``word_num`` to the running word count and return it."""
        self.word_count += word_num
        return word_num

    def reset(self):
        """Clear the accumulated loss and word count."""
        self.total_loss = 0
        self.word_count = 0

    def accumulate(self):
        """Return the number of words counted since the last reset."""
        return self.word_count

    def name(self):
        """Return the metric name."""
        return self._name

    def cal_acc_ppl(self, batch_loss, batch_size):
        """Accumulate a batch loss and return the running perplexity.

        Args:
            batch_loss: Loss of the batch; it is multiplied by
                ``batch_size`` before being added to the running total.
            batch_size: Number of samples in the batch.

        Returns:
            ``exp(total_loss / word_count)``, or ``inf`` when no words have
            been counted since the last reset.
        """
        self.total_loss += batch_loss * batch_size
        # reset() zeroes word_count, so a call that arrives before any
        # update() would otherwise divide by zero.
        if not self.word_count:
            return float("inf")
        ppl = math.exp(self.total_loss / self.word_count)
        return ppl
\ No newline at end of file
......@@ -6,7 +6,7 @@ Sequence Tagging,是一个序列标注模型,模型可用于实现,分词
|模型|Precision|Recall|F1-score|
|:-:|:-:|:-:|:-:|
|Lexical Analysis|88.26%|89.20%|88.73%|
|Lexical Analysis|89.57%|89.96%|89.76%|
## 2. 快速开始
......@@ -22,7 +22,7 @@ Sequence Tagging,是一个序列标注模型,模型可用于实现,分词
克隆工具集代码库到本地
```bash
git clone https://github.com/PaddlePaddle/hapi.git
cd hapi/sequence_tagging
cd hapi/examples/sequence_tagging
```
#### 3. 环境依赖
......@@ -70,7 +70,7 @@ python -u train.py \
--dynamic False
# --device: 使用gpu设备还是cpu设备
# --dynamic: 是否使用动态图模式进行训练,如果使用静态图训练,设置为True, 动态图设置为False
# --dynamic: 是否使用动态图模式进行训练,如果使用静态图训练,设置为False, 动态图设置为True
```
GPU上多卡训练
......@@ -84,7 +84,7 @@ python -m paddle.distributed.launch --selected_gpus=0,1,2,3 train.py \
--dynamic False
# --device: 使用gpu设备还是cpu设备
# --dynamic: 是否使用动态图模式进行训练,如果使用静态图训练,设置为True, 动态图设置为False
# --dynamic: 是否使用动态图模式进行训练,如果使用静态图训练,设置为False, 动态图设置为True
```
CPU上训练
......@@ -95,7 +95,7 @@ python -u train.py \
--dynamic False
# --device: 使用gpu设备还是cpu设备
# --dynamic: 是否使用动态图模式进行训练,如果使用静态图训练,设置为True, 动态图设置为False
# --dynamic: 是否使用动态图模式进行训练,如果使用静态图训练,设置为False, 动态图设置为True
```
### 模型预测
......@@ -105,15 +105,13 @@ python -u train.py \
python predict.py \
--init_from_checkpoint model_baseline/params \
--output_file predict.result \
--mode predict \
--device cpu \
--dynamic False
# --init_from_checkpoint: 初始化模型
# --output_file: 预测结果文件
# --device: 使用gpu还是cpu设备
# --mode: 开启模式, 设置为train时,进行训练,设置为predict时进行预测
# --dynamic: 是否使用动态图模式进行训练,如果使用静态图训练,设置为True, 动态图设置为False
# --dynamic: 是否使用动态图模式进行训练,如果使用静态图训练,设置为False, 动态图设置为True
```
### 模型评估
......@@ -123,14 +121,12 @@ python predict.py \
# baseline model
python eval.py \
--init_from_checkpoint ./model_baseline/params \
--mode predict \
--device cpu \
--dynamic False
# --init_from_checkpoint: 初始化模型
# --device: 使用gpu还是cpu设备
# --mode: 开启模式, 设置为train时,进行训练,设置为predict时进行预测
# --dynamic: 是否使用动态图模式进行训练,如果使用静态图训练,设置为True, 动态图设置为False
# --dynamic: 是否使用动态图模式进行训练,如果使用静态图训练,设置为False, 动态图设置为True
```
......@@ -168,7 +164,7 @@ Overall Architecture of GRU-CRF-MODEL
训练使用的数据可以由用户根据实际的应用场景,自己组织数据。除了第一行是 `text_a\tlabel` 固定的开头,后面的每行数据都是由两列组成,以制表符分隔,第一列是 utf-8 编码的中文文本,以 `\002` 分割,第二列是对应每个字的标注,以 `\002` 分隔。我们采用 IOB2 标注体系,即以 X-B 作为类型为 X 的词的开始,以 X-I 作为类型为 X 的词的持续,以 O 表示不关注的字(实际上,在词性、专名联合标注中,不存在 O )。示例如下:
```text
除\002了\002他\002续\002任\002十\002二\002届\002政\002协\002委\002员\002,\002马\002化\002腾\002,\002雷\002军\002,\002李\002彦\002宏\002也\002被\002推\002选\002为\002新\002一\002届\002全\002国\002人\002大\002代\002表\002或\002全\002国\002政\002协\002委\002员 p-B\002p-I\002r-B\002v-B\002v-I\002m-B\002m-I\002m-I\002ORG-B\002ORG-I\002n-B\002n-I\002w-B\002PER-B\002PER-I\002PER-I\002w-B\002PER-B\002PER-I\002w-B\002PER-B\002PER-I\002PER-I\002d-B\002p-B\002v-B\002v-I\002v-B\002a-B\002m-B\002m-I\002ORG-B\002ORG-I\002ORG-I\002ORG-I\002n-B\002n-I\002c-B\002n-B\002n-I\002ORG-B\002ORG-I\002n-B\002n-I
除\002了\002他\002续\002任\002十\002二\002届\002政\002协\002委\002员\002,\002马\002化\002腾\002,\002雷\002军\002,\002李\002彦\002宏\002也\002被\002推\002选\002为\002新\002一\002届\002全\002国\002人\002大\002代\002表\002或\002全\002国\002政\002协\002委\002员 p-B\002p-I\002r-B\002v-B\002v-I\002m-B\002m-I\002m-I\002ORG-B\002ORG-I\002n-B\002n-I\002w-B\002PER-B\002PER-I\002PER-I\002w-B\002PER-B\002PER-I\002w-B\002PER-B\002PER-I\002PER-I\002d-B\002p-B\002v-B\002v-I\002v-B\002a-B\002m-B\002m-I\002ORG-B\002ORG-I\002ORG-I\002ORG-I\002n-B\002n-I\002c-B\002n-B\002n-I\002ORG-B\002ORG-I\002n-B\002n-I
```
+ 我们随同代码一并发布了完全版的模型和相关的依赖数据。但是,由于模型的训练数据过于庞大,我们没有发布训练数据,仅在`data`目录下放置少数样本用以示例输入数据格式。
......@@ -196,6 +192,7 @@ Overall Architecture of GRU-CRF-MODEL
├── eval.py # 词法分析评估的脚本
├── downloads.py # 用于下载数据和模型的脚本
├── downloads.sh # 用于下载数据和模型的脚本
├── sequence_tagging.yaml # 模型训练、预测、评估相关配置参数
└──reader.py # 文件读取相关函数
```
......@@ -207,11 +204,11 @@ Overall Architecture of GRU-CRF-MODEL
```text
@article{jiao2018LAC,
title={Chinese Lexical Analysis with Deep Bi-GRU-CRF Network},
author={Jiao, Zhenyu and Sun, Shuqi and Sun, Ke},
journal={arXiv preprint arXiv:1807.01882},
year={2018},
url={https://arxiv.org/abs/1807.01882}
title={Chinese Lexical Analysis with Deep Bi-GRU-CRF Network},
author={Jiao, Zhenyu and Sun, Shuqi and Sun, Ke},
journal={arXiv preprint arXiv:1807.01882},
year={2018},
url={https://arxiv.org/abs/1807.01882}
}
```
### 如何贡献代码
......
......@@ -35,7 +35,7 @@ FILE_INFO = {
},
'MODEL': {
'name': 'sequence_tagging_dy.tar.gz',
'md5': "1125d374c03c8218b6e47325dcf607e3"
'md5': "6ba37ceea8f1f764ba1fe227295a6a3b"
},
}
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SequenceTagging network structure
SequenceTagging eval structure
"""
from __future__ import division
......@@ -25,18 +25,16 @@ import math
import argparse
import numpy as np
from train import SeqTagging
from train import SeqTagging, ChunkEval, LacLoss
from utils.configure import PDConfig
from utils.check import check_gpu, check_version
from utils.metrics import chunk_count
from reader import LacDataset, create_lexnet_data_generator, create_dataloader
from reader import LacDataset, LacDataLoader
work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.join(work_dir, "../"))
from hapi.model import set_device, Input
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.layers.utils import flatten
......@@ -44,51 +42,33 @@ def main(args):
place = set_device(args.device)
fluid.enable_dygraph(place) if args.dynamic else None
inputs = [Input([None, None], 'int64', name='words'),
Input([None], 'int64', name='length')]
inputs = [
Input(
[None, None], 'int64', name='words'), Input(
[None], 'int64', name='length'), Input(
[None, None], 'int64', name='target')
]
labels = [Input([None, None], 'int64', name='labels')]
feed_list = None if args.dynamic else [x.forward() for x in inputs]
dataset = LacDataset(args)
eval_path = args.test_file
chunk_evaluator = fluid.metrics.ChunkEvaluator()
chunk_evaluator.reset()
eval_generator = create_lexnet_data_generator(
args, reader=dataset, file_name=eval_path, place=place, mode="test")
eval_dataset = create_dataloader(
eval_generator, place, feed_list=feed_list)
eval_dataset = LacDataLoader(args, place, phase="test")
vocab_size = dataset.vocab_size
num_labels = dataset.num_labels
model = SeqTagging(args, vocab_size, num_labels)
optim = AdamOptimizer(
learning_rate=args.base_learning_rate,
parameter_list=model.parameters())
model = SeqTagging(args, vocab_size, num_labels, mode="test")
model.mode = "test"
model.prepare(inputs=inputs)
model.prepare(
metrics=ChunkEval(num_labels),
inputs=inputs,
labels=labels,
device=place)
model.load(args.init_from_checkpoint, skip_mismatch=True)
for data in eval_dataset():
if len(data) == 1:
batch_data = data[0]
targets = np.array(batch_data[2])
else:
batch_data = data
targets = batch_data[2].numpy()
inputs_data = [batch_data[0], batch_data[1]]
crf_decode, length = model.test(inputs=inputs_data)
num_infer_chunks, num_label_chunks, num_correct_chunks = chunk_count(crf_decode, targets, length, dataset.id2label_dict)
chunk_evaluator.update(num_infer_chunks, num_label_chunks, num_correct_chunks)
precision, recall, f1 = chunk_evaluator.eval()
print("[test] P: %.5f, R: %.5f, F1: %.5f" % (precision, recall, f1))
model.evaluate(eval_dataset.dataloader, batch_size=args.batch_size)
if __name__ == '__main__':
if __name__ == '__main__':
args = PDConfig(yaml_file="sequence_tagging.yaml")
args.build()
args.Print()
......
......@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
SequenceTagging network structure
SequenceTagging predict structure
"""
from __future__ import division
......@@ -28,14 +28,13 @@ import numpy as np
from train import SeqTagging
from utils.check import check_gpu, check_version
from utils.configure import PDConfig
from reader import LacDataset, create_lexnet_data_generator, create_dataloader
from reader import LacDataset, LacDataLoader
work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.join(work_dir, "../"))
from hapi.model import set_device, Input
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.layers.utils import flatten
......@@ -43,26 +42,18 @@ def main(args):
place = set_device(args.device)
fluid.enable_dygraph(place) if args.dynamic else None
inputs = [Input([None, None], 'int64', name='words'),
Input([None], 'int64', name='length')]
inputs = [
Input(
[None, None], 'int64', name='words'), Input(
[None], 'int64', name='length')
]
feed_list = None if args.dynamic else [x.forward() for x in inputs]
dataset = LacDataset(args)
predict_path = args.predict_file
predict_generator = create_lexnet_data_generator(
args, reader=dataset, file_name=predict_path, place=place, mode="predict")
predict_dataset = create_dataloader(
predict_generator, place, feed_list=feed_list)
predict_dataset = LacDataLoader(args, place, phase="predict")
vocab_size = dataset.vocab_size
num_labels = dataset.num_labels
model = SeqTagging(args, vocab_size, num_labels)
optim = AdamOptimizer(
learning_rate=args.base_learning_rate,
parameter_list=model.parameters())
model = SeqTagging(args, vocab_size, num_labels, mode="predict")
model.mode = "test"
model.prepare(inputs=inputs)
......@@ -70,20 +61,20 @@ def main(args):
model.load(args.init_from_checkpoint, skip_mismatch=True)
f = open(args.output_file, "wb")
for data in predict_dataset():
if len(data) == 1:
for data in predict_dataset.dataloader:
if len(data) == 1:
input_data = data[0]
else:
else:
input_data = data
results, length = model.test(inputs=flatten(input_data))
for i in range(len(results)):
results, length = model.test_batch(inputs=flatten(input_data))
for i in range(len(results)):
word_len = length[i]
word_ids = results[i][: word_len]
word_ids = results[i][:word_len]
tags = [dataset.id2label_dict[str(id)] for id in word_ids]
f.write("\002".join(tags) + "\n")
if __name__ == '__main__':
if __name__ == '__main__':
args = PDConfig(yaml_file="sequence_tagging.yaml")
args.build()
args.Print()
......
......@@ -19,12 +19,19 @@ from __future__ import division
from __future__ import print_function
import io
import os
import leveldb
import numpy as np
import shutil
from functools import partial
import paddle
from paddle.io import BatchSampler, DataLoader, Dataset
from paddle.fluid.dygraph.parallel import ParallelEnv
from hapi.distributed import DistributedBatchSampler
class LacDataset(object):
class LacDataset(Dataset):
"""
Load lexical analysis dataset
"""
......@@ -34,6 +41,7 @@ class LacDataset(object):
self.label_dict_path = args.label_dict_path
self.word_rep_dict_path = args.word_rep_dict_path
self._load_dict()
self.examples = []
def _load_dict(self):
self.word2id_dict = self.load_kv_dict(
......@@ -108,152 +116,135 @@ class LacDataset(object):
label_ids.append(label_id)
return label_ids
def file_reader(self,
filename,
mode="train",
batch_size=32,
max_seq_len=126):
def file_reader(self, filename, phase="train"):
"""
yield (word_idx, target_idx) one by one from file,
or yield (word_idx, ) in `infer` mode
"""
def wrapper():
fread = io.open(filename, "r", encoding="utf-8")
if mode == "train":
headline = next(fread)
headline = headline.strip().split('\t')
assert len(headline) == 2 and headline[0] == "text_a" and headline[
1] == "label"
buf = []
for line in fread:
words, labels = line.strip("\n").split("\t")
if len(words) < 1:
continue
word_ids = self.word_to_ids(words.split("\002"))
label_ids = self.label_to_ids(labels.split("\002"))
assert len(word_ids) == len(label_ids)
words_len = np.int64(len(word_ids))
word_ids = word_ids[0:max_seq_len]
words_len = np.int64(len(word_ids))
word_ids += [0 for _ in range(max_seq_len - words_len)]
label_ids = label_ids[0:max_seq_len]
label_ids += [0 for _ in range(max_seq_len - words_len)]
assert len(word_ids) == len(label_ids)
yield word_ids, label_ids, words_len
elif mode == "test":
headline = next(fread)
self.phase = phase
with io.open(filename, "r", encoding="utf8") as fr:
if phase in ["train", "test"]:
headline = next(fr)
headline = headline.strip().split('\t')
assert len(headline) == 2 and headline[0] == "text_a" and headline[
1] == "label"
buf = []
for line in fread:
words, labels = line.strip("\n").split("\t")
if len(words) < 1:
continue
word_ids = self.word_to_ids(words.split("\002"))
label_ids = self.label_to_ids(labels.split("\002"))
assert len(word_ids) == len(label_ids)
words_len = np.int64(len(word_ids))
yield word_ids, label_ids, words_len
else:
for line in fread:
words = line.strip("\n").split('\t')[0]
if words == u"text_a":
assert len(headline) == 2 and headline[
0] == "text_a" and headline[1] == "label"
for line in fr:
line_str = line.strip("\n")
if len(line_str) < 1 and len(line_str.split('\t')) < 2:
continue
if "\002" not in words:
word_ids = self.word_to_ids(words)
else:
word_ids = self.word_to_ids(words.split("\002"))
words_len = np.int64(len(word_ids))
yield word_ids, words_len
fread.close()
self.examples.append(line_str)
else:
for idx, line in enumerate(fr):
words = line.strip("\n").split("\t")[0]
self.examples.append(words)
def __getitem__(self, idx):
line_str = self.examples[idx]
if self.phase in ["train", "test"]:
words, labels = line_str.split('\t')
word_ids = self.word_to_ids(words.split("\002"))
label_ids = self.label_to_ids(labels.split("\002"))
assert len(word_ids) == len(label_ids)
return word_ids, label_ids
else:
words = [w for w in line_str]
word_ids = self.word_to_ids(words)
return word_ids
def __len__(self):
return wrapper
return len(self.examples)
def create_lexnet_data_generator(args, reader, file_name, place, mode="train"):
def padding_data(max_len, batch_data):
def create_lexnet_data_generator(args, insts, phase="train"):
def padding_data(max_len, batch_data, if_len=False):
padding_batch_data = []
for data in batch_data:
padding_lens = []
for data in batch_data:
data = data[:max_len]
if if_len:
seq_len = np.int64(len(data))
padding_lens.append(seq_len)
data += [0 for _ in range(max_len - len(data))]
padding_batch_data.append(data)
return padding_batch_data
def wrapper():
if mode == "train":
batch_words, batch_labels, seq_lens = [], [], []
for epoch in xrange(args.epoch):
for instance in reader.file_reader(
file_name, mode, max_seq_len=args.max_seq_len)():
words, labels, words_len = instance
if len(seq_lens) < args.batch_size:
batch_words.append(words)
batch_labels.append(labels)
seq_lens.append(words_len)
if len(seq_lens) == args.batch_size:
yield batch_words, seq_lens, batch_labels, batch_labels
batch_words, batch_labels, seq_lens = [], [], []
if len(seq_lens) > 0:
yield batch_words, seq_lens, batch_labels, batch_labels
elif mode == "test":
batch_words, batch_labels, seq_lens, max_len = [], [], [], 0
for instance in reader.file_reader(
file_name, mode, max_seq_len=args.max_seq_len)():
words, labels, words_len = instance
max_len = words_len if words_len > max_len else max_len
if len(seq_lens) < args.batch_size:
batch_words.append(words)
seq_lens.append(words_len)
batch_labels.append(labels)
if len(seq_lens) == args.batch_size:
padding_batch_words = padding_data(max_len, batch_words)
padding_batch_labels = padding_data(max_len, batch_labels)
yield padding_batch_words, seq_lens, padding_batch_labels, padding_batch_labels
batch_words, batch_labels, seq_lens, max_len = [], [], [], 0
if len(seq_lens) > 0:
padding_batch_words = padding_data(max_len, batch_words)
padding_batch_labels = padding_data(max_len, batch_labels)
yield padding_batch_words, seq_lens, padding_batch_labels, padding_batch_labels
else:
batch_words, seq_lens, max_len = [], [], 0
for instance in reader.file_reader(
file_name, mode, max_seq_len=args.max_seq_len)():
words, words_len = instance
if len(seq_lens) < args.batch_size:
batch_words.append(words)
seq_lens.append(words_len)
max_len = words_len if words_len > max_len else max_len
if len(seq_lens) == args.batch_size:
padding_batch_words = padding_data(max_len, batch_words)
yield padding_batch_words, seq_lens
batch_words, seq_lens, max_len = [], [], 0
if len(seq_lens) > 0:
padding_batch_words = padding_data(max_len, batch_words)
yield padding_batch_words, seq_lens
return wrapper
def create_dataloader(generator, place, feed_list=None):
if not feed_list:
data_loader = paddle.io.DataLoader.from_generator(
capacity=50,
use_double_buffer=True,
iterable=True,
return_list=True)
if if_len:
return np.array(padding_batch_data), np.array(padding_lens)
else:
return np.array(padding_batch_data)
if phase == "train":
batch_words = [inst[0] for inst in insts]
batch_labels = [inst[1] for inst in insts]
padding_batch_words, padding_lens = padding_data(
args.max_seq_len, batch_words, if_len=True)
padding_batch_labels = padding_data(args.max_seq_len, batch_labels)
return [
padding_batch_words, padding_lens, padding_batch_labels,
padding_batch_labels
]
elif phase == "test":
batch_words = [inst[0] for inst in insts]
seq_len = [len(inst[0]) for inst in insts]
max_seq_len = max(seq_len)
batch_labels = [inst[1] for inst in insts]
padding_batch_words, padding_lens = padding_data(
max_seq_len, batch_words, if_len=True)
padding_batch_labels = padding_data(max_seq_len, batch_labels)
return [
padding_batch_words, padding_lens, padding_batch_labels,
padding_batch_labels
]
else:
data_loader = paddle.io.DataLoader.from_generator(
feed_list=feed_list,
capacity=50,
use_double_buffer=True,
iterable=True,
batch_words = insts
seq_len = [len(inst) for inst in insts]
max_seq_len = max(seq_len)
padding_batch_words, padding_lens = padding_data(
max_seq_len, batch_words, if_len=True)
return [padding_batch_words, padding_lens]
class LacDataLoader(object):
    """Bundle the dataset, batch sampler and ``DataLoader`` for one phase.

    After construction the instance exposes three attributes: ``dataset``,
    ``sampler`` and ``dataloader``.

    Args:
        args: configuration object. ``batch_size`` and one of ``train_file``,
            ``test_file`` or ``predict_file`` are read from it here; it is
            also handed to ``LacDataset`` and to the collate function.
        place: passed to ``DataLoader`` as ``places``.
        phase: one of "train", "test" or "predict".
        shuffle: forwarded to the batch sampler.
        num_workers: forwarded to ``DataLoader``.
        drop_last: forwarded to the batch sampler.
    """

    def __init__(self,
                 args,
                 place,
                 phase="train",
                 shuffle=False,
                 num_workers=0,
                 drop_last=False):
        assert phase in [
            "train", "test", "predict"
        ], "phase should be in [train, test, predict], but get %s" % phase

        # Every phase reads its input path from its own config entry.
        file_attr = {
            "train": "train_file",
            "test": "test_file",
            "predict": "predict_file",
        }[phase]
        file_name = getattr(args, file_attr)

        self.dataset = LacDataset(args)
        self.dataset.file_reader(file_name, phase=phase)

        # Only the training phase goes through the distributed sampler.
        sampler_cls = (DistributedBatchSampler
                       if phase == "train" else BatchSampler)
        self.sampler = sampler_cls(
            dataset=self.dataset,
            batch_size=args.batch_size,
            shuffle=shuffle,
            drop_last=drop_last)

        # The collate function is pre-bound with the config and the phase.
        collate_fn = partial(create_lexnet_data_generator, args, phase=phase)
        self.dataloader = DataLoader(
            dataset=self.dataset,
            batch_sampler=self.sampler,
            places=place,
            collate_fn=collate_fn,
            num_workers=num_workers,
            return_list=True)
data_loader.set_batch_generator(generator, places=place)
return data_loader
word_dict_path: "./conf/word.dic"
label_dict_path: "./conf/tag.dic"
word_rep_dict_path: "./conf/q2b.dic"
device: "cpu"
device: "gpu"
dynamic: True
epoch: 10
base_learning_rate: 0.001
......@@ -14,7 +14,7 @@ batch_size: 300
max_seq_len: 126
num_devices: 1
save_dir: "model"
init_from_checkpoint: "model_baseline/params"
init_from_checkpoint: ""
init_from_pretrain_model: ""
save_freq: 1
eval_freq: 1
......@@ -22,4 +22,3 @@ output_file: "predict.result"
test_file: "./data/test.tsv"
train_file: "./data/train.tsv"
predict_file: "./data/infer.tsv"
mode: "train"
......@@ -28,21 +28,23 @@ import numpy as np
work_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
sys.path.append(os.path.join(work_dir, "../"))
from hapi.metrics import Metric
from hapi.model import Model, Input, Loss, set_device
from hapi.text.text import SequenceTagging
from utils.check import check_gpu, check_version
from utils.configure import PDConfig
from reader import LacDataset, create_lexnet_data_generator, create_dataloader
from reader import LacDataset, LacDataLoader
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
__all__ = ["SeqTagging", "LacLoss", "ChunkEval"]
class SeqTagging(Model):
def __init__(self, args, vocab_size, num_labels, length=None):
def __init__(self, args, vocab_size, num_labels, length=None,
mode="train"):
super(SeqTagging, self).__init__()
"""
define the lexical analysis network structure
......@@ -53,7 +55,7 @@ class SeqTagging(Model):
for infer: return the prediction
otherwise: return the prediction
"""
self.mode_type = args.mode
self.mode_type = mode
self.word_emb_dim = args.word_emb_dim
self.vocab_size = vocab_size
self.num_labels = num_labels
......@@ -65,19 +67,19 @@ class SeqTagging(Model):
self.bigru_num = args.bigru_num
self.batch_size = args.batch_size
self.init_bound = 0.1
self.length=length
self.length = length
self.sequence_tagging = SequenceTagging(
vocab_size=self.vocab_size,
num_labels=self.num_labels,
batch_size=self.batch_size,
word_emb_dim=self.word_emb_dim,
grnn_hidden_dim=self.grnn_hidden_dim,
emb_learning_rate=self.emb_lr,
crf_learning_rate=self.crf_lr,
bigru_num=self.bigru_num,
init_bound=self.init_bound,
length=self.length)
vocab_size=self.vocab_size,
num_labels=self.num_labels,
batch_size=self.batch_size,
word_emb_dim=self.word_emb_dim,
grnn_hidden_dim=self.grnn_hidden_dim,
emb_learning_rate=self.emb_lr,
crf_learning_rate=self.crf_lr,
bigru_num=self.bigru_num,
init_bound=self.init_bound,
length=self.length)
def forward(self, *inputs):
"""
......@@ -85,10 +87,10 @@ class SeqTagging(Model):
"""
word = inputs[0]
lengths = inputs[1]
if self.mode_type == "train" or self.mode_type == "test":
if self.mode_type == "train" or self.mode_type == "test":
target = inputs[2]
outputs = self.sequence_tagging(word, lengths, target)
else:
else:
outputs = self.sequence_tagging(word, lengths)
return outputs
......@@ -156,7 +158,7 @@ class ChunkEval(Metric):
int(math.ceil((num_labels - 1) / 2.0)), "IOB")
self.reset()
def add_metric_op(self, *args):
def add_metric_op(self, *args):
crf_decode = args[0]
lengths = args[2]
label = args[3]
......@@ -207,30 +209,25 @@ def main(args):
place = set_device(args.device)
fluid.enable_dygraph(place) if args.dynamic else None
inputs = [Input([None, None], 'int64', name='words'),
Input([None], 'int64', name='length'),
Input([None, None], 'int64', name='target')]
inputs = [
Input(
[None, None], 'int64', name='words'), Input(
[None], 'int64', name='length'), Input(
[None, None], 'int64', name='target')
]
labels = [Input([None, None], 'int64', name='labels')]
feed_list = None if args.dynamic else [x.forward() for x in inputs + labels]
dataset = LacDataset(args)
train_path = args.train_file
test_path = args.test_file
train_generator = create_lexnet_data_generator(
args, reader=dataset, file_name=train_path, place=place, mode="train")
test_generator = create_lexnet_data_generator(
args, reader=dataset, file_name=test_path, place=place, mode="test")
feed_list = None if args.dynamic else [
x.forward() for x in inputs + labels
]
train_dataset = create_dataloader(
train_generator, place, feed_list=feed_list)
test_dataset = create_dataloader(
test_generator, place, feed_list=feed_list)
dataset = LacDataset(args)
train_dataset = LacDataLoader(args, place, phase="train")
vocab_size = dataset.vocab_size
num_labels = dataset.num_labels
model = SeqTagging(args, vocab_size, num_labels)
model = SeqTagging(args, vocab_size, num_labels, mode="train")
optim = AdamOptimizer(
learning_rate=args.base_learning_rate,
......@@ -250,8 +247,7 @@ def main(args):
if args.init_from_pretrain_model:
model.load(args.init_from_pretrain_model, reset_optimizer=True)
model.fit(train_dataset,
test_dataset,
model.fit(train_dataset.dataloader,
epochs=args.epoch,
batch_size=args.batch_size,
eval_freq=args.eval_freq,
......@@ -263,7 +259,7 @@ if __name__ == '__main__':
args = PDConfig(yaml_file="sequence_tagging.yaml")
args.build()
args.Print()
use_gpu = True if args.device == "gpu" else False
check_gpu(use_gpu)
check_version()
......
......@@ -195,13 +195,19 @@ class PDConfig(object):
"Whether to perform predicting.")
self.default_g.add_arg("do_eval", bool, False,
"Whether to perform evaluating.")
self.default_g.add_arg("do_save_inference_model", bool, False,
"Whether to perform model saving for inference.")
self.default_g.add_arg(
"do_save_inference_model", bool, False,
"Whether to perform model saving for inference.")
# NOTE: args for profiler
self.default_g.add_arg("is_profiler", int, 0, "the switch of profiler tools. (used for benchmark)")
self.default_g.add_arg("profiler_path", str, './', "the profiler output file path. (used for benchmark)")
self.default_g.add_arg("max_iter", int, 0, "the max train batch num.(used for benchmark)")
self.default_g.add_arg(
"is_profiler", int, 0,
"the switch of profiler tools. (used for benchmark)")
self.default_g.add_arg(
"profiler_path", str, './',
"the profiler output file path. (used for benchmark)")
self.default_g.add_arg("max_iter", int, 0,
"the max train batch num.(used for benchmark)")
self.parser = parser
......
......@@ -23,7 +23,7 @@ import paddle.fluid as fluid
__all__ = ['chunk_count', "build_chunk"]
def build_chunk(data_list, id2label_dict):
def build_chunk(data_list, id2label_dict):
"""
Assembly entity
"""
......@@ -31,29 +31,29 @@ def build_chunk(data_list, id2label_dict):
ner_dict = {}
ner_str = ""
ner_start = 0
for i in range(len(tag_list)):
for i in range(len(tag_list)):
tag = tag_list[i]
if tag == u"O":
if i != 0:
if tag == u"O":
if i != 0:
key = "%d_%d" % (ner_start, i - 1)
ner_dict[key] = ner_str
ner_start = i
ner_str = tag
elif tag.endswith(u"B"):
if i != 0:
ner_str = tag
elif tag.endswith(u"B"):
if i != 0:
key = "%d_%d" % (ner_start, i - 1)
ner_dict[key] = ner_str
ner_start = i
ner_str = tag.split('-')[0]
elif tag.endswith(u"I"):
if tag.split('-')[0] != ner_str:
if i != 0:
elif tag.endswith(u"I"):
if tag.split('-')[0] != ner_str:
if i != 0:
key = "%d_%d" % (ner_start, i - 1)
ner_dict[key] = ner_str
ner_start = i
ner_str = tag.split('-')[0]
return ner_dict
def chunk_count(infer_numpy, label_numpy, seq_len, id2label_dict):
"""
......@@ -62,15 +62,14 @@ def chunk_count(infer_numpy, label_numpy, seq_len, id2label_dict):
num_infer_chunks, num_label_chunks, num_correct_chunks = 0, 0, 0
assert infer_numpy.shape[0] == label_numpy.shape[0]
for i in range(infer_numpy.shape[0]):
infer_list = infer_numpy[i][: seq_len[i]]
label_list = label_numpy[i][: seq_len[i]]
for i in range(infer_numpy.shape[0]):
infer_list = infer_numpy[i][:seq_len[i]]
label_list = label_numpy[i][:seq_len[i]]
infer_dict = build_chunk(infer_list, id2label_dict)
num_infer_chunks += len(infer_dict)
label_dict = build_chunk(label_list, id2label_dict)
num_label_chunks += len(label_dict)
for key in infer_dict:
if key in label_dict and label_dict[key] == infer_dict[key]:
for key in infer_dict:
if key in label_dict and label_dict[key] == infer_dict[key]:
num_correct_chunks += 1
return num_infer_chunks, num_label_chunks, num_correct_chunks
# 图像风格迁移
图像的风格迁移是卷积神经网络有趣的应用之一。那什么是风格迁移呢?下图第一列左边的图为相机拍摄的一张普通图片,右边的图为梵高的名画星空。那如何让左边的普通图片拥有星空的风格呢?神经网络的风格迁移就可以帮助你生成第二列这样的图片。
<div align="center">
<img src="images/markdown/img1.png" width="600" height="300" />
<br/>
<img src="images/markdown/img2.png" width="300" height="300" />
</div>
## 基本原理
风格迁移的目标就是使得生成图片的内容与内容图片(content image)尽可能相似,同时使其风格与风格图片(style image)尽可能相似。由于在计算机中,我们用一个一个像素点表示图片,所以两个图片的相似程度我们可以用每个像素点的欧氏距离来表示。而两个图片的风格相似度,我们采用两个图片在卷积神经网络中相同的一层特征图的gram矩阵的欧氏距离来表示。对于一个特征图gram矩阵的计算如下所示:
```python
# tensor shape is [1, c, h, w]
_, c, h, w = tensor.shape
tensor = fluid.layers.reshape(tensor, [c, h * w])
# gram matrix with shape: [c, c]
gram_matrix = fluid.layers.matmul(tensor, fluid.layers.transpose(tensor, [1, 0]))
```
最终风格迁移的问题转化为优化上述的两个欧式距离的问题。这里要注意的是,我们使用一个在imagenet上预训练好的模型vgg16,并且固定参数,优化器只更新输入的生成图像的值。
## 具体实现
接下来,使用代码一步一步来实现上述图片的风格迁移
```python
# 导入所需的模块
%matplotlib inline
import numpy as np
import matplotlib.pyplot as plt
from hapi.model import Model, Loss
from hapi.vision.models import vgg16
from hapi.vision.transforms import transforms
from paddle import fluid
from paddle.fluid.io import Dataset
import cv2
import copy
# 图像预处理函数,和tensor恢复到自然图像的函数
from .style_transfer import load_image, image_restore
```
```python
# 启动动态图模式
fluid.enable_dygraph()
```
```python
# 内容图像,用于风格迁移
content_path = './images/chicago_cropped.jpg'
# 风格图像
style_path = './images/Starry-Night-by-Vincent-Van-Gogh-painting.jpg'
```
```python
# 可视化两个图像
content = load_image(content_path)
style = load_image(style_path, shape=tuple(content.shape[-2:]))
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(10, 10))
ax1.imshow(image_restore(content))
ax2.imshow(image_restore(style))
```
![png](images/markdown/output_10_1.png)
```python
# 定义风格迁移模型,使用在imagenet上预训练好的vgg16作为基础模型
class StyleTransferModel(Model):
def __init__(self):
super(StyleTransferModel, self).__init__()
# pretrained设置为true,会自动下载imagenet上的预训练权重并加载
vgg = vgg16(pretrained=True)
self.base_model = vgg.features
for p in self.base_model.parameters():
p.stop_gradient=True
self.layers = {
'0': 'conv1_1',
'3': 'conv2_1',
'6': 'conv3_1',
'10': 'conv4_1',
'11': 'conv4_2', ## content representation
'14': 'conv5_1'
}
def forward(self, image):
outputs = []
for name, layer in self.base_model.named_sublayers():
image = layer(image)
if name in self.layers:
outputs.append(image)
return outputs
```
```python
# 定义风格迁移的损失函数
class StyleTransferLoss(Loss):
def __init__(self, content_loss_weight=1, style_loss_weight=1e5, style_weights=[1.0, 0.8, 0.5, 0.3, 0.1]):
super(StyleTransferLoss, self).__init__()
self.content_loss_weight = content_loss_weight
self.style_loss_weight = style_loss_weight
self.style_weights = style_weights
def forward(self, outputs, labels):
content_features = labels[-1]
style_features = labels[:-1]
# 计算图像内容相似度的loss
content_loss = fluid.layers.mean((outputs[-2] - content_features)**2)
# 计算风格相似度的loss
style_loss = 0
style_grams = [self.gram_matrix(feat) for feat in style_features ]
style_weights = self.style_weights
for i, weight in enumerate(style_weights):
target_gram = self.gram_matrix(outputs[i])
layer_loss = weight * fluid.layers.mean((target_gram - style_grams[i])**2)
b, d, h, w = outputs[i].shape
style_loss += layer_loss / (d * h * w)
total_loss = self.content_loss_weight * content_loss + self.style_loss_weight * style_loss
return total_loss
def gram_matrix(self, A):
if len(A.shape) == 4:
batch_size, c, h, w = A.shape
A = fluid.layers.reshape(A, (c, h*w))
GA = fluid.layers.matmul(A, fluid.layers.transpose(A, [1, 0]))
return GA
```
```python
# 创建模型
model = StyleTransferModel()
```
```python
# 创建损失函数
style_loss = StyleTransferLoss()
```
```python
# 使用内容图像初始化要生成的图像
target = Model.create_parameter(model, shape=content.shape)
target.set_value(content.numpy())
```
```python
# 创建优化器
optimizer = fluid.optimizer.Adam(parameter_list=[target], learning_rate=0.001)
```
```python
# 初始化高级api
model.prepare(optimizer, style_loss)
```
```python
# 使用内容图像和风格图像获取内容特征和风格特征
content_fetures = model.test_batch(content)
style_features = model.test_batch(style)
```
```python
# 将两个特征组合,作为损失函数的label传给模型
feats = style_features + [content_fetures[-2]]
```
```python
# 训练5000个step,每500个step画一下生成的图像查看效果
steps = 5000
for i in range(steps):
outs = model.train_batch(target, feats)
if i % 500 == 0:
print('iters:', i, 'loss:', outs[0])
plt.imshow(image_restore(target))
plt.show()
```
iters: 0 loss: [8.829961e+09]
![png](images/markdown/output_20_1.png)
iters: 500 loss: [3.728548e+08]
![png](images/markdown/output_20_3.png)
iters: 1000 loss: [1.6327214e+08]
![png](images/markdown/output_20_5.png)
iters: 1500 loss: [1.0806553e+08]
![png](images/markdown/output_20_7.png)
iters: 2000 loss: [81069480.]
![png](images/markdown/output_20_9.png)
iters: 2500 loss: [64284104.]
![png](images/markdown/output_20_11.png)
iters: 3000 loss: [52580884.]
![png](images/markdown/output_20_13.png)
iters: 3500 loss: [43825304.]
![png](images/markdown/output_20_15.png)
iters: 4000 loss: [37048400.]
![png](images/markdown/output_20_17.png)
iters: 4500 loss: [31719670.]
![png](images/markdown/output_20_19.png)
```python
# 风格迁移后的图像
fig, (ax1, ax2, ax3) = plt.subplots(1, 3, figsize=(20, 10))
ax1.imshow(image_restore(content))
ax2.imshow(image_restore(target))
ax3.imshow(image_restore(style))
```
![png](images/markdown/output_21_1.png)
## 总结
上述可运行的代码都在[style-transfer.ipynb](./style-transfer.ipynb)中, 同时我们提供了[style-transfer.py](./style-transfer.py)脚本,可以直接执行如下命令,实现图片的风格迁移:
```shell
python -u style-transfer.py --content-image /path/to/your-content-image --style-image /path/to/your-style-image --save-dir /path/to/your-output-dir
```
风格迁移生成的图像保存在```--save-dir```中。
## 参考
[A Neural Algorithm of Artistic Style](https://arxiv.org/abs/1508.06576)
此差异已折叠。
import os
import argparse
import numpy as np
import matplotlib.pyplot as plt
from hapi.model import Model, Loss
from hapi.vision.models import vgg16
from hapi.vision.transforms import transforms
from paddle import fluid
from paddle.fluid.io import Dataset
import cv2
import copy
def load_image(image_path, max_size=400, shape=None):
    """Read an image file and convert it to a normalized dygraph variable.

    The pixels are scaled to [0, 1], resized, permuted and normalized with
    the mean/std constants below, then given a leading batch axis of size 1.

    Args:
        image_path (str): path of the image file to read with OpenCV.
        max_size (int): cap on the resize target when ``shape`` is None; the
            target is the longer image side, limited to ``max_size``.
        shape: explicit resize target passed to ``transforms.Resize``. When
            given it takes precedence over ``max_size``.

    Returns:
        The result of ``fluid.dygraph.to_variable`` on the processed array.

    Raises:
        IOError: if OpenCV cannot read ``image_path``.
    """
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread reports a missing or undecodable file by returning
        # None instead of raising, so fail here with a message that names
        # the offending path.
        raise IOError("failed to read image: %s" % image_path)
    image = image.astype('float32') / 255.0

    if shape is not None:
        size = shape
    else:
        size = min(max(image.shape[:2]), max_size)

    transform = transforms.Compose([
        transforms.Resize(size), transforms.Permute(),
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
    ])
    # Add the batch axis and keep at most the first three entries of the
    # leading axis (presumably the channel axis after Permute -- confirm).
    image = transform(image)[np.newaxis, :3, :, :]
    image = fluid.dygraph.to_variable(image)
    return image
def image_restore(image):
    """Turn a normalized single-image tensor back into a displayable array.

    Args:
        image: object exposing ``numpy()`` that returns an array whose first
            axis has size 1 and whose second axis has three entries.

    Returns:
        numpy.ndarray: the array with the first axis removed and the
        remaining axes reordered as (1, 2, 0), de-normalized with the
        per-channel std/mean below and clipped to [0, 1].
    """
    std = np.array((0.229, 0.224, 0.225))
    mean = np.array((0.485, 0.456, 0.406))

    pixels = np.squeeze(image.numpy(), 0)
    # Move the three-entry axis last so std/mean broadcast per channel.
    pixels = np.transpose(pixels, (1, 2, 0))
    return np.clip(pixels * std + mean, 0, 1)
class StyleTransferModel(Model):
    """Feature extractor built on the ``features`` part of a VGG16 network.

    ``forward`` returns the intermediate activations of the sublayers whose
    names are keys of ``self.layers``.
    """

    def __init__(self):
        super(StyleTransferModel, self).__init__()
        # pretrained=True downloads and loads the ImageNet weights.
        self.base_model = vgg16(pretrained=True).features
        # The backbone is not trained: stop gradients on all its parameters.
        for param in self.base_model.parameters():
            param.stop_gradient = True
        # Sublayer name inside ``features`` -> conventional VGG layer name.
        self.layers = {
            '0': 'conv1_1',
            '3': 'conv2_1',
            '6': 'conv3_1',
            '10': 'conv4_1',
            '11': 'conv4_2',  # content representation
            '14': 'conv5_1'
        }

    def forward(self, image):
        """Run ``image`` through the backbone and collect selected outputs.

        Args:
            image: input passed to the first sublayer of the backbone.

        Returns:
            list: activations of the selected sublayers, in the order the
            sublayers are visited.
        """
        feature = image
        selected = []
        for layer_name, sublayer in self.base_model.named_sublayers():
            feature = sublayer(feature)
            if layer_name not in self.layers:
                continue
            selected.append(feature)
        return selected
class StyleTransferLoss(Loss):
    """Weighted sum of a content loss and a Gram-matrix style loss.

    Args:
        content_loss_weight: multiplier of the content term.
        style_loss_weight: multiplier of the accumulated style term.
        style_weights: per-layer style weights; entry ``i`` is applied to
            ``outputs[i]``. The sequence is copied, so later changes to the
            caller's object (or to the default) do not leak between
            instances.
    """

    def __init__(self,
                 content_loss_weight=1,
                 style_loss_weight=1e5,
                 style_weights=(1.0, 0.8, 0.5, 0.3, 0.1)):
        super(StyleTransferLoss, self).__init__()
        self.content_loss_weight = content_loss_weight
        self.style_loss_weight = style_loss_weight
        # Keep a private list instead of aliasing the (shared) default.
        self.style_weights = list(style_weights)

    def forward(self, outputs, labels):
        """Compute the total loss.

        Args:
            outputs: list of feature maps of the generated image.
            labels: list whose last entry is the content target and whose
                other entries are the style feature maps, aligned by index
                with ``outputs``.

        Returns:
            ``content_loss_weight * content_loss +
            style_loss_weight * style_loss``.
        """
        content_features = labels[-1]
        style_features = labels[:-1]

        # Content similarity: mean squared difference on the
        # second-to-last output.
        content_loss = fluid.layers.mean((outputs[-2] - content_features)**2)

        # Style similarity: mean squared difference of Gram matrices, one
        # term per weighted layer. Gram matrices are built only for the
        # layers that actually carry a weight.
        # NOTE(review): with the six-entry output list produced by
        # StyleTransferModel in this file, the fifth default weight falls on
        # the content layer ('conv4_2') and 'conv5_1' gets no style weight
        # -- confirm this is intended.
        style_loss = 0
        for i, weight in enumerate(self.style_weights):
            target_gram = self.gram_matrix(outputs[i])
            style_gram = self.gram_matrix(style_features[i])
            layer_loss = weight * fluid.layers.mean(
                (target_gram - style_gram)**2)
            _, d, h, w = outputs[i].shape
            style_loss += layer_loss / (d * h * w)

        total_loss = (self.content_loss_weight * content_loss +
                      self.style_loss_weight * style_loss)
        return total_loss

    def gram_matrix(self, A):
        """Return ``A @ A^T`` after flattening a 4-D input to ``[c, h * w]``.

        The reshape drops the leading axis, so a 4-D input presumably needs
        a leading size of 1 -- TODO confirm. Inputs that are not 4-D are
        multiplied as they are.
        """
        if len(A.shape) == 4:
            _, c, h, w = A.shape
            A = fluid.layers.reshape(A, (c, h * w))
        GA = fluid.layers.matmul(A, fluid.layers.transpose(A, [1, 0]))
        return GA
def main():
    """Run style transfer as configured by the module-level ``FLAGS``.

    Reads ``FLAGS.content_image`` and ``FLAGS.style_image``, optimizes a
    copy of the content image for ``FLAGS.steps`` steps with learning rate
    ``FLAGS.lr``, and writes the result to
    ``FLAGS.save_dir/generated_<content image file name>``.
    """
    # Enable dygraph (imperative) mode.
    fluid.enable_dygraph()

    content = load_image(FLAGS.content_image)
    style = load_image(FLAGS.style_image, shape=tuple(content.shape[-2:]))

    model = StyleTransferModel()
    style_loss = StyleTransferLoss()

    # The generated image is a parameter initialized from the content image.
    target = Model.create_parameter(model, shape=content.shape)
    target.set_value(content.numpy())

    # Only the generated image is handed to the optimizer.
    optimizer = fluid.optimizer.Adam(
        parameter_list=[target], learning_rate=FLAGS.lr)
    model.prepare(optimizer, style_loss)

    content_features = model.test_batch(content)
    style_features = model.test_batch(style)

    # Combine both feature sets; they are passed to the loss as labels.
    feats = style_features + [content_features[-2]]

    # Optimize for FLAGS.steps steps and report the loss every 500 steps.
    steps = FLAGS.steps
    for i in range(steps):
        outs = model.train_batch(target, feats)
        if i % 500 == 0:
            print('iters:', i, 'loss:', outs[0][0])

    if not os.path.exists(FLAGS.save_dir):
        os.makedirs(FLAGS.save_dir)

    # Save the generated image. basename() strips the directory part for
    # every separator the platform accepts, unlike splitting on os.sep.
    name = os.path.basename(FLAGS.content_image)
    output_path = os.path.join(FLAGS.save_dir, 'generated_' + name)
    cv2.imwrite(output_path,
                cv2.cvtColor((image_restore(target) * 255).astype('uint8'),
                             cv2.COLOR_RGB2BGR))
if __name__ == '__main__':
    # Command-line entry point: parse the options into the module-level
    # FLAGS that main() reads, then run the transfer. The text is given as
    # ``description`` so the usage line keeps showing the script name.
    parser = argparse.ArgumentParser(
        description="Image style transfer with a pretrained VGG16")
    parser.add_argument(
        "--content-image",
        type=str,
        default='./images/chicago_cropped.jpg',
        help="content image")
    parser.add_argument(
        "--style-image",
        type=str,
        default='./images/Starry-Night-by-Vincent-Van-Gogh-painting.jpg',
        help="style image")
    parser.add_argument(
        "--save-dir", type=str, default='./output', help="output dir")
    parser.add_argument(
        "--steps", default=5000, type=int, help="number of steps to run")
    parser.add_argument(
        '--lr',
        '--learning-rate',
        default=1e-3,
        type=float,
        metavar='LR',
        help='initial learning rate')
    FLAGS = parser.parse_args()
    main()
......@@ -201,7 +201,7 @@ python -u predict.py \
--special_token '<s>' '<e>' '<unk>' \
--predict_file gen_data/wmt16_ende_data_bpe/newstest2014.tok.bpe.32000.en-de \
--batch_size 32 \
--init_from_params base_model_dygraph/step_100000/transformer \
--init_from_params big_model_dygraph/step_100000/transformer \
--beam_size 5 \
--max_out_len 255 \
--output_file predict.txt \
......
......@@ -14,9 +14,6 @@
import logging
import os
import six
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
from functools import partial
import numpy as np
......@@ -28,9 +25,9 @@ from paddle.fluid.layers.utils import flatten
from utils.configure import PDConfig
from utils.check import check_gpu, check_version
from model import Input, set_device
from hapi.model import Input, set_device
from reader import prepare_infer_input, Seq2SeqDataset, Seq2SeqBatchSampler
from transformer import InferTransformer, position_encoding_init
from transformer import InferTransformer
def post_process_seq(seq, bos_idx, eos_idx, output_bos=False,
......@@ -132,7 +129,7 @@ def do_predict(args):
# TODO: use model.predict when support variant length
f = open(args.output_file, "wb")
for data in data_loader():
finished_seq = transformer.test(inputs=flatten(data))[0]
finished_seq = transformer.test_batch(inputs=flatten(data))[0]
finished_seq = np.transpose(finished_seq, [0, 2, 1])
for ins in finished_seq:
for beam_idx, beam in enumerate(ins):
......
......@@ -13,7 +13,7 @@
# limitations under the License.
import glob
import six
import sys
import os
import io
import itertools
......@@ -26,7 +26,7 @@ from paddle.io import BatchSampler, DataLoader, Dataset
def create_data_loader(args, device):
data_loaders = [None, None]
data_loaders = [(None, None)] * 2
data_files = [args.training_file, args.validation_file
] if args.validation_file else [args.training_file]
for i, data_file in enumerate(data_files):
......@@ -65,7 +65,7 @@ def create_data_loader(args, device):
n_head=args.n_head),
num_workers=0, # TODO: use multi-process
return_list=True)
data_loaders[i] = data_loader
data_loaders[i] = (data_loader, batch_sampler.__len__)
return data_loaders
......@@ -289,7 +289,6 @@ class Seq2SeqDataset(Dataset):
start_mark="<s>",
end_mark="<e>",
unk_mark="<unk>",
only_src=False,
trg_fpattern=None,
byte_data=False):
if byte_data:
......@@ -477,6 +476,7 @@ class Seq2SeqBatchSampler(BatchSampler):
for i in range(self._nranks)
] for batch in batches]
batches = list(itertools.chain.from_iterable(batches))
self.batch_number = (len(batches) + self._nranks - 1) // self._nranks
# for multi-device
for batch_id, batch in enumerate(batches):
......@@ -490,11 +490,13 @@ class Seq2SeqBatchSampler(BatchSampler):
yield batch_indices
def __len__(self):
if hasattr(self, "batch_number"): #
return self.batch_number
if not self._use_token_batch:
batch_number = (
len(self._dataset) + self._batch_size * self._nranks - 1) // (
self._batch_size * self._nranks)
else:
# TODO(guosheng): fix the uncertain length
batch_number = 1
# for uncertain batch number, the actual value is self.batch_number
batch_number = sys.maxsize
return batch_number
......@@ -14,9 +14,6 @@
import logging
import os
import six
import sys
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
import numpy as np
import paddle
......@@ -26,14 +23,18 @@ from paddle.io import DataLoader
from utils.configure import PDConfig
from utils.check import check_gpu, check_version
from model import Input, set_device
from callbacks import ProgBarLogger
from hapi.model import Input, set_device
from hapi.callbacks import ProgBarLogger
from reader import create_data_loader
from transformer import Transformer, CrossEntropyCriterion
class TrainCallback(ProgBarLogger):
def __init__(self, args, verbose=2):
def __init__(self,
args,
verbose=2,
train_steps_fn=None,
eval_steps_fn=None):
# TODO(guosheng): save according to step
super(TrainCallback, self).__init__(args.print_step, verbose)
# the best cross-entropy value with label smoothing
......@@ -42,11 +43,17 @@ class TrainCallback(ProgBarLogger):
(1. - args.label_smooth_eps)) + args.label_smooth_eps *
np.log(args.label_smooth_eps / (args.trg_vocab_size - 1) + 1e-20))
self.loss_normalizer = loss_normalizer
self.train_steps_fn = train_steps_fn
self.eval_steps_fn = eval_steps_fn
def on_train_begin(self, logs=None):
super(TrainCallback, self).on_train_begin(logs)
self.train_metrics += ["normalized loss", "ppl"]
def on_train_batch_begin(self, step, logs=None):
if step == 0 and self.train_steps_fn:
self.train_progbar._num = self.train_steps_fn()
def on_train_batch_end(self, step, logs=None):
logs["normalized loss"] = logs["loss"][0] - self.loss_normalizer
logs["ppl"] = np.exp(min(logs["loss"][0], 100))
......@@ -57,6 +64,10 @@ class TrainCallback(ProgBarLogger):
self.eval_metrics = list(
self.eval_metrics) + ["normalized loss", "ppl"]
def on_eval_batch_begin(self, step, logs=None):
if step == 0 and self.eval_steps_fn:
self.eval_progbar._num = self.eval_steps_fn()
def on_eval_batch_end(self, step, logs=None):
logs["normalized loss"] = logs["loss"][0] - self.loss_normalizer
logs["ppl"] = np.exp(min(logs["loss"][0], 100))
......@@ -104,7 +115,8 @@ def do_train(args):
]
# def dataloader
train_loader, eval_loader = create_data_loader(args, device)
(train_loader, train_steps_fn), (
eval_loader, eval_steps_fn) = create_data_loader(args, device)
# define model
transformer = Transformer(
......@@ -142,7 +154,12 @@ def do_train(args):
eval_freq=1,
save_freq=1,
save_dir=args.save_model,
callbacks=[TrainCallback(args)])
callbacks=[
TrainCallback(
args,
train_steps_fn=train_steps_fn,
eval_steps_fn=eval_steps_fn)
])
if __name__ == "__main__":
......
......@@ -20,8 +20,8 @@ import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer, to_variable
from paddle.fluid.dygraph.learning_rate_scheduler import LearningRateDecay
from model import Model, CrossEntropy, Loss
from text import TransformerBeamSearchDecoder, DynamicDecode
from hapi.model import Model, CrossEntropy, Loss
from hapi.text import TransformerBeamSearchDecoder, DynamicDecode
def position_encoding_init(n_position, d_pos_vec):
......
......@@ -195,13 +195,19 @@ class PDConfig(object):
"Whether to perform predicting.")
self.default_g.add_arg("do_eval", bool, False,
"Whether to perform evaluating.")
self.default_g.add_arg("do_save_inference_model", bool, False,
"Whether to perform model saving for inference.")
self.default_g.add_arg(
"do_save_inference_model", bool, False,
"Whether to perform model saving for inference.")
# NOTE: args for profiler
self.default_g.add_arg("is_profiler", int, 0, "the switch of profiler tools. (used for benchmark)")
self.default_g.add_arg("profiler_path", str, './', "the profiler output file path. (used for benchmark)")
self.default_g.add_arg("max_iter", int, 0, "the max train batch num.(used for benchmark)")
self.default_g.add_arg(
"is_profiler", int, 0,
"the switch of profiler tools. (used for benchmark)")
self.default_g.add_arg(
"profiler_path", str, './',
"the profiler output file path. (used for benchmark)")
self.default_g.add_arg("max_iter", int, 0,
"the max train batch num.(used for benchmark)")
self.parser = parser
......
......@@ -27,7 +27,7 @@ from paddle.io import DataLoader
from hapi.model import Model, Input, set_device
from hapi.distributed import DistributedBatchSampler
from hapi.vision.transforms import Compose, BatchCompose
from hapi.vision.transforms import BatchCompose
from modeling import yolov3_darknet53, YoloLoss
from coco import COCODataset
......@@ -43,10 +43,9 @@ def make_optimizer(step_per_epoch, parameter_list=None):
momentum = 0.9
weight_decay = 5e-4
boundaries = [step_per_epoch * e for e in [200, 250]]
values = [base_lr * (0.1 ** i) for i in range(len(boundaries) + 1)]
values = [base_lr * (0.1**i) for i in range(len(boundaries) + 1)]
learning_rate = fluid.layers.piecewise_decay(
boundaries=boundaries,
values=values)
boundaries=boundaries, values=values)
learning_rate = fluid.layers.linear_lr_warmup(
learning_rate=learning_rate,
warmup_steps=warm_up_iter,
......@@ -63,77 +62,88 @@ def make_optimizer(step_per_epoch, parameter_list=None):
def main():
device = set_device(FLAGS.device)
fluid.enable_dygraph(device) if FLAGS.dynamic else None
inputs = [Input([None, 1], 'int64', name='img_id'),
Input([None, 2], 'int32', name='img_shape'),
Input([None, 3, None, None], 'float32', name='image')]
labels = [Input([None, NUM_MAX_BOXES, 4], 'float32', name='gt_bbox'),
Input([None, NUM_MAX_BOXES], 'int32', name='gt_label'),
Input([None, NUM_MAX_BOXES], 'float32', name='gt_score')]
if not FLAGS.eval_only: # training mode
train_transform = Compose([ColorDistort(),
RandomExpand(),
RandomCrop(),
RandomFlip(),
NormalizeBox(),
PadBox(),
BboxXYXY2XYWH()])
inputs = [
Input(
[None, 1], 'int64', name='img_id'), Input(
[None, 2], 'int32', name='img_shape'), Input(
[None, 3, None, None], 'float32', name='image')
]
labels = [
Input(
[None, NUM_MAX_BOXES, 4], 'float32', name='gt_bbox'), Input(
[None, NUM_MAX_BOXES], 'int32', name='gt_label'), Input(
[None, NUM_MAX_BOXES], 'float32', name='gt_score')
]
if not FLAGS.eval_only: # training mode
train_transform = Compose([
ColorDistort(), RandomExpand(), RandomCrop(), RandomFlip(),
NormalizeBox(), PadBox(), BboxXYXY2XYWH()
])
train_collate_fn = BatchCompose([RandomShape(), NormalizeImage()])
dataset = COCODataset(dataset_dir=FLAGS.data,
anno_path='annotations/instances_train2017.json',
image_dir='train2017',
with_background=False,
mixup=True,
transform=train_transform)
batch_sampler = DistributedBatchSampler(dataset,
batch_size=FLAGS.batch_size,
shuffle=True,
drop_last=True)
loader = DataLoader(dataset,
batch_sampler=batch_sampler,
places=device,
num_workers=FLAGS.num_workers,
return_list=True,
collate_fn=train_collate_fn)
else: # evaluation mode
eval_transform = Compose([ResizeImage(target_size=608),
NormalizeBox(),
PadBox(),
BboxXYXY2XYWH()])
dataset = COCODataset(
dataset_dir=FLAGS.data,
anno_path='annotations/instances_train2017.json',
image_dir='train2017',
with_background=False,
mixup=True,
transform=train_transform)
batch_sampler = DistributedBatchSampler(
dataset, batch_size=FLAGS.batch_size, shuffle=True, drop_last=True)
loader = DataLoader(
dataset,
batch_sampler=batch_sampler,
places=device,
num_workers=FLAGS.num_workers,
return_list=True,
collate_fn=train_collate_fn)
else: # evaluation mode
eval_transform = Compose([
ResizeImage(target_size=608), NormalizeBox(), PadBox(),
BboxXYXY2XYWH()
])
eval_collate_fn = BatchCompose([NormalizeImage()])
dataset = COCODataset(dataset_dir=FLAGS.data,
anno_path='annotations/instances_val2017.json',
image_dir='val2017',
with_background=False,
transform=eval_transform)
dataset = COCODataset(
dataset_dir=FLAGS.data,
anno_path='annotations/instances_val2017.json',
image_dir='val2017',
with_background=False,
transform=eval_transform)
# batch_size can only be 1 in evaluation for YOLOv3
# prediction bbox is a LoDTensor
batch_sampler = DistributedBatchSampler(dataset,
batch_size=1,
shuffle=False,
drop_last=False)
loader = DataLoader(dataset,
batch_sampler=batch_sampler,
places=device,
num_workers=FLAGS.num_workers,
return_list=True,
collate_fn=eval_collate_fn)
batch_sampler = DistributedBatchSampler(
dataset, batch_size=1, shuffle=False, drop_last=False)
loader = DataLoader(
dataset,
batch_sampler=batch_sampler,
places=device,
num_workers=FLAGS.num_workers,
return_list=True,
collate_fn=eval_collate_fn)
pretrained = FLAGS.eval_only and FLAGS.weights is None
model = yolov3_darknet53(num_classes=dataset.num_classes,
model_mode='eval' if FLAGS.eval_only else 'train',
pretrained=pretrained)
model = yolov3_darknet53(
num_classes=dataset.num_classes,
model_mode='eval' if FLAGS.eval_only else 'train',
pretrained=pretrained)
if FLAGS.pretrain_weights and not FLAGS.eval_only:
model.load(FLAGS.pretrain_weights, skip_mismatch=True, reset_optimizer=True)
model.load(
FLAGS.pretrain_weights, skip_mismatch=True, reset_optimizer=True)
optim = make_optimizer(len(batch_sampler), parameter_list=model.parameters())
optim = make_optimizer(
len(batch_sampler), parameter_list=model.parameters())
model.prepare(optim,
YoloLoss(num_classes=dataset.num_classes),
inputs=inputs, labels=labels,
device=FLAGS.device)
model.prepare(
optim,
YoloLoss(num_classes=dataset.num_classes),
inputs=inputs,
labels=labels,
device=FLAGS.device)
# NOTE: we implement COCO metric of YOLOv3 model here, separately
# from 'prepare' and 'fit' framework for follwing reason:
......@@ -149,7 +159,8 @@ def main():
preds = model.predict(loader, stack_outputs=False)
_, _, _, img_ids, bboxes = preds
anno_path = os.path.join(FLAGS.data, 'annotations/instances_val2017.json')
anno_path = os.path.join(FLAGS.data,
'annotations/instances_val2017.json')
coco_metric = COCOMetric(anno_path=anno_path, with_background=False)
for img_id, bbox in zip(img_ids, bboxes):
coco_metric.update(img_id, bbox)
......@@ -176,7 +187,9 @@ def main():
if __name__ == '__main__':
parser = argparse.ArgumentParser("Yolov3 Training on VOC")
parser.add_argument(
"--data", type=str, default='dataset/voc',
"--data",
type=str,
default='dataset/voc',
help="path to dataset directory")
parser.add_argument(
"--device", type=str, default='gpu', help="device to use, gpu or cpu")
......@@ -187,23 +200,38 @@ if __name__ == '__main__':
parser.add_argument(
"-e", "--epoch", default=300, type=int, help="number of epoch")
parser.add_argument(
"--no_mixup_epoch", default=30, type=int,
"--no_mixup_epoch",
default=30,
type=int,
help="number of the last N epoch without image mixup")
parser.add_argument(
'--lr', '--learning-rate', default=0.001, type=float, metavar='LR',
'--lr',
'--learning-rate',
default=0.001,
type=float,
metavar='LR',
help='initial learning rate')
parser.add_argument(
"-b", "--batch_size", default=8, type=int, help="batch size")
parser.add_argument(
"-j", "--num_workers", default=4, type=int, help="reader worker number")
"-j",
"--num_workers",
default=4,
type=int,
help="reader worker number")
parser.add_argument(
"-p", "--pretrain_weights", default=None, type=str,
"-p",
"--pretrain_weights",
default=None,
type=str,
help="path to pretrained weights")
parser.add_argument(
"-r", "--resume", default=None, type=str,
help="path to model weights")
"-r", "--resume", default=None, type=str, help="path to model weights")
parser.add_argument(
"-w", "--weights", default=None, type=str,
"-w",
"--weights",
default=None,
type=str,
help="path to weights for evaluation")
FLAGS = parser.parse_args()
assert FLAGS.data, "error: must provide data path"
......
......@@ -73,6 +73,7 @@ class ConvBNLayer(fluid.dygraph.Layer):
out = fluid.layers.leaky_relu(x=out, alpha=0.1)
return out
class YoloDetectionBlock(fluid.dygraph.Layer):
def __init__(self, ch_in, channel):
super(YoloDetectionBlock, self).__init__()
......@@ -81,38 +82,34 @@ class YoloDetectionBlock(fluid.dygraph.Layer):
"channel {} cannot be divided by 2".format(channel)
self.conv0 = ConvBNLayer(
ch_in=ch_in,
ch_out=channel,
filter_size=1,
stride=1,
padding=0)
ch_in=ch_in, ch_out=channel, filter_size=1, stride=1, padding=0)
self.conv1 = ConvBNLayer(
ch_in=channel,
ch_out=channel*2,
ch_out=channel * 2,
filter_size=3,
stride=1,
padding=1)
self.conv2 = ConvBNLayer(
ch_in=channel*2,
ch_in=channel * 2,
ch_out=channel,
filter_size=1,
stride=1,
padding=0)
self.conv3 = ConvBNLayer(
ch_in=channel,
ch_out=channel*2,
ch_out=channel * 2,
filter_size=3,
stride=1,
padding=1)
self.route = ConvBNLayer(
ch_in=channel*2,
ch_in=channel * 2,
ch_out=channel,
filter_size=1,
stride=1,
padding=0)
self.tip = ConvBNLayer(
ch_in=channel,
ch_out=channel*2,
ch_out=channel * 2,
filter_size=3,
stride=1,
padding=1)
......@@ -149,8 +146,10 @@ class YOLOv3(Model):
"model_mode should be 'train' 'eval' or 'test', but got " \
"{}".format(model_mode)
self.model_mode = str.lower(model_mode)
self.anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45,
59, 119, 116, 90, 156, 198, 373, 326]
self.anchors = [
10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198,
373, 326
]
self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
self.valid_thresh = 0.005
self.nms_thresh = 0.45
......@@ -158,7 +157,10 @@ class YOLOv3(Model):
self.nms_posk = 100
self.draw_thresh = 0.5
self.backbone = darknet53(pretrained=(model_mode=='train'))
self.backbone = darknet53(
pretrained=(model_mode == 'train'),
with_pool=False,
num_classes=-1)
self.block_outputs = []
self.yolo_blocks = []
self.route_blocks = []
......@@ -173,32 +175,46 @@ class YOLOv3(Model):
block_out = self.add_sublayer(
"block_out_{}".format(idx),
Conv2D(num_channels=1024 // (2**idx),
num_filters=num_filters,
filter_size=1,
act=None,
param_attr=ParamAttr(
initializer=fluid.initializer.Normal(0., 0.02)),
bias_attr=ParamAttr(
initializer=fluid.initializer.Constant(0.0),
regularizer=L2Decay(0.))))
Conv2D(
num_channels=1024 // (2**idx),
num_filters=num_filters,
filter_size=1,
act=None,
param_attr=ParamAttr(
initializer=fluid.initializer.Normal(0., 0.02)),
bias_attr=ParamAttr(
initializer=fluid.initializer.Constant(0.0),
regularizer=L2Decay(0.))))
self.block_outputs.append(block_out)
if idx < 2:
route = self.add_sublayer(
"route2_{}".format(idx),
ConvBNLayer(ch_in=512 // (2**idx),
ch_out=256 // (2**idx),
filter_size=1,
act='leaky_relu'))
ConvBNLayer(
ch_in=512 // (2**idx),
ch_out=256 // (2**idx),
filter_size=1,
act='leaky_relu'))
self.route_blocks.append(route)
def extract_feats(self, inputs):
    """Run the backbone stage by stage and collect per-stage outputs.

    ``inputs`` is passed through ``backbone.conv0`` and
    ``backbone.downsample0``, then through every entry of
    ``backbone.darknet53_conv_block_list`` in order.  The output of each
    conv block is recorded; between consecutive stages (every stage but
    the last, as counted by ``backbone.stages``) the matching entry of
    ``backbone.downsample_list`` is applied before the next block.

    Returns:
        list: at most the three last recorded outputs, last stage first.
    """
    backbone = self.backbone
    feat = backbone.downsample0(backbone.conv0(inputs))

    stage_feats = []
    for stage_idx, conv_block in enumerate(
            backbone.darknet53_conv_block_list):
        feat = conv_block(feat)
        stage_feats.append(feat)
        # No downsample layer follows the final stage.
        if stage_idx + 1 < len(backbone.stages):
            feat = backbone.downsample_list[stage_idx](feat)

    # Reverse slice: last stage, then the two before it.
    return stage_feats[-1:-4:-1]
def forward(self, img_id, img_shape, inputs):
outputs = []
boxes = []
scores = []
downsample = 32
feats = self.backbone(inputs)
feats = self.extract_feats(inputs)
route = None
for idx, feat in enumerate(feats):
if idx > 0:
......@@ -233,15 +249,18 @@ class YOLOv3(Model):
if self.model_mode == 'train':
return outputs
preds = [img_id,
fluid.layers.multiclass_nms(
bboxes=fluid.layers.concat(boxes, axis=1),
scores=fluid.layers.concat(scores, axis=2),
score_threshold=self.valid_thresh,
nms_top_k=self.nms_topk,
keep_top_k=self.nms_posk,
nms_threshold=self.nms_thresh,
background_label=-1)]
preds = [
img_id, fluid.layers.multiclass_nms(
bboxes=fluid.layers.concat(
boxes, axis=1),
scores=fluid.layers.concat(
scores, axis=2),
score_threshold=self.valid_thresh,
nms_top_k=self.nms_topk,
keep_top_k=self.nms_posk,
nms_threshold=self.nms_thresh,
background_label=-1)
]
if self.model_mode == 'test':
return preds
......@@ -249,14 +268,17 @@ class YOLOv3(Model):
# model_mode == "eval"
return outputs + preds
class YoloLoss(Loss):
def __init__(self, num_classes=80, num_max_boxes=50):
super(YoloLoss, self).__init__()
self.num_classes = num_classes
self.num_max_boxes = num_max_boxes
self.ignore_thresh = 0.7
self.anchors = [10, 13, 16, 30, 33, 23, 30, 61, 62, 45,
59, 119, 116, 90, 156, 198, 373, 326]
self.anchors = [
10, 13, 16, 30, 33, 23, 30, 61, 62, 45, 59, 119, 116, 90, 156, 198,
373, 326
]
self.anchor_masks = [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
def forward(self, outputs, labels):
......@@ -265,7 +287,7 @@ class YoloLoss(Loss):
losses = []
for idx, out in enumerate(outputs):
if idx == 3: break # debug
if idx == 3: break # debug
anchor_mask = self.anchor_masks[idx]
loss = fluid.layers.yolov3_loss(
x=out,
......@@ -284,8 +306,10 @@ class YoloLoss(Loss):
return losses
def _yolov3_darknet(num_layers=53, num_classes=80,
model_mode='train', pretrained=True):
def _yolov3_darknet(num_layers=53,
num_classes=80,
model_mode='train',
pretrained=True):
model = YOLOv3(num_classes, model_mode)
if pretrained:
assert num_layers in pretrain_infos.keys(), \
......
......@@ -20,6 +20,7 @@ import traceback
import numpy as np
__all__ = [
"Compose",
'ColorDistort',
'RandomExpand',
'RandomCrop',
......@@ -33,6 +34,37 @@ __all__ = [
]
class Compose(object):
    """Chain several transforms into one callable.

    Args:
        transforms (list of ``Transform`` objects): transforms to apply,
            in order.
    """

    def __init__(self, transforms):
        self.transforms = transforms

    def __call__(self, *data):
        """Apply every transform in sequence.

        The positional arguments are handed to the first transform; each
        transform's return value is unpacked as the positional arguments
        of the next one.  If a transform raises, the failure and its stack
        trace are printed and the same exception is re-raised.

        Returns:
            Whatever the last transform returned, or the input tuple when
            there are no transforms.
        """
        for transform in self.transforms:
            try:
                data = transform(*data)
            except Exception as err:
                trace = traceback.format_exc()
                print("fail to perform transform [{}] with error: "
                      "{} and stack:\n{}".format(transform, err, str(trace)))
                raise err
        return data

    def __repr__(self):
        """Return the class name followed by one line per transform."""
        entries = ['\n {0}'.format(t) for t in self.transforms]
        return self.__class__.__name__ + '(' + ''.join(entries) + '\n)'
class ColorDistort(object):
"""Random color distortion.
......@@ -147,7 +179,10 @@ class RandomExpand(object):
fill_value (list): color value used to fill the canvas. in RGB order.
"""
def __init__(self, ratio=4., prob=0.5, fill_value=[123.675, 116.28, 103.53]):
def __init__(self,
ratio=4.,
prob=0.5,
fill_value=[123.675, 116.28, 103.53]):
assert ratio > 1.01, "expand ratio must be larger than 1.01"
self.ratio = ratio
self.prob = prob
......@@ -493,8 +528,7 @@ def _crop_box_with_center_constraint(box, crop):
cropped_box[:, :2] -= crop[:2]
cropped_box[:, 2:] -= crop[:2]
centers = (box[:, :2] + box[:, 2:]) / 2
valid = np.logical_and(
crop[:2] <= centers, centers < crop[2:]).all(axis=1)
valid = np.logical_and(crop[:2] <= centers, centers < crop[2:]).all(axis=1)
valid = np.logical_and(
valid, (cropped_box[:, :2] < cropped_box[:, 2:]).all(axis=1))
return cropped_box, np.where(valid)[0]
......@@ -517,8 +551,8 @@ def random_crop(inputs):
for i in range(50):
scale = np.random.uniform(*scaling)
min_ar, max_ar = aspect_ratios
ar = np.random.uniform(max(min_ar, scale**2),
min(max_ar, scale**-2))
ar = np.random.uniform(
max(min_ar, scale**2), min(max_ar, scale**-2))
crop_h = int(h * scale / np.sqrt(ar))
crop_w = int(w * scale * np.sqrt(ar))
crop_y = np.random.randint(0, h - crop_h)
......@@ -529,7 +563,8 @@ def random_crop(inputs):
continue
cropped_box, valid_ids = _crop_box_with_center_constraint(
gt_box, np.array(crop_box, dtype=np.float32))
gt_box, np.array(
crop_box, dtype=np.float32))
if valid_ids.size > 0:
found = True
break
......@@ -545,9 +580,7 @@ def random_crop(inputs):
class ResizeImage(object):
def __init__(self,
target_size=0,
interp=cv2.INTER_CUBIC):
def __init__(self, target_size=0, interp=cv2.INTER_CUBIC):
"""
Rescale image to the specified target size.
If target_size is list, selected a scale randomly as the specified
......@@ -574,8 +607,8 @@ class ResizeImage(object):
raise ImageError('{}: image is not 3-dimensional.'.format(self))
im_scale_x = float(self.target_size) / float(im.shape[1])
im_scale_y = float(self.target_size) / float(im.shape[0])
resize_w = self.target_size
resize_h = self.target_size
resize_w = self.target_size
resize_h = self.target_size
im = cv2.resize(
im,
......@@ -586,4 +619,3 @@ class ResizeImage(object):
interpolation=self.interp)
return [im_id, im_shape, im, gt_bbox, gt_class, gt_score]
......@@ -215,15 +215,13 @@ class ProgBarLogger(Callback):
if self.train_step % self.log_freq == 0 and self.verbose and ParallelEnv(
).local_rank == 0:
# if steps is not None, last step will update in on_epoch_end
if self.steps and self.train_step < self.steps:
self._updates(logs, 'train')
else:
if self.steps is None or self.train_step < self.steps:
self._updates(logs, 'train')
def on_epoch_end(self, epoch, logs=None):
logs = logs or {}
if self.verbose and ParallelEnv().local_rank == 0:
if self.train_step % self.log_freq != 0 and self.verbose and ParallelEnv(
).local_rank == 0:
self._updates(logs, 'train')
def on_eval_begin(self, logs=None):
......@@ -238,20 +236,20 @@ class ProgBarLogger(Callback):
def on_eval_batch_end(self, step, logs=None):
logs = logs or {}
self.eval_step = step
self.eval_step += 1
samples = logs.get('batch_size', 1)
self.evaled_samples += samples
if self.eval_step % self.log_freq == 0 and self.verbose and ParallelEnv(
).local_rank == 0:
# if steps is not None, last step will update in on_epoch_end
if self.eval_steps and self.eval_step < self.eval_steps:
if self.eval_steps is None or self.eval_step < self.eval_steps:
self._updates(logs, 'eval')
def on_eval_end(self, logs=None):
logs = logs or {}
if self.verbose and ParallelEnv().local_rank == 0:
self._updates(logs, 'eval')
if self.eval_step % self.log_freq != 0:
self._updates(logs, 'eval')
print('Eval samples: %d' % (self.evaled_samples))
......
此差异已折叠。
此差异已折叠。
......@@ -130,6 +130,18 @@ class Optimizer(object):
return True
return False
def state_dict(self):
    """Return the state dict reported by the wrapped ``self.optimizer``."""
    wrapped = self.optimizer
    return wrapped.state_dict()
def set_dict(self, state_dict):
    """Forward ``state_dict`` to the wrapped optimizer's ``set_dict``.

    Returns:
        Whatever the wrapped optimizer's ``set_dict`` returns.
    """
    wrapped = self.optimizer
    return wrapped.set_dict(state_dict)
def get_opti_var_name_list(self):
    """Return the wrapped optimizer's ``get_opti_var_name_list()`` result."""
    wrapped = self.optimizer
    return wrapped.get_opti_var_name_list()
def current_step_lr(self):
    """Return the wrapped optimizer's ``current_step_lr()`` result."""
    wrapped = self.optimizer
    return wrapped.current_step_lr()
def minimize(self, loss, use_data_parallel=False, model=None):
param_list = dict()
......
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
......@@ -190,7 +190,8 @@ class TestModel(unittest.TestCase):
eval_result = model.evaluate(val_dataset, batch_size=batch_size)
output = model.predict(test_dataset, batch_size=batch_size)
output = model.predict(
test_dataset, batch_size=batch_size, stack_outputs=True)
np.testing.assert_equal(output[0].shape[0], len(test_dataset))
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册