未验证 提交 f53f360a 编写于 作者: G Guo Sheng 提交者: GitHub

Merge pull request #95 from guoshengCS/update-2.0-beta-hapi

Update Transformer, seq2seq, sequence_tagging to adapt to 2.0-beta hapi apis
**仍在开发中,待完成**
1. download data: wget https://paddle-hapi.bj.bcebos.com/data/bert_data.tar.gz
2. unzip data: tar -zvxf bert_data.tar.gz
......
**仍在开发中,待完成**
0. python3.7 -m pip install leveldb
1. download data: wget https://paddle-hapi.bj.bcebos.com/data/bert_data.tar.gz
......
## 简介
**仍在开发中,待完成**
## 简介
情感是人类的一种高级智能行为,为了识别文本的情感倾向,需要深入的语义建模。另外,不同领域(如餐饮、体育)在情感的表达上各不相同,因而需要有大规模覆盖各个领域的数据进行模型训练。为此,我们通过基于深度学习的语义模型和大规模数据挖掘解决上述两个问题。效果上,我们基于开源情感倾向分类数据集ChnSentiCorp进行评测。具体数据如下所示:
......
......@@ -19,11 +19,12 @@ import random
from functools import partial
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.layers.utils import flatten
from paddle.fluid.io import DataLoader
from paddle.static import InputSpec as Input
from paddle.incubate.hapi.model import Input, set_device
from args import parse_args
from seq2seq_base import BaseInferModel
from seq2seq_attn import AttentionInferModel
......@@ -48,7 +49,7 @@ def post_process_seq(seq, bos_idx, eos_idx, output_bos=False,
def do_predict(args):
device = set_device("gpu" if args.use_gpu else "cpu")
device = paddle.set_device("gpu" if args.use_gpu else "cpu")
fluid.enable_dygraph(device) if args.eager_run else None
# define model
......@@ -84,19 +85,21 @@ def do_predict(args):
return_list=True)
model_maker = AttentionInferModel if args.attention else BaseInferModel
model = model_maker(
args.src_vocab_size,
args.tar_vocab_size,
args.hidden_size,
args.hidden_size,
args.num_layers,
args.dropout,
bos_id=bos_id,
eos_id=eos_id,
beam_size=args.beam_size,
max_out_len=256)
model = paddle.Model(
model_maker(
args.src_vocab_size,
args.tar_vocab_size,
args.hidden_size,
args.hidden_size,
args.num_layers,
args.dropout,
bos_id=bos_id,
eos_id=eos_id,
beam_size=args.beam_size,
max_out_len=256),
inputs=inputs)
model.prepare(inputs=inputs, device=device)
model.prepare()
# load the trained model
assert args.reload_model, (
......
......@@ -18,10 +18,7 @@ from paddle.fluid import ParamAttr
from paddle.fluid.initializer import UniformInitializer
from paddle.fluid.dygraph import Embedding, Linear, Layer
from paddle.fluid.layers import BeamSearchDecoder
from paddle.incubate.hapi.model import Model
from paddle.incubate.hapi.loss import Loss
from paddle.incubate.hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
from paddle.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
from seq2seq_base import Encoder
......@@ -138,7 +135,7 @@ class Decoder(Layer):
return predict
class AttentionModel(Model):
class AttentionModel(Layer):
def __init__(self,
src_vocab_size,
trg_vocab_size,
......
......@@ -18,18 +18,14 @@ from paddle.fluid import ParamAttr
from paddle.fluid.initializer import UniformInitializer
from paddle.fluid.dygraph import Embedding, Linear, Layer
from paddle.fluid.layers import BeamSearchDecoder
from paddle.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
from paddle.incubate.hapi.model import Model
from paddle.incubate.hapi.loss import Loss
from paddle.incubate.hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
class CrossEntropyCriterion(Loss):
class CrossEntropyCriterion(Layer):
def __init__(self):
super(CrossEntropyCriterion, self).__init__()
def forward(self, outputs, labels):
predict, (trg_length, label) = outputs[0], labels
def forward(self, predict, trg_length, label):
# for target padding mask
mask = layers.sequence_mask(
trg_length, maxlen=layers.shape(predict)[1], dtype=predict.dtype)
......@@ -140,7 +136,7 @@ class Decoder(Layer):
return predict
class BaseModel(Model):
class BaseModel(Layer):
def __init__(self,
src_vocab_size,
trg_vocab_size,
......
......@@ -15,14 +15,15 @@
import logging
import os
import random
from args import parse_args
from functools import partial
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.io import DataLoader
from paddle.static import InputSpec as Input
from paddle.incubate.hapi.model import Input, set_device
from args import parse_args
from seq2seq_base import BaseModel, CrossEntropyCriterion
from seq2seq_attn import AttentionModel
from reader import create_data_loader
......@@ -30,7 +31,7 @@ from utility import PPL, TrainCallback, get_model_cls
def do_train(args):
device = set_device("gpu" if args.use_gpu else "cpu")
device = paddle.set_device("gpu" if args.use_gpu else "cpu")
fluid.enable_dygraph(device) if args.eager_run else None
if args.enable_ce:
......@@ -58,9 +59,11 @@ def do_train(args):
model_maker = get_model_cls(
AttentionModel) if args.attention else get_model_cls(BaseModel)
model = model_maker(args.src_vocab_size, args.tar_vocab_size,
args.hidden_size, args.hidden_size, args.num_layers,
args.dropout)
model = paddle.Model(
model_maker(args.src_vocab_size, args.tar_vocab_size, args.hidden_size,
args.hidden_size, args.num_layers, args.dropout),
inputs=inputs,
labels=labels)
grad_clip = fluid.clip.GradientClipByGlobalNorm(
clip_norm=args.max_grad_norm)
optimizer = fluid.optimizer.Adam(
......@@ -69,13 +72,7 @@ def do_train(args):
grad_clip=grad_clip)
ppl_metric = PPL(reset_freq=100) # ppl for every 100 batches
model.prepare(
optimizer,
CrossEntropyCriterion(),
ppl_metric,
inputs=inputs,
labels=labels,
device=device)
model.prepare(optimizer, CrossEntropyCriterion(), ppl_metric)
model.fit(train_data=train_loader,
eval_data=eval_loader,
epochs=args.max_epoch,
......
......@@ -15,14 +15,13 @@
import math
import functools
import paddle
import paddle.fluid as fluid
from paddle.metric import Metric
from paddle.text import BasicLSTMCell
from paddle.incubate.hapi.metrics import Metric
from paddle.incubate.hapi.callbacks import ProgBarLogger
from paddle.incubate.hapi.text import BasicLSTMCell
class TrainCallback(ProgBarLogger):
class TrainCallback(paddle.callbacks.ProgBarLogger):
def __init__(self, ppl, log_freq, verbose=2):
super(TrainCallback, self).__init__(log_freq, verbose)
self.ppl = ppl
......@@ -58,7 +57,7 @@ class PPL(Metric):
self.reset_freq = reset_freq
self.reset()
def add_metric_op(self, pred, seq_length, label):
def compute(self, pred, seq_length, label):
word_num = fluid.layers.reduce_sum(seq_length)
return word_num
......
......@@ -18,9 +18,10 @@ SequenceTagging eval structure
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
from paddle.fluid.layers.utils import flatten
from paddle.incubate.hapi.model import Input, set_device
from paddle.static import InputSpec as Input
from sequence_tagging import SeqTagging, LacLoss, ChunkEval
from reader import LacDataset, LacDataLoader
......@@ -29,7 +30,7 @@ from utils.configure import PDConfig
def main(args):
place = set_device(args.device)
place = paddle.set_device(args.device)
fluid.enable_dygraph(place) if args.dynamic else None
inputs = [
......@@ -45,14 +46,14 @@ def main(args):
vocab_size = dataset.vocab_size
num_labels = dataset.num_labels
model = SeqTagging(args, vocab_size, num_labels, mode="test")
model = paddle.Model(
SeqTagging(
args, vocab_size, num_labels, mode="test"),
inputs=inputs,
labels=labels)
model.mode = "test"
model.prepare(
metrics=ChunkEval(num_labels),
inputs=inputs,
labels=labels,
device=place)
model.prepare(metrics=ChunkEval(num_labels))
model.load(args.init_from_checkpoint, skip_mismatch=True)
eval_result = model.evaluate(
......
......@@ -20,9 +20,10 @@ from __future__ import print_function
import six
import paddle
import paddle.fluid as fluid
from paddle.fluid.layers.utils import flatten
from paddle.incubate.hapi.model import Input, set_device
from paddle.static import InputSpec as Input
from sequence_tagging import SeqTagging, LacLoss, ChunkEval
from reader import LacDataset, LacDataLoader
......@@ -31,7 +32,7 @@ from utils.configure import PDConfig
def main(args):
place = set_device(args.device)
place = paddle.set_device(args.device)
fluid.enable_dygraph(place) if args.dynamic else None
inputs = [
......@@ -46,10 +47,13 @@ def main(args):
vocab_size = dataset.vocab_size
num_labels = dataset.num_labels
model = SeqTagging(args, vocab_size, num_labels, mode="predict")
model = paddle.Model(
SeqTagging(
args, vocab_size, num_labels, mode="predict"),
inputs=inputs)
model.mode = "test"
model.prepare(inputs=inputs)
model.prepare()
model.load(args.init_from_checkpoint, skip_mismatch=True)
......
......@@ -25,17 +25,16 @@ import math
import argparse
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.incubate.hapi.metrics import Metric
from paddle.incubate.hapi.model import Model
from paddle.incubate.hapi.loss import Loss
from paddle.incubate.hapi.text import SequenceTagging
from paddle.metric import Metric
from paddle.text import SequenceTagging
from utils.check import check_gpu, check_version
from utils.configure import PDConfig
class SeqTagging(Model):
class SeqTagging(fluid.dygraph.Layer):
def __init__(self, args, vocab_size, num_labels, length=None,
mode="train"):
super(SeqTagging, self).__init__()
......@@ -131,13 +130,13 @@ class Chunk_eval(fluid.dygraph.Layer):
return (num_infer_chunks, num_label_chunks, num_correct_chunks)
class LacLoss(Loss):
class LacLoss(fluid.dygraph.Layer):
def __init__(self):
super(LacLoss, self).__init__()
pass
def forward(self, outputs, labels):
avg_cost = outputs[1]
def forward(self, *args):
avg_cost = args[1]
return avg_cost
......@@ -149,7 +148,7 @@ class ChunkEval(Metric):
int(math.ceil((num_labels - 1) / 2.0)), "IOB")
self.reset()
def add_metric_op(self, *args):
def compute(self, *args):
crf_decode = args[0]
lengths = args[2]
label = args[3]
......
......@@ -18,9 +18,10 @@ SequenceTagging network structure
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
from paddle.fluid.optimizer import AdamOptimizer
from paddle.incubate.hapi.model import Input, set_device
from paddle.static import InputSpec as Input
from sequence_tagging import SeqTagging, LacLoss, ChunkEval
from reader import LacDataset, LacDataLoader
......@@ -29,7 +30,7 @@ from utils.configure import PDConfig
def main(args):
place = set_device(args.device)
place = paddle.set_device(args.device)
fluid.enable_dygraph(place) if args.dynamic else None
inputs = [
......@@ -48,19 +49,17 @@ def main(args):
vocab_size = dataset.vocab_size
num_labels = dataset.num_labels
model = SeqTagging(args, vocab_size, num_labels, mode="train")
model = paddle.Model(
SeqTagging(
args, vocab_size, num_labels, mode="train"),
inputs=inputs,
labels=labels)
optim = AdamOptimizer(
learning_rate=args.base_learning_rate,
parameter_list=model.parameters())
model.prepare(
optim,
LacLoss(),
ChunkEval(num_labels),
inputs=inputs,
labels=labels,
device=args.device)
model.prepare(optim, LacLoss(), ChunkEval(num_labels))
if args.init_from_checkpoint:
model.load(args.init_from_checkpoint)
......
......@@ -21,11 +21,11 @@ import paddle
import paddle.fluid as fluid
from paddle.io import DataLoader
from paddle.fluid.layers.utils import flatten
from paddle.static import InputSpec as Input
from utils.configure import PDConfig
from utils.check import check_gpu, check_version
from paddle.incubate.hapi.model import Input, set_device
from reader import prepare_infer_input, Seq2SeqDataset, Seq2SeqBatchSampler
from transformer import InferTransformer
......@@ -48,7 +48,7 @@ def post_process_seq(seq, bos_idx, eos_idx, output_bos=False,
def do_predict(args):
device = set_device("gpu" if args.use_cuda else "cpu")
device = paddle.set_device("gpu" if args.use_cuda else "cpu")
fluid.enable_dygraph(device) if args.eager_run else None
inputs = [
......@@ -99,37 +99,39 @@ def do_predict(args):
return_list=True)
# define model
transformer = InferTransformer(
args.src_vocab_size,
args.trg_vocab_size,
args.max_length + 1,
args.n_layer,
args.n_head,
args.d_key,
args.d_value,
args.d_model,
args.d_inner_hid,
args.prepostprocess_dropout,
args.attention_dropout,
args.relu_dropout,
args.preprocess_cmd,
args.postprocess_cmd,
args.weight_sharing,
args.bos_idx,
args.eos_idx,
beam_size=args.beam_size,
max_out_len=args.max_out_len)
transformer.prepare(inputs=inputs, device=device)
model = paddle.Model(
InferTransformer(
args.src_vocab_size,
args.trg_vocab_size,
args.max_length + 1,
args.n_layer,
args.n_head,
args.d_key,
args.d_value,
args.d_model,
args.d_inner_hid,
args.prepostprocess_dropout,
args.attention_dropout,
args.relu_dropout,
args.preprocess_cmd,
args.postprocess_cmd,
args.weight_sharing,
args.bos_idx,
args.eos_idx,
beam_size=args.beam_size,
max_out_len=args.max_out_len),
inputs)
model.prepare()
# load the trained model
assert args.init_from_params, (
"Please set init_from_params to load the infer model.")
transformer.load(args.init_from_params)
model.load(args.init_from_params)
# TODO: use model.predict when support variant length
f = open(args.output_file, "wb")
for data in data_loader():
finished_seq = transformer.test_batch(inputs=flatten(data))[0]
finished_seq = model.test_batch(inputs=flatten(data))[0]
finished_seq = np.transpose(finished_seq, [0, 2, 1])
for ins in finished_seq:
for beam_idx, beam in enumerate(ins):
......
......@@ -19,17 +19,16 @@ import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.io import DataLoader
from paddle.static import InputSpec as Input
from utils.configure import PDConfig
from utils.check import check_gpu, check_version
from paddle.incubate.hapi.model import Input, set_device
from paddle.incubate.hapi.callbacks import ProgBarLogger
from reader import create_data_loader
from transformer import Transformer, CrossEntropyCriterion
class TrainCallback(ProgBarLogger):
class TrainCallback(paddle.callbacks.ProgBarLogger):
def __init__(self,
args,
verbose=2,
......@@ -75,7 +74,7 @@ class TrainCallback(ProgBarLogger):
def do_train(args):
device = set_device("gpu" if args.use_cuda else "cpu")
device = paddle.set_device("gpu" if args.use_cuda else "cpu")
fluid.enable_dygraph(device) if args.eager_run else None
# set seed for CE
......@@ -119,14 +118,16 @@ def do_train(args):
eval_loader, eval_steps_fn) = create_data_loader(args, device)
# define model
transformer = Transformer(
args.src_vocab_size, args.trg_vocab_size, args.max_length + 1,
args.n_layer, args.n_head, args.d_key, args.d_value, args.d_model,
args.d_inner_hid, args.prepostprocess_dropout, args.attention_dropout,
args.relu_dropout, args.preprocess_cmd, args.postprocess_cmd,
args.weight_sharing, args.bos_idx, args.eos_idx)
transformer.prepare(
model = paddle.Model(
Transformer(args.src_vocab_size, args.trg_vocab_size,
args.max_length + 1, args.n_layer, args.n_head, args.d_key,
args.d_value, args.d_model, args.d_inner_hid,
args.prepostprocess_dropout, args.attention_dropout,
args.relu_dropout, args.preprocess_cmd,
args.postprocess_cmd, args.weight_sharing, args.bos_idx,
args.eos_idx), inputs, labels)
model.prepare(
fluid.optimizer.Adam(
learning_rate=fluid.layers.noam_decay(
args.d_model,
......@@ -135,32 +136,29 @@ def do_train(args):
beta1=args.beta1,
beta2=args.beta2,
epsilon=float(args.eps),
parameter_list=transformer.parameters()),
CrossEntropyCriterion(args.label_smooth_eps),
inputs=inputs,
labels=labels,
device=device)
parameter_list=model.parameters()),
CrossEntropyCriterion(args.label_smooth_eps))
## init from some checkpoint, to resume the previous training
if args.init_from_checkpoint:
transformer.load(args.init_from_checkpoint)
model.load(args.init_from_checkpoint)
## init from some pretrain models, to better solve the current task
if args.init_from_pretrain_model:
transformer.load(args.init_from_pretrain_model, reset_optimizer=True)
model.load(args.init_from_pretrain_model, reset_optimizer=True)
# model train
transformer.fit(train_data=train_loader,
eval_data=eval_loader,
epochs=args.epoch,
eval_freq=1,
save_freq=1,
save_dir=args.save_model,
callbacks=[
TrainCallback(
args,
train_steps_fn=train_steps_fn,
eval_steps_fn=eval_steps_fn)
])
model.fit(train_data=train_loader,
eval_data=eval_loader,
epochs=args.epoch,
eval_freq=1,
save_freq=1,
save_dir=args.save_model,
callbacks=[
TrainCallback(
args,
train_steps_fn=train_steps_fn,
eval_steps_fn=eval_steps_fn)
])
if __name__ == "__main__":
......
......@@ -19,9 +19,7 @@ import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from paddle.fluid.dygraph import Embedding, LayerNorm, Linear, Layer
from paddle.incubate.hapi.model import Model
from paddle.incubate.hapi.loss import Loss
from paddle.incubate.hapi.text import TransformerBeamSearchDecoder, DynamicDecode
from paddle.text import TransformerBeamSearchDecoder, DynamicDecode
def position_encoding_init(n_position, d_pos_vec):
......@@ -498,13 +496,12 @@ class WrapDecoder(Layer):
return logits
class CrossEntropyCriterion(Loss):
class CrossEntropyCriterion(Layer):
def __init__(self, label_smooth_eps):
super(CrossEntropyCriterion, self).__init__()
self.label_smooth_eps = label_smooth_eps
def forward(self, outputs, labels):
predict, (label, weights) = outputs[0], labels
def forward(self, predict, label, weights):
if self.label_smooth_eps:
label = layers.label_smooth(
label=layers.one_hot(
......@@ -523,7 +520,7 @@ class CrossEntropyCriterion(Loss):
return avg_cost
class Transformer(Model):
class Transformer(Layer):
"""
model
"""
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册