Commit 1fac53aa, authored by xyzhou-puck

update NLP models

Parent 85e422bb
......@@ -16,14 +16,60 @@
import paddle.fluid as fluid
from hapi.metrics import Accuracy
from hapi.configure import Config
from hapi.text.bert import BertEncoder
from paddle.fluid.dygraph import Linear, Layer
from hapi.model import set_device, Model, SoftmaxWithCrossEntropy, Input
from cls import ClsModelLayer
import hapi.text.tokenizer.tokenization as tokenization
from hapi.text.bert import Optimizer, BertConfig, BertDataLoader, BertInputExample
def train():
class ClsModelLayer(Model):
"""
classification model
"""
def __init__(self,
args,
config,
num_labels,
return_pooled_out=True,
use_fp16=False):
super(ClsModelLayer, self).__init__()
self.config = config
self.use_fp16 = use_fp16
self.loss_scaling = args.loss_scaling
self.bert_layer = BertEncoder(
config=self.config, return_pooled_out=True, use_fp16=self.use_fp16)
self.cls_fc = Linear(
input_dim=self.config["hidden_size"],
output_dim=num_labels,
param_attr=fluid.ParamAttr(
name="cls_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
def forward(self, src_ids, position_ids, sentence_ids, input_mask):
"""
forward
"""
enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids,
sentence_ids, input_mask)
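# next_sent_feat is the pooled [CLS] representation; dropout regularizes it before the classification FC layer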
cls_feats = fluid.layers.dropout(
x=next_sent_feat,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
pred = self.cls_fc(cls_feats)
return pred
def main():
config = Config(yaml_file="./bert.yaml")
config.build()
......@@ -35,8 +81,6 @@ def train():
bert_config = BertConfig(config.bert_config_path)
bert_config.print_config()
trainer_count = fluid.dygraph.parallel.Env().nranks
tokenizer = tokenization.FullTokenizer(
vocab_file=config.vocab_path, do_lower_case=config.do_lower_case)
......@@ -52,14 +96,24 @@ def train():
return BertInputExample(
uid=uid, text_a=text_a, text_b=text_b, label=label)
bert_dataloader = BertDataLoader(
train_dataloader = BertDataLoader(
"./data/glue_data/MNLI/train.tsv",
tokenizer, ["contradiction", "entailment", "neutral"],
max_seq_length=64,
batch_size=32,
max_seq_length=config.max_seq_len,
batch_size=config.batch_size,
line_processor=mnli_line_processor)
num_train_examples = len(bert_dataloader.dataset)
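# dev-set loader for evaluation: no shuffling, and phase="predict" selects a plain (non-distributed) batch sampler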
dev_dataloader = BertDataLoader(
"./data/glue_data/MNLI/dev_matched.tsv",
tokenizer, ["contradiction", "entailment", "neutral"],
max_seq_length=config.max_seq_len,
batch_size=config.batch_size,
line_processor=mnli_line_processor,
shuffle=False,
phase="predict")
trainer_count = fluid.dygraph.parallel.Env().nranks
num_train_examples = len(train_dataloader.dataset)
max_train_steps = config.epoch * num_train_examples // config.batch_size // trainer_count
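# warm up the learning rate over the first warmup_proportion of the total training steps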
warmup_steps = int(max_train_steps * config.warmup_proportion)
......@@ -82,7 +136,6 @@ def train():
config,
bert_config,
len(["contradiction", "entailment", "neutral"]),
is_training=True,
return_pooled_out=True)
optimizer = Optimizer(
......@@ -106,10 +159,15 @@ def train():
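# load the pre-trained BERT weights into the encoder before fine-tuning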
cls_model.bert_layer.init_parameters(
config.init_pretraining_params, verbose=config.verbose)
cls_model.fit(train_data=bert_dataloader.dataloader, epochs=config.epoch)
# do train
cls_model.fit(train_data=train_dataloader.dataloader,
epochs=config.epoch,
save_dir=config.checkpoints)
return cls_model
# do eval
cls_model.evaluate(
eval_data=dev_dataloader.dataloader, batch_size=config.batch_size)
if __name__ == '__main__':
cls_model = train()
main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"dygraph transformer layers"
import six
import json
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear, Layer
from hapi.text.bert import BertEncoder
from hapi.model import Model
class ClsModelLayer(Model):
"""
classification model
"""
def __init__(self,
args,
config,
num_labels,
is_training=True,
return_pooled_out=True,
use_fp16=False):
super(ClsModelLayer, self).__init__()
self.config = config
self.is_training = is_training
self.use_fp16 = use_fp16
self.loss_scaling = args.loss_scaling
self.bert_layer = BertEncoder(
config=self.config, return_pooled_out=True, use_fp16=self.use_fp16)
self.cls_fc = Linear(
input_dim=self.config["hidden_size"],
output_dim=num_labels,
param_attr=fluid.ParamAttr(
name="cls_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
def forward(self, src_ids, position_ids, sentence_ids, input_mask):
"""
forward
"""
enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids,
sentence_ids, input_mask)
cls_feats = fluid.layers.dropout(
x=next_sent_feat,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
logits = self.cls_fc(cls_feats)
return logits
......@@ -18,7 +18,7 @@ batch_size: 32
in_tokens: False
do_lower_case: True
random_seed: 5512
use_cuda: False
use_cuda: True
shuffle: True
do_train: True
do_test: True
......
......@@ -16,14 +16,60 @@
import paddle.fluid as fluid
from hapi.metrics import Accuracy
from hapi.configure import Config
from hapi.text.bert import BertEncoder
from paddle.fluid.dygraph import Linear, Layer
from hapi.model import set_device, Model, SoftmaxWithCrossEntropy, Input
from cls import ClsModelLayer
import hapi.text.tokenizer.tokenization as tokenization
from hapi.text.bert import Optimizer, BertConfig, BertDataLoader, BertInputExample
def train():
class ClsModelLayer(Model):
"""
classification model
"""
def __init__(self,
args,
config,
num_labels,
return_pooled_out=True,
use_fp16=False):
super(ClsModelLayer, self).__init__()
self.config = config
self.use_fp16 = use_fp16
self.loss_scaling = args.loss_scaling
self.bert_layer = BertEncoder(
config=self.config, return_pooled_out=True, use_fp16=self.use_fp16)
self.cls_fc = Linear(
input_dim=self.config["hidden_size"],
output_dim=num_labels,
param_attr=fluid.ParamAttr(
name="cls_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
def forward(self, src_ids, position_ids, sentence_ids, input_mask):
"""
forward
"""
enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids,
sentence_ids, input_mask)
cls_feats = fluid.layers.dropout(
x=next_sent_feat,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
pred = self.cls_fc(cls_feats)
return pred
def main():
config = Config(yaml_file="./bert.yaml")
config.build()
......@@ -35,8 +81,6 @@ def train():
bert_config = BertConfig(config.bert_config_path)
bert_config.print_config()
trainer_count = fluid.dygraph.parallel.Env().nranks
tokenizer = tokenization.FullTokenizer(
vocab_file=config.vocab_path, do_lower_case=config.do_lower_case)
......@@ -52,15 +96,26 @@ def train():
return BertInputExample(
uid=uid, text_a=text_a, text_b=text_b, label=label)
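# mode="leveldb" caches the tokenized features on disk (LevelDB) instead of holding them all in memory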
bert_dataloader = BertDataLoader(
train_dataloader = BertDataLoader(
"./data/glue_data/MNLI/train.tsv",
tokenizer, ["contradiction", "entailment", "neutral"],
max_seq_length=64,
batch_size=32,
max_seq_length=config.max_seq_len,
batch_size=config.batch_size,
line_processor=mnli_line_processor,
mode="leveldb")
mode="leveldb",
phase="train")
num_train_examples = len(bert_dataloader.dataset)
dev_dataloader = BertDataLoader(
"./data/glue_data/MNLI/dev_matched.tsv",
tokenizer, ["contradiction", "entailment", "neutral"],
max_seq_length=config.max_seq_len,
batch_size=config.batch_size,
line_processor=mnli_line_processor,
shuffle=False,
phase="predict")
trainer_count = fluid.dygraph.parallel.Env().nranks
num_train_examples = len(train_dataloader.dataset)
max_train_steps = config.epoch * num_train_examples // config.batch_size // trainer_count
warmup_steps = int(max_train_steps * config.warmup_proportion)
......@@ -83,7 +138,6 @@ def train():
config,
bert_config,
len(["contradiction", "entailment", "neutral"]),
is_training=True,
return_pooled_out=True)
optimizer = Optimizer(
......@@ -107,10 +161,15 @@ def train():
cls_model.bert_layer.init_parameters(
config.init_pretraining_params, verbose=config.verbose)
cls_model.fit(train_data=bert_dataloader.dataloader, epochs=config.epoch)
# do train
cls_model.fit(train_data=train_dataloader.dataloader,
epochs=config.epoch,
save_dir=config.checkpoints)
return cls_model
# do eval
cls_model.evaluate(
eval_data=dev_dataloader.dataloader, batch_size=config.batch_size)
if __name__ == '__main__':
cls_model = train()
main()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"dygraph transformer layers"
import six
import json
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear, Layer
from hapi.text.bert import BertEncoder
from hapi.model import Model
class ClsModelLayer(Model):
"""
classification model
"""
def __init__(self,
args,
config,
num_labels,
is_training=True,
return_pooled_out=True,
use_fp16=False):
super(ClsModelLayer, self).__init__()
self.config = config
self.is_training = is_training
self.use_fp16 = use_fp16
self.loss_scaling = args.loss_scaling
self.bert_layer = BertEncoder(
config=self.config, return_pooled_out=True, use_fp16=self.use_fp16)
self.cls_fc = Linear(
input_dim=self.config["hidden_size"],
output_dim=num_labels,
param_attr=fluid.ParamAttr(
name="cls_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
def forward(self, src_ids, position_ids, sentence_ids, input_mask):
"""
forward
"""
enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids,
sentence_ids, input_mask)
cls_feats = fluid.layers.dropout(
x=next_sent_feat,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
logits = self.cls_fc(cls_feats)
return logits
This diff is collapsed.
#!/bin/bash
BERT_BASE_PATH="./data/pretrained_models/uncased_L-12_H-768_A-12/"
TASK_NAME='MNLI'
DATA_PATH="./data/glue_data/MNLI/"
CKPT_PATH="./data/saved_model/mnli_models"
# start fine-tuning
python3.7 -m paddle.distributed.launch --started_port 8899 --selected_gpus=0,1,2,3 bert_classifier.py\
--use_cuda true \
--do_train true \
--do_test true \
--batch_size 64 \
--init_pretraining_params ${BERT_BASE_PATH}/dygraph_params/ \
--data_dir ${DATA_PATH} \
--vocab_path ${BERT_BASE_PATH}/vocab.txt \
--checkpoints ${CKPT_PATH} \
--save_steps 1000 \
--weight_decay 0.01 \
--warmup_proportion 0.1 \
--validation_steps 100 \
--epoch 3 \
--max_seq_len 128 \
--bert_config_path ${BERT_BASE_PATH}/bert_config.json \
--learning_rate 5e-5 \
--skip_steps 10 \
--shuffle true
......@@ -4,7 +4,7 @@ TASK_NAME='MNLI'
DATA_PATH="./data/glue_data/MNLI/"
CKPT_PATH="./data/saved_model/mnli_models"
export CUDA_VISIBLE_DEVICES=7
export CUDA_VISIBLE_DEVICES=0
# start fine-tuning
python3.7 bert_classifier.py\
......
......@@ -30,6 +30,7 @@ from hapi.distributed import DistributedBatchSampler
from hapi.text.bert.data_processor import DataProcessor, XnliProcessor, ColaProcessor, MrpcProcessor, MnliProcessor
from hapi.text.bert.batching import prepare_batch_data
import hapi.text.tokenizer.tokenization as tokenization
from paddle.fluid.dygraph.parallel import ParallelEnv, ParallelStrategy
__all__ = [
'BertInputExample', 'BertInputFeatures', 'SingleSentenceDataset',
......@@ -227,6 +228,9 @@ class SingleSentenceDataset(Dataset):
if line_processor is None:
line_processor = default_line_processor
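# with multiple ranks, each rank appends its local rank to the LevelDB filename and builds its own cache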
if ParallelEnv().nranks > 1:
leveldb_file = leveldb_file + "_" + str(ParallelEnv().local_rank)
if not os.path.exists(leveldb_file):
print("putting data %s into leveldb %s" %
(input_file, leveldb_file))
......@@ -384,7 +388,12 @@ class BertDataLoader(object):
quotechar=None,
device=fluid.CPUPlace(),
num_workers=0,
return_list=True):
return_list=True,
phase="train"):
assert phase in [
"train", "predict", "test"
], "phase of BertDataLoader should be in [train, predict, test], but get %s" % phase
self.dataset = SingleSentenceDataset(tokenizer, label_list,
max_seq_length, mode)
......@@ -394,15 +403,21 @@ class BertDataLoader(object):
input_file, label_list, max_seq_length, tokenizer,
line_processor, delimiter, quotechar)
elif mode == "leveldb":
#prepare_leveldb(self, input_file, leveldb_file, label_list, max_seq_length, tokenizer, line_processor=None, delimiter="\t", quotechar=None):
self.dataset.prepare_leveldb(input_file, leveldb_file, label_list,
max_seq_length, tokenizer,
line_processor, delimiter, quotechar)
else:
raise ValueError("mode should be in [all_in_memory, leveldb]")
self.sampler = DistributedBatchSampler(
self.dataset, batch_size, shuffle=shuffle, drop_last=drop_last)
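# training shards batches across workers with DistributedBatchSampler; test/predict iterates with an ordinary BatchSampler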
if phase == "train":
self.sampler = DistributedBatchSampler(
self.dataset, batch_size, shuffle=shuffle, drop_last=drop_last)
elif phase == "test" or phase == "predict":
self.sampler = BatchSampler(
dataset=self.dataset,
batch_size=batch_size,
shuffle=shuffle,
drop_last=drop_last)
self.dataloader = DataLoader(
dataset=self.dataset,
......
......@@ -48,8 +48,8 @@ __all__ = [
'RNNCell', 'BasicLSTMCell', 'BasicGRUCell', 'RNN', 'DynamicDecode',
'BeamSearchDecoder', 'MultiHeadAttention', 'FFN',
'TransformerEncoderLayer', 'TransformerEncoder', 'TransformerDecoderLayer',
'TransformerDecoder', 'TransformerBeamSearchDecoder', 'BiGRU',
'Linear_chain_crf', 'Crf_decoding', 'SequenceTagging'
'TransformerDecoder', 'TransformerBeamSearchDecoder', 'Linear_chain_crf',
'Crf_decoding', 'SequenceTagging'
]
......