Commit 1fac53aa authored by xyzhou-puck

update nlp models

Parent 85e422bb
@@ -16,14 +16,60 @@
 import paddle.fluid as fluid
 from hapi.metrics import Accuracy
 from hapi.configure import Config
+from hapi.text.bert import BertEncoder
+from paddle.fluid.dygraph import Linear, Layer
 from hapi.model import set_device, Model, SoftmaxWithCrossEntropy, Input
-from cls import ClsModelLayer
 import hapi.text.tokenizer.tokenization as tokenization
 from hapi.text.bert import Optimizer, BertConfig, BertDataLoader, BertInputExample

-def train():
+
+class ClsModelLayer(Model):
+    """
+    classify model
+    """
+
+    def __init__(self,
+                 args,
+                 config,
+                 num_labels,
+                 return_pooled_out=True,
+                 use_fp16=False):
+        super(ClsModelLayer, self).__init__()
+        self.config = config
+        self.use_fp16 = use_fp16
+        self.loss_scaling = args.loss_scaling
+
+        self.bert_layer = BertEncoder(
+            config=self.config, return_pooled_out=True, use_fp16=self.use_fp16)
+        self.cls_fc = Linear(
+            input_dim=self.config["hidden_size"],
+            output_dim=num_labels,
+            param_attr=fluid.ParamAttr(
+                name="cls_out_w",
+                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
+            bias_attr=fluid.ParamAttr(
+                name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
+
+    def forward(self, src_ids, position_ids, sentence_ids, input_mask):
+        """
+        forward
+        """
+        enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids,
+                                                     sentence_ids, input_mask)
+        cls_feats = fluid.layers.dropout(
+            x=next_sent_feat,
+            dropout_prob=0.1,
+            dropout_implementation="upscale_in_train")
+        pred = self.cls_fc(cls_feats)
+        return pred
+
+
+def main():
     config = Config(yaml_file="./bert.yaml")
     config.build()
@@ -35,8 +81,6 @@ def train():
     bert_config = BertConfig(config.bert_config_path)
     bert_config.print_config()

-    trainer_count = fluid.dygraph.parallel.Env().nranks
-
     tokenizer = tokenization.FullTokenizer(
         vocab_file=config.vocab_path, do_lower_case=config.do_lower_case)
@@ -52,14 +96,24 @@ def train():
         return BertInputExample(
             uid=uid, text_a=text_a, text_b=text_b, label=label)

-    bert_dataloader = BertDataLoader(
+    train_dataloader = BertDataLoader(
         "./data/glue_data/MNLI/train.tsv",
         tokenizer, ["contradiction", "entailment", "neutral"],
-        max_seq_length=64,
-        batch_size=32,
+        max_seq_length=config.max_seq_len,
+        batch_size=config.batch_size,
         line_processor=mnli_line_processor)

-    num_train_examples = len(bert_dataloader.dataset)
+    dev_dataloader = BertDataLoader(
+        "./data/glue_data/MNLI/dev_matched.tsv",
+        tokenizer, ["contradiction", "entailment", "neutral"],
+        max_seq_length=config.max_seq_len,
+        batch_size=config.batch_size,
+        line_processor=mnli_line_processor,
+        shuffle=False,
+        phase="predict")
+
+    trainer_count = fluid.dygraph.parallel.Env().nranks
+    num_train_examples = len(train_dataloader.dataset)
     max_train_steps = config.epoch * num_train_examples // config.batch_size // trainer_count
     warmup_steps = int(max_train_steps * config.warmup_proportion)
@@ -82,7 +136,6 @@ def train():
         config,
         bert_config,
         len(["contradiction", "entailment", "neutral"]),
-        is_training=True,
         return_pooled_out=True)

     optimizer = Optimizer(
@@ -106,10 +159,15 @@ def train():
     cls_model.bert_layer.init_parameters(
         config.init_pretraining_params, verbose=config.verbose)

-    cls_model.fit(train_data=bert_dataloader.dataloader, epochs=config.epoch)
+    # do train
+    cls_model.fit(train_data=train_dataloader.dataloader,
+                  epochs=config.epoch,
+                  save_dir=config.checkpoints)
-    return cls_model
+    # do eval
+    cls_model.evaluate(
+        eval_data=dev_dataloader.dataloader, batch_size=config.batch_size)

 if __name__ == '__main__':
-    cls_model = train()
+    main()
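Note: the body of mnli_line_processor sits in a collapsed hunk above; only its trailing return BertInputExample(...) is visible in this diff. For orientation, a line processor of this kind maps one raw TSV row of the GLUE MNLI file to a BertInputExample. The sketch below is an assumption about that hidden code: the callback signature and the column indices are illustrative, not taken from this commit.

# Hypothetical sketch of an MNLI line processor; the signature and the column
# indices are assumptions, the real implementation lives in the collapsed hunk.
from hapi.text.bert import BertInputExample

def mnli_line_processor(line_id, line):
    # 'line' is assumed to be the already-split TSV row; indices are guesses.
    uid, text_a, text_b, label = line[0], line[8], line[9], line[-1]
    return BertInputExample(uid=uid, text_a=text_a, text_b=text_b, label=label)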
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"dygraph transformer layers"
import six
import json
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear, Layer
from hapi.text.bert import BertEncoder
from hapi.model import Model
class ClsModelLayer(Model):
"""
classify model
"""
def __init__(self,
args,
config,
num_labels,
is_training=True,
return_pooled_out=True,
use_fp16=False):
super(ClsModelLayer, self).__init__()
self.config = config
self.is_training = is_training
self.use_fp16 = use_fp16
self.loss_scaling = args.loss_scaling
self.bert_layer = BertEncoder(
config=self.config, return_pooled_out=True, use_fp16=self.use_fp16)
self.cls_fc = Linear(
input_dim=self.config["hidden_size"],
output_dim=num_labels,
param_attr=fluid.ParamAttr(
name="cls_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
def forward(self, src_ids, position_ids, sentence_ids, input_mask):
"""
forward
"""
enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids,
sentence_ids, input_mask)
cls_feats = fluid.layers.dropout(
x=next_sent_feat,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
logits = self.cls_fc(cls_feats)
return logits
@@ -18,7 +18,7 @@ batch_size: 32
 in_tokens: False
 do_lower_case: True
 random_seed: 5512
-use_cuda: False
+use_cuda: True
 shuffle: True
 do_train: True
 do_test: True
...
@@ -16,14 +16,60 @@
 import paddle.fluid as fluid
 from hapi.metrics import Accuracy
 from hapi.configure import Config
+from hapi.text.bert import BertEncoder
+from paddle.fluid.dygraph import Linear, Layer
 from hapi.model import set_device, Model, SoftmaxWithCrossEntropy, Input
-from cls import ClsModelLayer
 import hapi.text.tokenizer.tokenization as tokenization
 from hapi.text.bert import Optimizer, BertConfig, BertDataLoader, BertInputExample

-def train():
+
+class ClsModelLayer(Model):
+    """
+    classify model
+    """
+
+    def __init__(self,
+                 args,
+                 config,
+                 num_labels,
+                 return_pooled_out=True,
+                 use_fp16=False):
+        super(ClsModelLayer, self).__init__()
+        self.config = config
+        self.use_fp16 = use_fp16
+        self.loss_scaling = args.loss_scaling
+
+        self.bert_layer = BertEncoder(
+            config=self.config, return_pooled_out=True, use_fp16=self.use_fp16)
+        self.cls_fc = Linear(
+            input_dim=self.config["hidden_size"],
+            output_dim=num_labels,
+            param_attr=fluid.ParamAttr(
+                name="cls_out_w",
+                initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
+            bias_attr=fluid.ParamAttr(
+                name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
+
+    def forward(self, src_ids, position_ids, sentence_ids, input_mask):
+        """
+        forward
+        """
+        enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids,
+                                                     sentence_ids, input_mask)
+        cls_feats = fluid.layers.dropout(
+            x=next_sent_feat,
+            dropout_prob=0.1,
+            dropout_implementation="upscale_in_train")
+        pred = self.cls_fc(cls_feats)
+        return pred
+
+
+def main():
     config = Config(yaml_file="./bert.yaml")
     config.build()
@@ -35,8 +81,6 @@ def train():
     bert_config = BertConfig(config.bert_config_path)
     bert_config.print_config()

-    trainer_count = fluid.dygraph.parallel.Env().nranks
-
     tokenizer = tokenization.FullTokenizer(
         vocab_file=config.vocab_path, do_lower_case=config.do_lower_case)
@@ -52,15 +96,26 @@ def train():
         return BertInputExample(
             uid=uid, text_a=text_a, text_b=text_b, label=label)

-    bert_dataloader = BertDataLoader(
+    train_dataloader = BertDataLoader(
         "./data/glue_data/MNLI/train.tsv",
         tokenizer, ["contradiction", "entailment", "neutral"],
-        max_seq_length=64,
-        batch_size=32,
+        max_seq_length=config.max_seq_len,
+        batch_size=config.batch_size,
         line_processor=mnli_line_processor,
-        mode="leveldb")
+        mode="leveldb",
+        phase="train")

-    num_train_examples = len(bert_dataloader.dataset)
+    dev_dataloader = BertDataLoader(
+        "./data/glue_data/MNLI/dev_matched.tsv",
+        tokenizer, ["contradiction", "entailment", "neutral"],
+        max_seq_length=config.max_seq_len,
+        batch_size=config.batch_size,
+        line_processor=mnli_line_processor,
+        shuffle=False,
+        phase="predict")
+
+    trainer_count = fluid.dygraph.parallel.Env().nranks
+    num_train_examples = len(train_dataloader.dataset)
     max_train_steps = config.epoch * num_train_examples // config.batch_size // trainer_count
     warmup_steps = int(max_train_steps * config.warmup_proportion)
@@ -83,7 +138,6 @@ def train():
         config,
         bert_config,
         len(["contradiction", "entailment", "neutral"]),
-        is_training=True,
         return_pooled_out=True)

     optimizer = Optimizer(
@@ -107,10 +161,15 @@ def train():
     cls_model.bert_layer.init_parameters(
         config.init_pretraining_params, verbose=config.verbose)

-    cls_model.fit(train_data=bert_dataloader.dataloader, epochs=config.epoch)
+    # do train
+    cls_model.fit(train_data=train_dataloader.dataloader,
+                  epochs=config.epoch,
+                  save_dir=config.checkpoints)
-    return cls_model
+    # do eval
+    cls_model.evaluate(
+        eval_data=dev_dataloader.dataloader, batch_size=config.batch_size)

 if __name__ == '__main__':
-    cls_model = train()
+    main()
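Note: this second example differs from the first one only in how featurized examples are stored. mode="leveldb" caches them in an on-disk LevelDB store (see the prepare_leveldb path in the dataloader diff further down) instead of holding them all in memory, which helps with large corpora such as MNLI. A minimal sketch of switching between the two modes follows; it reuses only arguments that appear in this commit and assumes the tokenizer, config, and mnli_line_processor objects built earlier in the script.

# Illustrative only; both calls mirror arguments visible in this commit.
label_list = ["contradiction", "entailment", "neutral"]

in_memory_loader = BertDataLoader(
    "./data/glue_data/MNLI/train.tsv", tokenizer, label_list,
    max_seq_length=config.max_seq_len, batch_size=config.batch_size,
    line_processor=mnli_line_processor)   # default mode, presumably all in memory

leveldb_loader = BertDataLoader(
    "./data/glue_data/MNLI/train.tsv", tokenizer, label_list,
    max_seq_length=config.max_seq_len, batch_size=config.batch_size,
    line_processor=mnli_line_processor,
    mode="leveldb",                        # cache featurized examples on disk
    phase="train")                         # shard batches across workers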
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"dygraph transformer layers"
import six
import json
import numpy as np
import paddle
import paddle.fluid as fluid
from paddle.fluid.dygraph import Linear, Layer
from hapi.text.bert import BertEncoder
from hapi.model import Model
class ClsModelLayer(Model):
"""
classify model
"""
def __init__(self,
args,
config,
num_labels,
is_training=True,
return_pooled_out=True,
use_fp16=False):
super(ClsModelLayer, self).__init__()
self.config = config
self.is_training = is_training
self.use_fp16 = use_fp16
self.loss_scaling = args.loss_scaling
self.bert_layer = BertEncoder(
config=self.config, return_pooled_out=True, use_fp16=self.use_fp16)
self.cls_fc = Linear(
input_dim=self.config["hidden_size"],
output_dim=num_labels,
param_attr=fluid.ParamAttr(
name="cls_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_out_b", initializer=fluid.initializer.Constant(0.)))
def forward(self, src_ids, position_ids, sentence_ids, input_mask):
"""
forward
"""
enc_output, next_sent_feat = self.bert_layer(src_ids, position_ids,
sentence_ids, input_mask)
cls_feats = fluid.layers.dropout(
x=next_sent_feat,
dropout_prob=0.1,
dropout_implementation="upscale_in_train")
logits = self.cls_fc(cls_feats)
return logits
This file's diff is collapsed.
#!/bin/bash
BERT_BASE_PATH="./data/pretrained_models/uncased_L-12_H-768_A-12/"
TASK_NAME='MNLI'
DATA_PATH="./data/glue_data/MNLI/"
CKPT_PATH="./data/saved_model/mnli_models"
# start fine-tuning
python3.7 -m paddle.distributed.launch --started_port 8899 --selected_gpus=0,1,2,3 bert_classifier.py\
--use_cuda true \
--do_train true \
--do_test true \
--batch_size 64 \
--init_pretraining_params ${BERT_BASE_PATH}/dygraph_params/ \
--data_dir ${DATA_PATH} \
--vocab_path ${BERT_BASE_PATH}/vocab.txt \
--checkpoints ${CKPT_PATH} \
--save_steps 1000 \
--weight_decay 0.01 \
--warmup_proportion 0.1 \
--validation_steps 100 \
--epoch 3 \
--max_seq_len 128 \
--bert_config_path ${BERT_BASE_PATH}/bert_config.json \
--learning_rate 5e-5 \
--skip_steps 10 \
--shuffle true
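Note: this new script launches one training process per listed GPU via paddle.distributed.launch. Each worker then discovers the world size and its own rank at runtime, which is what the trainer_count computation and the per-rank LevelDB cache suffix elsewhere in this commit rely on. A small sketch of what each worker can query, using only APIs that already appear in this commit:

# Per-process view inside a worker started by paddle.distributed.launch.
from paddle.fluid.dygraph.parallel import ParallelEnv

env = ParallelEnv()
print(env.nranks)      # total number of workers, e.g. 4 for --selected_gpus=0,1,2,3
print(env.local_rank)  # this worker's rank, used to keep per-worker caches apart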
@@ -4,7 +4,7 @@ TASK_NAME='MNLI'
 DATA_PATH="./data/glue_data/MNLI/"
 CKPT_PATH="./data/saved_model/mnli_models"

-export CUDA_VISIBLE_DEVICES=7
+export CUDA_VISIBLE_DEVICES=0

 # start fine-tuning
 python3.7 bert_classifier.py\
...
@@ -30,6 +30,7 @@ from hapi.distributed import DistributedBatchSampler
 from hapi.text.bert.data_processor import DataProcessor, XnliProcessor, ColaProcessor, MrpcProcessor, MnliProcessor
 from hapi.text.bert.batching import prepare_batch_data
 import hapi.text.tokenizer.tokenization as tokenization
+from paddle.fluid.dygraph.parallel import ParallelEnv, ParallelStrategy

 __all__ = [
     'BertInputExample', 'BertInputFeatures', 'SingleSentenceDataset',
@@ -227,6 +228,9 @@ class SingleSentenceDataset(Dataset):
         if line_processor is None:
             line_processor = default_line_processor

+        if ParallelEnv().nranks > 1:
+            leveldb_file = leveldb_file + "_" + str(ParallelEnv().local_rank)
+
         if not os.path.exists(leveldb_file):
             print("putting data %s into leveldb %s" %
                   (input_file, leveldb_file))
@@ -384,7 +388,12 @@ class BertDataLoader(object):
                  quotechar=None,
                  device=fluid.CPUPlace(),
                  num_workers=0,
-                 return_list=True):
+                 return_list=True,
+                 phase="train"):
+
+        assert phase in [
+            "train", "predict", "test"
+        ], "phase of BertDataLoader should be in [train, predict, test], but get %s" % phase

         self.dataset = SingleSentenceDataset(tokenizer, label_list,
                                              max_seq_length, mode)
@@ -394,15 +403,21 @@ class BertDataLoader(object):
                 input_file, label_list, max_seq_length, tokenizer,
                 line_processor, delimiter, quotechar)
         elif mode == "leveldb":
-            #prepare_leveldb(self, input_file, leveldb_file, label_list, max_seq_length, tokenizer, line_processor=None, delimiter="\t", quotechar=None):
             self.dataset.prepare_leveldb(input_file, leveldb_file, label_list,
                                          max_seq_length, tokenizer,
                                          line_processor, delimiter, quotechar)
         else:
             raise ValueError("mode should be in [all_in_memory, leveldb]")

-        self.sampler = DistributedBatchSampler(
-            self.dataset, batch_size, shuffle=shuffle, drop_last=drop_last)
+        if phase == "train":
+            self.sampler = DistributedBatchSampler(
+                self.dataset, batch_size, shuffle=shuffle, drop_last=drop_last)
+        elif phase == "test" or phase == "predict":
+            self.sampler = BatchSampler(
+                dataset=self.dataset,
+                batch_size=batch_size,
+                shuffle=shuffle,
+                drop_last=drop_last)

         self.dataloader = DataLoader(
             dataset=self.dataset,
...
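Note: two behavioural changes in this file are worth calling out. First, when several workers run in parallel, each now gets its own LevelDB cache (leveldb_file plus a "_<local_rank>" suffix), so concurrent workers never write to the same database. Second, the new phase argument picks the sampler: "train" uses DistributedBatchSampler and shards batches across workers, while "test"/"predict" fall back to a plain BatchSampler so every worker iterates the full dataset. A rough sketch of the resulting cache layout for a 4-GPU run; the suffixing rule mirrors the hunk above, while the base path is a made-up example.

# Illustrative cache naming for a 4-GPU run (base path is hypothetical).
from paddle.fluid.dygraph.parallel import ParallelEnv

leveldb_file = "./leveldb_cache/mnli_train"
if ParallelEnv().nranks > 1:
    leveldb_file = leveldb_file + "_" + str(ParallelEnv().local_rank)
# worker 0 -> ./leveldb_cache/mnli_train_0, worker 1 -> ./leveldb_cache/mnli_train_1, ...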
@@ -48,8 +48,8 @@ __all__ = [
     'RNNCell', 'BasicLSTMCell', 'BasicGRUCell', 'RNN', 'DynamicDecode',
     'BeamSearchDecoder', 'MultiHeadAttention', 'FFN',
     'TransformerEncoderLayer', 'TransformerEncoder', 'TransformerDecoderLayer',
-    'TransformerDecoder', 'TransformerBeamSearchDecoder', 'BiGRU',
-    'Linear_chain_crf', 'Crf_decoding', 'SequenceTagging'
+    'TransformerDecoder', 'TransformerBeamSearchDecoder', 'Linear_chain_crf',
+    'Crf_decoding', 'SequenceTagging'
 ]
...