提交 aeb2bc54 编写于 作者: S Steffy-zxf 提交者: wuzewu

Update seq label task (#189)

* update seq-label task
上级 6cd5a649
......@@ -119,7 +119,8 @@ seq_label_task = hub.SequenceLabelTask(
feed_list=feed_list,
max_seq_len=args.max_seq_len,
num_classes=dataset.num_labels,
config=config)
config=config,
add_crf=False)
seq_label_task.finetune_and_eval()
```
......@@ -128,6 +129,7 @@ seq_label_task.finetune_and_eval()
1. `outputs["sequence_output"]`返回了ERNIE/BERT模型输入单词的对应输出,可以用于单词的特征表达。
2. `feed_list`中的inputs参数指明了ERNIE/BERT中的输入tensor的顺序,与SequenceLabelReader返回的结果一致。
3. `hub.SequenceLabelTask`通过输入特征,迁移的类别数,可以生成适用于序列标注的迁移任务`SequenceLabelTask`
4. `hub.SequenceLabelTask`通过add_crf参数,选择是否加入crf作为decoder。如果add_crf=True,则在预训练模型计算图中加入fc+crf层,否则只在预训练模型计算图中加入fc层。
## 可视化
......
......@@ -79,13 +79,15 @@ if __name__ == '__main__':
strategy=hub.finetune.strategy.DefaultFinetuneStrategy())
# Define a sequence labeling finetune task by PaddleHub's API
    # If add_crf is True, the network uses CRF as the decoder
seq_label_task = hub.SequenceLabelTask(
data_reader=reader,
feature=sequence_output,
feed_list=feed_list,
max_seq_len=args.max_seq_len,
num_classes=dataset.num_labels,
config=config)
config=config,
add_crf=True)
# test data
data = [
......
......@@ -78,13 +78,15 @@ if __name__ == '__main__':
strategy=strategy)
# Define a sequence labeling finetune task by PaddleHub's API
    # If add_crf is True, the network uses CRF as the decoder
seq_label_task = hub.SequenceLabelTask(
data_reader=reader,
feature=sequence_output,
feed_list=feed_list,
max_seq_len=args.max_seq_len,
num_classes=dataset.num_labels,
config=config)
config=config,
add_crf=True)
# Finetune and evaluate model by PaddleHub's API
# will finish training, evaluation, testing, save model automatically
......
......@@ -22,6 +22,7 @@ import contextlib
import time
import copy
import logging
import numpy as np
import paddle.fluid as fluid
from tb_paddle import SummaryWriter
......@@ -305,6 +306,10 @@ class BasicTask(object):
return [_places[0]]
return _places
@property
def return_numpy(self):
    """Whether Executor.run should fetch results as numpy arrays.

    Consulted in the run loop: when False, exe.run is called with
    return_numpy=False and the fetched LoDTensors are converted to
    numpy arrays manually. Subclasses override this (e.g. a CRF-based
    sequence-labeling task returns False).
    """
    return True
@property
def is_train_phase(self):
return self.phase in ["train"]
......@@ -653,10 +658,18 @@ class BasicTask(object):
step_run_state.run_step = 1
num_batch_examples = len(batch)
fetch_result = self.exe.run(
self.main_program_to_be_run,
feed=data_feeder.feed(batch),
fetch_list=self.fetch_list)
if self.return_numpy:
fetch_result = self.exe.run(
self.main_program_to_be_run,
feed=data_feeder.feed(batch),
fetch_list=self.fetch_list)
else:
fetch_result = self.exe.run(
self.main_program_to_be_run,
feed=data_feeder.feed(batch),
fetch_list=self.fetch_list,
return_numpy=False)
fetch_result = [np.array(x) for x in fetch_result]
for index, result in enumerate(fetch_result):
step_run_state.run_results[index] = result
......@@ -694,8 +707,17 @@ class BasicTask(object):
num_batch_examples = self.config.batch_size * self.device_count
step_run_state = RunState(len(self.fetch_list))
step_run_state.run_step = 1
fetch_result = self.exe.run(
self.main_program_to_be_run, fetch_list=self.fetch_list)
if self.return_numpy:
fetch_result = self.exe.run(
self.main_program_to_be_run,
fetch_list=self.fetch_list)
else:
fetch_result = self.exe.run(
self.main_program_to_be_run,
fetch_list=self.fetch_list,
return_numpy=False)
fetch_result = [np.array(x) for x in fetch_result]
for index, result in enumerate(fetch_result):
step_run_state.run_results[index] = result
......
......@@ -34,10 +34,13 @@ class SequenceLabelTask(BasicTask):
data_reader,
startup_program=None,
config=None,
metrics_choices="default"):
metrics_choices="default",
add_crf=False):
if metrics_choices == "default":
metrics_choices = ["f1", "precision", "recall"]
self.add_crf = add_crf
main_program = feature.block.program
super(SequenceLabelTask, self).__init__(
data_reader=data_reader,
......@@ -50,31 +53,56 @@ class SequenceLabelTask(BasicTask):
self.max_seq_len = max_seq_len
self.num_classes = num_classes
def _build_net(self):
self.logits = fluid.layers.fc(
input=self.feature,
size=self.num_classes,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name="cls_seq_label_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_seq_label_out_b",
initializer=fluid.initializer.Constant(0.)))
self.ret_infers = fluid.layers.reshape(
x=fluid.layers.argmax(self.logits, axis=2), shape=[-1, 1])
ret_infers = fluid.layers.assign(self.ret_infers)
@property
def return_numpy(self):
    """Fetch raw LoDTensors instead of numpy arrays when CRF decoding is enabled.

    With add_crf=True the fetch targets carry LoD information, so the
    base task must call exe.run with return_numpy=False and convert the
    results itself; without CRF the default numpy conversion is fine.
    """
    return not self.add_crf
def _build_net(self):
self.seq_len = fluid.layers.data(
name="seq_len", shape=[1], dtype='int64')
seq_len = fluid.layers.assign(self.seq_len)
logits = self.logits
logits = fluid.layers.flatten(logits, axis=2)
logits = fluid.layers.softmax(logits)
self.num_labels = logits.shape[1]
return [logits]
if self.add_crf:
unpad_feature = fluid.layers.sequence_unpad(
self.feature, length=self.seq_len)
self.emission = fluid.layers.fc(
size=self.num_classes,
input=unpad_feature,
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Uniform(low=-0.1, high=0.1),
regularizer=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=1e-4)))
size = self.emission.shape[1]
fluid.layers.create_parameter(
shape=[size + 2, size], dtype=self.emission.dtype, name='crfw')
self.ret_infers = fluid.layers.crf_decoding(
input=self.emission, param_attr=fluid.ParamAttr(name='crfw'))
ret_infers = fluid.layers.assign(self.ret_infers)
return [ret_infers]
else:
self.logits = fluid.layers.fc(
input=self.feature,
size=self.num_classes,
num_flatten_dims=2,
param_attr=fluid.ParamAttr(
name="cls_seq_label_out_w",
initializer=fluid.initializer.TruncatedNormal(scale=0.02)),
bias_attr=fluid.ParamAttr(
name="cls_seq_label_out_b",
initializer=fluid.initializer.Constant(0.)))
self.ret_infers = fluid.layers.reshape(
x=fluid.layers.argmax(self.logits, axis=2), shape=[-1, 1])
ret_infers = fluid.layers.assign(self.ret_infers)
logits = self.logits
logits = fluid.layers.flatten(logits, axis=2)
logits = fluid.layers.softmax(logits)
self.num_labels = logits.shape[1]
return [logits]
def _add_label(self):
label = fluid.layers.data(
......@@ -82,30 +110,60 @@ class SequenceLabelTask(BasicTask):
return [label]
def _add_loss(self):
labels = fluid.layers.flatten(self.labels[0], axis=2)
ce_loss = fluid.layers.cross_entropy(
input=self.outputs[0], label=labels)
loss = fluid.layers.mean(x=ce_loss)
if self.add_crf:
labels = fluid.layers.sequence_unpad(self.labels[0], self.seq_len)
crf_cost = fluid.layers.linear_chain_crf(
input=self.emission,
label=labels,
param_attr=fluid.ParamAttr(name='crfw'))
loss = fluid.layers.mean(x=crf_cost)
else:
labels = fluid.layers.flatten(self.labels[0], axis=2)
ce_loss = fluid.layers.cross_entropy(
input=self.outputs[0], label=labels)
loss = fluid.layers.mean(x=ce_loss)
return loss
def _add_metrics(self):
self.ret_labels = fluid.layers.reshape(x=self.labels[0], shape=[-1, 1])
return [self.ret_labels, self.ret_infers, self.seq_len]
if self.add_crf:
labels = fluid.layers.sequence_unpad(self.labels[0], self.seq_len)
(precision, recall, f1_score, num_infer_chunks, num_label_chunks,
num_correct_chunks) = fluid.layers.chunk_eval(
input=self.outputs[0],
label=labels,
chunk_scheme="IOB",
num_chunk_types=int(np.ceil((self.num_classes - 1) / 2.0)))
chunk_evaluator = fluid.metrics.ChunkEvaluator()
chunk_evaluator.reset()
return [precision, recall, f1_score]
else:
self.ret_labels = fluid.layers.reshape(
x=self.labels[0], shape=[-1, 1])
return [self.ret_labels, self.ret_infers, self.seq_len]
def _calculate_metrics(self, run_states):
total_infer = total_label = total_correct = loss_sum = 0
run_step = run_time_used = run_examples = 0
precision_sum = recall_sum = f1_score_sum = 0
for run_state in run_states:
loss_sum += np.mean(run_state.run_results[-1])
np_labels = run_state.run_results[0]
np_infers = run_state.run_results[1]
np_lens = run_state.run_results[2]
label_num, infer_num, correct_num = chunk_eval(
np_labels, np_infers, np_lens, self.num_labels,
self.device_count)
total_infer += infer_num
total_label += label_num
total_correct += correct_num
if self.add_crf:
precision_sum += np.mean(
run_state.run_results[0]) * run_state.run_examples
recall_sum += np.mean(
run_state.run_results[1]) * run_state.run_examples
f1_score_sum += np.mean(
run_state.run_results[2]) * run_state.run_examples
else:
np_labels = run_state.run_results[0]
np_infers = run_state.run_results[1]
np_lens = run_state.run_results[2]
label_num, infer_num, correct_num = chunk_eval(
np_labels, np_infers, np_lens, self.num_labels,
self.device_count)
total_infer += infer_num
total_label += label_num
total_correct += correct_num
run_examples += run_state.run_examples
run_step += run_state.run_step
......@@ -113,8 +171,13 @@ class SequenceLabelTask(BasicTask):
run_speed = run_step / run_time_used
avg_loss = loss_sum / run_examples
precision, recall, f1 = calculate_f1(total_label, total_infer,
total_correct)
if self.add_crf:
precision = precision_sum / run_examples
recall = recall_sum / run_examples
f1 = f1_score_sum / run_examples
else:
precision, recall, f1 = calculate_f1(total_label, total_infer,
total_correct)
# The first key will be used as main metrics to update the best model
scores = OrderedDict()
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册