diff --git a/README_zh.md b/README_zh.md index 06da939b490ff265d9396a3acd764e066269629c..8978b8445df2334b51735ecfb0ef2fc92cdf52da 100644 --- a/README_zh.md +++ b/README_zh.md @@ -4,7 +4,7 @@ PaddlePALM (PArallel Learning from Multi-tasks) 是一个灵活,通用且易于使用的NLP大规模预训练和多任务学习框架。 PALM是一个旨在**快速开发高性能NLP模型**的上层框架。 -使用PaddlePALM,可以非常轻松灵活的探索具有多种任务辅助训练的“高鲁棒性”阅读理解模型,基于PALM训练的模型[D-Net](https://github.com/PaddlePaddle/models/tree/develop/PaddleNLP/Research/MRQA2019-D-NET)在[EMNLP2019国际阅读理解评测](mrqa .github.io)中夺得冠军。 +使用PaddlePALM,可以非常轻松灵活的探索具有多种任务辅助训练的“高鲁棒性”阅读理解模型,基于PALM训练的模型[D-Net](https://github.com/PaddlePaddle/models/tree/develop/PaddleNLP/Research/MRQA2019-D-NET)在[EMNLP2019国际阅读理解评测](https://mrqa.github.io/)中夺得冠军。

Sample @@ -196,10 +196,10 @@ Available pretrain items: 更多实现细节请见示例: -- [Sentiment Classification](https://github.com/PaddlePaddle/PALM/tree/master/examples/classification) -- [Quora Question Pairs matching](https://github.com/PaddlePaddle/PALM/tree/master/examples/matching) -- [Tagging](https://github.com/PaddlePaddle/PALM/tree/master/examples/tagging) -- [SQuAD machine Reading Comprehension](https://github.com/PaddlePaddle/PALM/tree/master/examples/mrc). +- [情感分析](https://github.com/PaddlePaddle/PALM/tree/master/examples/classification) +- [Quora问题相似度匹配](https://github.com/PaddlePaddle/PALM/tree/master/examples/matching) +- [命名实体识别](https://github.com/PaddlePaddle/PALM/tree/master/examples/tagging) +- [类SQuAD机器阅读理解](https://github.com/PaddlePaddle/PALM/tree/master/examples/mrc) #### 多任务学习 @@ -218,7 +218,7 @@ multi_head_trainer的保存/加载和预测操作与trainer相同。 更多实现`multi_head_trainer`的细节,请见 -- [ATIS: joint training of dialogue intent recognition and slot filling](https://github.com/PaddlePaddle/PALM/tree/master/examples/multi-task) +- [ATIS: 对话意图识别和插槽填充的联合训练](https://github.com/PaddlePaddle/PALM/tree/master/examples/multi-task) #### 设置saver diff --git a/examples/classification/run.py b/examples/classification/run.py index 35692e17aca780673decb8ad301793bdd691335e..cd6ad86a515aa5054d267a1ce4cc674ae1880359 100644 --- a/examples/classification/run.py +++ b/examples/classification/run.py @@ -1,7 +1,6 @@ # coding=utf-8 import paddlepalm as palm import json -from paddlepalm.distribute import gpu_dev_count if __name__ == '__main__': diff --git a/examples/matching/run.py b/examples/matching/run.py index ff551dfcb7ddf2821005e3daebdf51b8156e0553..cfb6994edb55ee298ed31de01d5876dd339a9a2a 100644 --- a/examples/matching/run.py +++ b/examples/matching/run.py @@ -1,7 +1,6 @@ # coding=utf-8 import paddlepalm as palm import json -from paddlepalm.distribute import gpu_dev_count if __name__ == '__main__': diff --git a/examples/mrc/run.py b/examples/mrc/run.py index fc3ee79ca1c16d2d574a49a5027eae624085a55b..4b57bb8e430fa22d91893f64c3fa15bc44d15560 100644 --- a/examples/mrc/run.py +++ b/examples/mrc/run.py @@ -1,7 +1,6 @@ # coding=utf-8 import paddlepalm as palm import json -from paddlepalm.distribute import gpu_dev_count if __name__ == '__main__': diff --git a/examples/multi-task/run.py b/examples/multi-task/run.py index 18760586f782f615f47707e3812acf939a6a848a..fb76f3de32fe7bd704c7a6f9fac71eb6c74697bf 100644 --- a/examples/multi-task/run.py +++ b/examples/multi-task/run.py @@ -1,7 +1,6 @@ # coding=utf-8 import paddlepalm as palm import json -from paddlepalm.distribute import gpu_dev_count if __name__ == '__main__': @@ -80,4 +79,4 @@ if __name__ == '__main__': # save_steps = 10 trainer.set_saver(save_path=save_path, save_steps=save_steps, save_type=save_type) # step 8-3: start training - trainer.train(print_steps=print_steps) \ No newline at end of file + trainer.train(print_steps=print_steps) diff --git a/examples/predict/run.py b/examples/predict/run.py index 1b0bc84b0282ffb5ef5726756602b59131fa3875..dec974946727707a2d9fa192cbf88324da1eafce 100644 --- a/examples/predict/run.py +++ b/examples/predict/run.py @@ -1,7 +1,6 @@ # coding=utf-8 import paddlepalm as palm import json -from paddlepalm.distribute import gpu_dev_count if __name__ == '__main__': diff --git a/examples/tagging/run.py b/examples/tagging/run.py index 6228304d54f6bc8050c2422fd7d410e0d23bd27f..e4887b66ef5e5f53dcb2d290eb42953114605b2f 100644 --- a/examples/tagging/run.py +++ b/examples/tagging/run.py @@ -1,7 +1,6 @@ # coding=utf-8 import paddlepalm as palm import json -from paddlepalm.distribute import gpu_dev_count if __name__ == '__main__': @@ -64,9 +63,9 @@ if __name__ == '__main__': # step 7: fit prepared reader and data trainer.fit_reader(seq_label_reader) - # # step 8-1*: load pretrained parameters + # step 8-1*: load pretrained parameters trainer.load_pretrain(pre_params) - # # step 8-2*: set saver to save model + # step 8-2*: set saver to save model save_steps = 1951 # print('save_steps: {}'.format(save_steps)) trainer.set_saver(save_path=save_path, save_steps=save_steps, save_type=save_type) diff --git a/paddlepalm/head/cls.py b/paddlepalm/head/cls.py index 66117ac8810b9844f9ee2f73972b1090aa470122..4da3580418a0c424e0aa17f67463b555f9344ac2 100644 --- a/paddlepalm/head/cls.py +++ b/paddlepalm/head/cls.py @@ -98,7 +98,7 @@ class Classify(Head): raise ValueError('argument output_dir not found in config. Please add it into config dict/file.') with open(os.path.join(output_dir, 'predictions.json'), 'w') as writer: for i in range(len(self._preds)): - label = np.argmax(np.array(self._preds[i])) + label = int(np.argmax(np.array(self._preds[i]))) result = {'index': i, 'label': label, 'logits': self._preds[i], 'probs': self._probs[i]} result = json.dumps(result) writer.write(result+'\n') diff --git a/paddlepalm/head/match.py b/paddlepalm/head/match.py index 9df4a1a1d8c532db94eaf3484ac88581184e07b8..38cf1b2389ddb7426f338d86146c10bf9ffd2ce7 100644 --- a/paddlepalm/head/match.py +++ b/paddlepalm/head/match.py @@ -179,7 +179,7 @@ class Match(Head): with open(os.path.join(output_dir, 'predictions.json'), 'w') as writer: for i in range(len(self._preds)): if self._learning_strategy == 'pointwise': - label = np.argmax(np.array(self._preds[i])) + label = int(np.argmax(np.array(self._preds[i]))) result = {'index': i, 'label': label, 'logits': self._preds_logits[i], 'probs': self._preds[i]} elif self._learning_strategy == 'pairwise': result = {'index': i, 'probs': self._preds[i][0]} diff --git a/paddlepalm/reader/utils/reader4ernie.py b/paddlepalm/reader/utils/reader4ernie.py index d3f7eb29b269ee50b13184460de6c160c676933a..c2b087460fddc1a53247d322ab6c6ef679503dbf 100644 --- a/paddlepalm/reader/utils/reader4ernie.py +++ b/paddlepalm/reader/utils/reader4ernie.py @@ -37,7 +37,7 @@ from paddlepalm.reader.utils.mlm_batching import prepare_batch_data log = logging.getLogger(__name__) -if six.PY3: +if six.PY3 and hasattr(sys.stdout, 'buffer'): import io sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding='utf-8') sys.stderr = io.TextIOWrapper(sys.stderr.buffer, encoding='utf-8')