diff --git a/README.md b/README.md index 4f59e69cd7033f6788397b1a6c504dc64833353d..82158520e4cc191e0ae26d20b8cb41bd15079ebc 100644 --- a/README.md +++ b/README.md @@ -99,8 +99,15 @@ $ wget https://paddlehub.bj.bcebos.com/resources/test_image.jpg $ hub run ace2p --input_path test_image.jpg $ hub run deeplabv3p_xception65_humanseg --input_path test_image.jpg ``` -

- + + +

+ +

+          ace2p分割结果展示                 + humanseg分割结果展示   

PaddleHub还提供图像分类、语义模型、视频分类、图像生成、图像分割、文本审核、关键点检测等主流模型,更多模型介绍,请前往 [https://www.paddlepaddle.org.cn/hub](https://www.paddlepaddle.org.cn/hub) 查看 diff --git a/RELEASE.md b/RELEASE.md index 35f1710b127ea1013f04df72e85c10aad4c205ac..b2e177dfa2de3d09d5e85be59dcd2d1914c8368c 100644 --- a/RELEASE.md +++ b/RELEASE.md @@ -1,3 +1,18 @@ +# `v1.6.0` + +* NLP Module全面升级,提升应用性和灵活性 + * lac、senta系列(bow、cnn、bilstm、gru、lstm)、simnet_bow、porn_detection系列(cnn、gru、lstm)升级高性能预测,性能提升高达50% + * ERNIE、BERT、RoBERTa等Transformer类语义模型新增获取预训练embedding接口get_embedding,方便接入下游任务,提升应用性 + * 新增RoBERTa通过模型结构压缩得到的3层Transformer模型[rbt3](https://www.paddlepaddle.org.cn/hubdetail?name=rbt3&en_category=SemanticModel)、[rbtl3](https://www.paddlepaddle.org.cn/hubdetail?name=rbtl3&en_category=SemanticModel) + +* Task predict接口增加高性能预测模式accelerate_mode,性能提升高达90% + +* PaddleHub Module创建流程开放,支持Fine-tune模型转化,全面提升应用性和灵活性 + * [预训练模型转化为PaddleHub Module教程](./docs/contribution/contri_pretrained_model.md) + * [Fine-tune模型转化为PaddleHub Module教程](./docs/tutorial/finetuned_model_to_module.md) + +* [PaddleHub Serving](/docs/tutorial/serving.md)优化启动方式,支持更加灵活的参数配置 + # `v1.5.4` * 修复Fine-tune中断,checkpoint文件恢复训练失败的问题 diff --git a/demo/text_classification/README.md b/demo/text_classification/README.md index 5da82c53dcf67557be00aa36cff80d3bae917eb2..560feacdaa3ee7c60e9ab3da43232baa6a75fe80 100644 --- a/demo/text_classification/README.md +++ b/demo/text_classification/README.md @@ -218,3 +218,9 @@ python predict.py --checkpoint_dir $CKPT_DIR --max_seq_len 128 ## 超参优化AutoDL Finetuner PaddleHub还提供了超参优化(Hyperparameter Tuning)功能, 自动搜索最优模型超参得到更好的模型效果。详细信息参见[AutoDL Finetuner超参优化功能教程](../../docs/tutorial/autofinetune.md)。 + + +## Fine-tune之后保存的模型转化为PaddleHub Module + +代码详见[finetuned_model_to_module](./finetuned_model_to_module)文件夹下 +Fine-tune之后保存的模型转化为PaddleHub Module[教程](../../docs/tutorial/finetuned_model_to_module.md) diff --git a/demo/text_classification/finetuned_model_to_module/__init__.py 
b/demo/text_classification/finetuned_model_to_module/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/demo/text_classification/finetuned_model_to_module/module.py b/demo/text_classification/finetuned_model_to_module/module.py new file mode 100644 index 0000000000000000000000000000000000000000..f79be3a2c97e93387c1dc4bba46aaa2d95bfef5a --- /dev/null +++ b/demo/text_classification/finetuned_model_to_module/module.py @@ -0,0 +1,124 @@ +# -*- coding:utf-8 -*- +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Finetuning on classification task """ +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import os + +import numpy as np +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, serving +import paddlehub as hub + + +@moduleinfo( + name="ernie_tiny_finetuned", + version="1.0.0", + summary="ERNIE tiny which was fine-tuned on the chnsenticorp dataset.", + author="anonymous", + author_email="", + type="nlp/semantic_model") +class ERNIETinyFinetuned(hub.Module): + def _initialize(self, + ckpt_dir="ckpt_chnsenticorp", + num_class=2, + max_seq_len=128, + use_gpu=False, + batch_size=1): + self.ckpt_dir = os.path.join(self.directory, ckpt_dir) + self.num_class = num_class + self.MAX_SEQ_LEN = max_seq_len + + # Load the PaddleHub ERNIE Tiny pretrained model + self.module = hub.Module(name="ernie_tiny") + inputs, outputs, program = self.module.context( + trainable=True, max_seq_len=max_seq_len) + + self.vocab_path = self.module.get_vocab_path() + + # Use accuracy as the evaluation metric + # Choose dataset: GLUE/XNLI/ChinesesGLUE/NLPCC-DBQA/LCQMC + # metric should be acc, f1 or matthews + metrics_choices = ["acc"] + + # ernie_tiny uses sub-word tokenization for Chinese sentences + # For other models, sp_model_path and word_dict_path should be set to None + reader = hub.reader.ClassifyReader( + vocab_path=self.module.get_vocab_path(), + max_seq_len=max_seq_len, + sp_model_path=self.module.get_spm_path(), + word_dict_path=self.module.get_word_dict_path()) + + # Construct transfer learning network + # Use "pooled_output" for classification tasks on an entire sentence. + # Use "sequence_output" for token-level output. 
+ pooled_output = outputs["pooled_output"] + + # Set up the feed list for the data feeder + # Must feed all the tensors the module needs + feed_list = [ + inputs["input_ids"].name, + inputs["position_ids"].name, + inputs["segment_ids"].name, + inputs["input_mask"].name, + ] + + # Set up the running config for the PaddleHub Fine-tune API + config = hub.RunConfig( + use_data_parallel=False, + use_cuda=use_gpu, + batch_size=batch_size, + checkpoint_dir=self.ckpt_dir, + strategy=hub.AdamWeightDecayStrategy()) + + # Define a classification fine-tune task with PaddleHub's API + self.cls_task = hub.TextClassifierTask( + data_reader=reader, + feature=pooled_output, + feed_list=feed_list, + num_classes=self.num_class, + config=config, + metrics_choices=metrics_choices) + + def predict(self, data, return_result=False, accelerate_mode=True): + """ + Forward data to self.cls_task.predict and return its run states + """ + run_states = self.cls_task.predict( + data=data, + return_result=return_result, + accelerate_mode=accelerate_mode) + return run_states + + +if __name__ == "__main__": + ernie_tiny = ERNIETinyFinetuned( + ckpt_dir="../ckpt_chnsenticorp", num_class=2) + + # Data to be predicted + data = [["这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般"], ["交通方便;环境很好;服务态度很好 房间较小"], + ["19天硬盘就罢工了~~~算上运来的一周都没用上15天~~~可就是不能换了~~~唉~~~~你说这算什么事呀~~~"]] + + index = 0 + run_states = ernie_tiny.predict(data=data) + results = [run_state.run_results for run_state in run_states] + for batch_result in results: + # take the argmax over axis 2 as the predicted label index + batch_result = np.argmax(batch_result, axis=2)[0] + for result in batch_result: + print("%s\tpredict=%s" % (data[index][0], result)) + index += 1 diff --git a/docs/contribution/contri_pretrained_model.md b/docs/contribution/contri_pretrained_model.md index 8c9c21d8503b249a781e4cf75c5b883c780961d4..93c788808510dbd832076358d2192dc4105858b0 100644 --- a/docs/contribution/contri_pretrained_model.md +++ b/docs/contribution/contri_pretrained_model.md @@ -1,34 +1,213 @@ -# 贡献预训练模型 +# 如何编写一个PaddleHub Module -我们非常欢迎开发者贡献预训练模型到PaddleHub中,如果你想要贡献预训练模型,请提供以下资源: 
+## 模型基本信息 -## 模型 +我们准备编写一个PaddleHub Module,Module的基本信息如下: +```yaml +name: senta_test +version: 1.0.0 +summary: This is a PaddleHub Module. Just for test. +author: anonymous +author_email: +type: nlp/sentiment_analysis +``` -请提供相应的网络结构和参数文件,除了PaddlePaddle的模型外,我们也支持将其他主流框架的模型转换到PaddleHub中,包括: -* tensorflow -* pytorch -* mxnet -* caffe -* onnx +**本示例代码可以参考[senta_module_sample](../../demo/senta_module_sample/)** -您可以直接使用 [**x2paddle**](https://github.com/PaddlePaddle/X2Paddle) 进行转换,也可以将相应模型提供给我们,由我们进行转换 +Module存在一个接口sentiment_classify,用于接收传入文本,并给出文本的情感倾向(正面/负面),支持python接口调用和命令行调用。 +```python +import paddlehub as hub -## 相关代码 +senta_test = hub.Module(name="senta_test") +senta_test.sentiment_classify(texts=["这部电影太差劲了"]) +``` +```cmd +hub run senta_test --input_text 这部电影太差劲了 +``` -* 支持预测的模型,请提供相应的预测脚本以及测试样例 -* 支持finetune的模型,请提供相应的finetune demo +
-## 相应的介绍资料 +## 策略 -|资料|是否必选| +为了示例代码简单起见,我们使用一个非常简单的情感判断策略,当输入文本中带有词表中指定单词时,则判断文本倾向为负向,否则为正向 + +
+ +## Module创建 + +### step 1. 创建必要的目录与文件 + +创建一个senta_test的目录,并在senta_test目录下分别创建__init__.py、module.py、processor.py、vocab.list,其中 + +|文件名|用途| |-|-| -|模型结构|√| -|预训练的数据集|√| -|模型介绍文案|√| -|源代码链接|| -|模型结构图|| -|第三方库依赖|| +|\_\_init\_\_.py|空文件| +|module.py|主模块,提供Module的实现代码| +|processor.py|辅助模块,提供词表加载的方法| +|vocab.list|存放词表| + +```cmd +➜ tree senta_test +senta_test/ +├── vocab.list +├── __init__.py +├── module.py +└── processor.py +``` +### step 2. 实现辅助模块processor + +在processor.py中实现一个load_vocab接口用于读取词表 +```python +def load_vocab(vocab_path): + with open(vocab_path) as file: + return file.read().split() +``` + +### step 3. 编写Module处理代码 + +module.py文件为Module的入口代码所在,我们需要在其中实现预测逻辑。 + +#### step 3_1. 引入必要的头文件 +```python +import argparse +import os + +import paddlehub as hub +from paddlehub.module.module import runnable, moduleinfo, serving + +from senta_test.processor import load_vocab +``` +**NOTE:** 当引用Module中模块时,需要输入全路径,如senta_test.processor + +#### step 3_2. 定义SentaTest类 +module.py中需要有一个继承了hub.Module的类存在,该类负责实现预测逻辑,并使用moduleinfo填写基本信息。当使用hub.Module(name="senta_test")加载Module时,PaddleHub会自动创建SentaTest的对象并返回。 +```python +@moduleinfo( + name="senta_test", + version="1.0.0", + summary="This is a PaddleHub Module. Just for test.", + author="anonymous", + author_email="", + type="nlp/sentiment_analysis", +) +class SentaTest(hub.Module): + ... +``` +#### step 3_3. 执行必要的初始化 +```python +def _initialize(self): + # add arg parser + self.parser = argparse.ArgumentParser( + description="Run the senta_test module.", + prog='hub run senta_test', + usage='%(prog)s', + add_help=True) + self.parser.add_argument( + '--input_text', type=str, default=None, help="text to predict") + + # load word dict + vocab_path = os.path.join(self.directory, "vocab.list") + self.vocab = load_vocab(vocab_path) +``` +`注意`:执行类的初始化不能使用默认的__init__接口,而是应该重载实现_initialize接口。对象默认内置了directory属性,可以直接获取到Module所在路径 +#### step 3_4. 
完善预测逻辑 +```python +def sentiment_classify(self, texts): + results = [] + for text in texts: + sentiment = "positive" + for word in self.vocab: + if word in text: + sentiment = "negative" + break + results.append({"text":text, "sentiment":sentiment}) + + return results +``` +#### step 3_5. 支持命令行调用 +如果希望Module可以支持命令行调用,则需要提供一个经过runnable修饰的接口,接口负责解析传入数据并进行预测,将结果返回。 + +如果不需要提供命令行预测功能,则可以不实现该接口,PaddleHub在用命令行执行时,会自动发现该Module不支持命令行方式,并给出提示。 +```python +@runnable +def run_cmd(self, argvs): + args = self.parser.parse_args(argvs) + texts = [args.input_text] + return self.sentiment_classify(texts) +``` +#### step 3_6. 支持serving调用 + +如果希望Module可以支持PaddleHub Serving部署预测服务,则需要提供一个经过serving修饰的接口,接口负责解析传入数据并进行预测,将结果返回。 + +如果不需要提供PaddleHub Serving部署预测服务,则可以不需要加上serving修饰。 + +```python +@serving +def sentiment_classify(self, texts): + results = [] + for text in texts: + sentiment = "positive" + for word in self.vocab: + if word in text: + sentiment = "negative" + break + results.append({"text":text, "sentiment":sentiment}) + + return results +``` + +### 完整代码 + +* [module.py](./senta_test/module.py) + +* [processor.py](./senta_test/processor.py) + +
+ +## 测试步骤 + +完成Module编写后,我们可以通过以下方式测试该Module + +### 调用方法1 + +将Module安装到本机中,再通过hub.Module(name=...)加载 +```shell +hub install senta_test +``` + +```python +import paddlehub as hub + +senta_test = hub.Module(name="senta_test") +senta_test.sentiment_classify(texts=["这部电影太差劲了"]) +``` + +### 调用方法2 + +直接通过hub.Module(directory=...)加载 +```python +import paddlehub as hub + +senta_test = hub.Module(directory="senta_test/") +senta_test.sentiment_classify(texts=["这部电影太差劲了"]) +``` + +### 调用方法3 +将senta_test作为路径加到环境变量中,直接加载SentaTest对象 +```shell +export PYTHONPATH=senta_test:$PYTHONPATH +``` + +```python +from senta_test.module import SentaTest + +SentaTest().sentiment_classify(texts=["这部电影太差劲了"]) +``` -**NOTE:** +### 调用方法4 +将Module安装到本机中,再通过hub run运行 -* 为了保证使用体验,请确保模型在python 2.7/3.x下均可正常运行 +```shell +hub install senta_test +hub run senta_test --input_text "这部电影太差劲了" +``` diff --git a/docs/imgs/humanseg_test_res.png b/docs/imgs/humanseg_test_res.png new file mode 100644 index 0000000000000000000000000000000000000000..d6f689f7934ac3f702f18bb4f7fcf99c21325de9 Binary files /dev/null and b/docs/imgs/humanseg_test_res.png differ diff --git a/docs/tutorial/finetuned_model_to_module.md b/docs/tutorial/finetuned_model_to_module.md new file mode 100644 index 0000000000000000000000000000000000000000..ddc9d8d85f92e9267b2b214b757831965cbdd244 --- /dev/null +++ b/docs/tutorial/finetuned_model_to_module.md @@ -0,0 +1,275 @@ +# Fine-tune保存的模型如何转化为一个PaddleHub Module + +## 模型基本信息 + +本示例以模型ERNIE Tiny在数据集ChnSentiCorp上完成情感分类Fine-tune任务后保存的模型转化为一个PaddleHub Module,Module的基本信息如下: +```yaml +name: ernie_tiny_finetuned +version: 1.0.0 +summary: ERNIE tiny which was fine-tuned on the chnsenticorp dataset. 
+author: anonymous +author_email: +type: nlp/semantic_model +``` + +**本示例代码可以参考[finetuned_model_to_module](../../demo/text_classification/finetuned_model_to_module/)** + +Module存在一个接口predict,用于接收待预测文本,并给出文本的情感倾向(正面/负面),支持python接口调用和命令行调用。 +```python +import paddlehub as hub + +ernie_tiny_finetuned = hub.Module(name="ernie_tiny_finetuned") +ernie_tiny_finetuned.predict(data=[["这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般"], ["交通方便;环境很好;服务态度很好 房间较小"], + ["19天硬盘就罢工了~~~算上运来的一周都没用上15天~~~可就是不能换了~~~唉~~~~你说这算什么事呀~~~"]]) +``` + + +## Module创建 + +### step 1. 创建必要的目录与文件 + +创建一个finetuned_model_to_module的目录,并在finetuned_model_to_module目录下分别创建__init__.py、module.py,其中 + +|文件名|用途| +|-|-| +|\_\_init\_\_.py|空文件| +|module.py|主模块,提供Module的实现代码| +|ckpt文件|利用PaddleHub Fine-tune得到的ckpt文件夹,其中必须包含best_model文件| + + +```cmd +➜ tree finetuned_model_to_module +finetuned_model_to_module/ +├── __init__.py +├── ckpt_chnsenticorp +│   ├── *** +│   ├── best_model +│   │   ├── *** +└── module.py +``` + +### step 2. 编写Module处理代码 + +module.py文件为Module的入口代码所在,我们需要在其中实现预测逻辑。 + +#### step 2_1. 引入必要的头文件 +```python +import os + +import numpy as np +from paddlehub.common.logger import logger +from paddlehub.module.module import moduleinfo, serving +import paddlehub as hub +``` + +#### step 2_2. 定义ERNIETinyFinetuned类 +module.py中需要有一个继承了hub.Module的类存在,该类负责实现预测逻辑,并使用moduleinfo填写基本信息。当使用hub.Module(name="ernie_tiny_finetuned")加载Module时,PaddleHub会自动创建ERNIETinyFinetuned的对象并返回。 +```python +@moduleinfo( + name="ernie_tiny_finetuned", + version="1.0.0", + summary="ERNIE tiny which was fine-tuned on the chnsenticorp dataset.", + author="anonymous", + author_email="", + type="nlp/semantic_model") +class ERNIETinyFinetuned(hub.Module): + ... +``` +#### step 2_3. 
执行必要的初始化 +```python +def _initialize(self, + ckpt_dir="ckpt_chnsenticorp", + num_class=2, + max_seq_len=128, + use_gpu=False, + batch_size=1): + self.ckpt_dir = os.path.join(self.directory, ckpt_dir) + self.num_class = num_class + self.MAX_SEQ_LEN = max_seq_len + + self.params_path = os.path.join(self.ckpt_dir, 'best_model') + if not os.path.exists(self.params_path): + logger.error( + "%s doesn't contain the best_model file which saves the best parameters as fine-tuning." + % self.ckpt_dir) + exit() + + # Load Paddlehub ERNIE Tiny pretrained model + self.module = hub.Module(name="ernie_tiny") + inputs, outputs, program = self.module.context( + trainable=True, max_seq_len=max_seq_len) + + self.vocab_path = self.module.get_vocab_path() + + # Download dataset and use accuracy as metrics + # Choose dataset: GLUE/XNLI/ChinesesGLUE/NLPCC-DBQA/LCQMC + # metric should be acc, f1 or matthews + metrics_choices = ["acc"] + + # For ernie_tiny, it use sub-word to tokenize chinese sentence + # If not ernie tiny, sp_model_path and word_dict_path should be set None + reader = hub.reader.ClassifyReader( + vocab_path=self.module.get_vocab_path(), + max_seq_len=max_seq_len, + sp_model_path=self.module.get_spm_path(), + word_dict_path=self.module.get_word_dict_path()) + + # Construct transfer learning network + # Use "pooled_output" for classification tasks on an entire sentence. + # Use "sequence_output" for token-level output. 
+ pooled_output = outputs["pooled_output"] + + # Setup feed list for data feeder + # Must feed all the tensor of module need + feed_list = [ + inputs["input_ids"].name, + inputs["position_ids"].name, + inputs["segment_ids"].name, + inputs["input_mask"].name, + ] + + # Setup runing config for PaddleHub Finetune API + config = hub.RunConfig( + use_data_parallel=False, + use_cuda=use_gpu, + batch_size=batch_size, + checkpoint_dir=self.ckpt_dir, + strategy=hub.AdamWeightDecayStrategy()) + + # Define a classfication finetune task by PaddleHub's API + self.cls_task = hub.TextClassifierTask( + data_reader=reader, + feature=pooled_output, + feed_list=feed_list, + num_classes=self.num_class, + config=config, + metrics_choices=metrics_choices) +``` + +初始化过程即为Fine-tune时创建Task的过程。 + +**NOTE:** 执行类的初始化不能使用默认的__init__接口,而是应该重载实现_initialize接口。对象默认内置了directory属性,可以直接获取到Module所在路径 + +#### step 2_4. 完善预测逻辑 +```python +def predict(self, data, return_result=False, accelerate_mode=True): + """ + Get prediction results + """ + run_states = self.cls_task.predict( + data=data, + return_result=return_result, + accelerate_mode=accelerate_mode) + return run_states +``` + +#### step 2_5. 
支持serving调用 + +如果希望Module可以支持PaddleHub Serving部署预测服务,则需要将预测接口predict加上serving修饰(`@serving`),接口负责解析传入数据并进行预测,将结果返回。 + +如果不需要提供PaddleHub Serving部署预测服务,则可以不需要加上serving修饰。 + +```python +@serving +def predict(self, data, return_result=False, accelerate_mode=True): + """ + Get prediction results + """ + run_states = self.cls_task.predict( + data=data, + return_result=return_result, + accelerate_mode=accelerate_mode) + return run_states +``` + +### 完整代码 + +* [module.py](../../demo/text_classification/finetuned_model_to_module/module.py) + +* [__init__.py](../../demo/text_classification/finetuned_model_to_module/__init__.py) + +**NOTE:** `__init__.py`是空文件 + +## 测试步骤 + +完成Module编写后,我们可以通过以下方式测试该Module + +### 调用方法1 + +将Module安装到本机中,再通过hub.Module(name=...)加载 +```shell +hub install finetuned_model_to_module +``` + +安装成功会显示**Successfully installed ernie_tiny_finetuned** + +```python +import paddlehub as hub +import numpy as np + + +ernie_tiny = hub.Module(name="ernie_tiny_finetuned") + +# Data to be predicted +data = [["这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般"], ["交通方便;环境很好;服务态度很好 房间较小"], + ["19天硬盘就罢工了~~~算上运来的一周都没用上15天~~~可就是不能换了~~~唉~~~~你说这算什么事呀~~~"]] + +index = 0 +run_states = ernie_tiny.predict(data=data) +results = [run_state.run_results for run_state in run_states] +for batch_result in results: + # get predict index + batch_result = np.argmax(batch_result, axis=2)[0] + for result in batch_result: + print("%s\tpredict=%s" % (data[index][0], result)) + index += 1 +``` + +### 调用方法2 + +直接通过hub.Module(directory=...)加载 +```python +import paddlehub as hub +import numpy as np + +ernie_tiny_finetuned = hub.Module(directory="finetuned_model_to_module/") + +# Data to be predicted +data = [["这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般"], ["交通方便;环境很好;服务态度很好 房间较小"], + ["19天硬盘就罢工了~~~算上运来的一周都没用上15天~~~可就是不能换了~~~唉~~~~你说这算什么事呀~~~"]] + +index = 0 +run_states = ernie_tiny_finetuned.predict(data=data) +results = [run_state.run_results for run_state in run_states] +for batch_result in results: + # get predict index + batch_result = 
np.argmax(batch_result, axis=2)[0] + for result in batch_result: + print("%s\tpredict=%s" % (data[index][0], result)) + index += 1 +``` + +### 调用方法3 +将finetuned_model_to_module作为路径加到环境变量中,直接加载ERNIETinyFinetuned对象 +```shell +export PYTHONPATH=finetuned_model_to_module:$PYTHONPATH +``` + +```python +from finetuned_model_to_module.module import ERNIETinyFinetuned +import numpy as np + +# Data to be predicted +data = [["这个宾馆比较陈旧了,特价的房间也很一般。总体来说一般"], ["交通方便;环境很好;服务态度很好 房间较小"], + ["19天硬盘就罢工了~~~算上运来的一周都没用上15天~~~可就是不能换了~~~唉~~~~你说这算什么事呀~~~"]] + +run_states = ERNIETinyFinetuned().predict(data=data) +index = 0 +results = [run_state.run_results for run_state in run_states] +for batch_result in results: + # get predict index + batch_result = np.argmax(batch_result, axis=2)[0] + for result in batch_result: + print("%s\tpredict=%s" % (data[index][0], result)) + index += 1 +``` diff --git a/docs/tutorial/tutorial_index.rst b/docs/tutorial/tutorial_index.rst index 9ebcbed0056dc424478cd326548d59130740de8e..44295eccbad69c55772f6606062bf6ca64f39f45 100644 --- a/docs/tutorial/tutorial_index.rst +++ b/docs/tutorial/tutorial_index.rst @@ -11,10 +11,11 @@ 命令行工具 自定义数据 + Fine-tune模型转化为PaddleHub Module 自定义任务 服务化部署 文本Embedding服务 语义相似度计算 ULMFit优化策略 超参优化 - Hook机制 \ No newline at end of file + Hook机制