diff --git a/seq2seq/README.md b/examples/seq2seq/README.md
similarity index 100%
rename from seq2seq/README.md
rename to examples/seq2seq/README.md
diff --git a/seq2seq/args.py b/examples/seq2seq/args.py
similarity index 100%
rename from seq2seq/args.py
rename to examples/seq2seq/args.py
diff --git a/seq2seq/download.py b/examples/seq2seq/download.py
similarity index 100%
rename from seq2seq/download.py
rename to examples/seq2seq/download.py
diff --git a/seq2seq/predict.py b/examples/seq2seq/predict.py
similarity index 92%
rename from seq2seq/predict.py
rename to examples/seq2seq/predict.py
index c9120bff126cc505b3c0ee3274f65b67e8f78fe6..ae8d11be0de3b3697ec2c6f0a225c937d0cd7af9 100644
--- a/seq2seq/predict.py
+++ b/examples/seq2seq/predict.py
@@ -15,8 +15,6 @@
 import logging
 import os
 import io
-import sys
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 import random
 from functools import partial
@@ -25,10 +23,10 @@
 import paddle.fluid as fluid
 from paddle.fluid.layers.utils import flatten
 from paddle.fluid.io import DataLoader

-from model import Input, set_device
+from hapi.model import Input, set_device
 from args import parse_args
 from seq2seq_base import BaseInferModel
-from seq2seq_attn import AttentionInferModel, AttentionGreedyInferModel
+from seq2seq_attn import AttentionInferModel
 from reader import Seq2SeqDataset, Seq2SeqBatchSampler, SortType, prepare_infer_input

@@ -87,8 +85,7 @@
         num_workers=0,
         return_list=True)

-    # model_maker = AttentionInferModel if args.attention else BaseInferModel
-    model_maker = AttentionGreedyInferModel if args.attention else BaseInferModel
+    model_maker = AttentionInferModel if args.attention else BaseInferModel
     model = model_maker(
         args.src_vocab_size,
         args.tar_vocab_size,
diff --git a/seq2seq/reader.py b/examples/seq2seq/reader.py
similarity index 100%
rename from seq2seq/reader.py
rename to examples/seq2seq/reader.py
diff --git a/seq2seq/run.sh b/examples/seq2seq/run.sh
similarity index 100%
rename from seq2seq/run.sh
rename to examples/seq2seq/run.sh
diff --git a/seq2seq/seq2seq_attn.py b/examples/seq2seq/seq2seq_attn.py
similarity index 71%
rename from seq2seq/seq2seq_attn.py
rename to examples/seq2seq/seq2seq_attn.py
index 136b4741d95af90c564e8bac7ce6723198533a28..ce9cc089ca2133549fbdd08ed600e69d4235e08c 100644
--- a/seq2seq/seq2seq_attn.py
+++ b/examples/seq2seq/seq2seq_attn.py
@@ -19,8 +19,9 @@ from paddle.fluid.initializer import UniformInitializer
 from paddle.fluid.dygraph import Embedding, Linear, Layer
 from paddle.fluid.layers import BeamSearchDecoder
-from text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
-from model import Model, Loss
+from hapi.model import Model, Loss
+from hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
+

 from seq2seq_base import Encoder


@@ -238,92 +239,3 @@ class AttentionInferModel(AttentionModel):
             encoder_output=encoder_output,
             encoder_padding_mask=encoder_padding_mask)
         return rs
-
-
-class GreedyEmbeddingHelper(fluid.layers.GreedyEmbeddingHelper):
-    def __init__(self, embedding_fn, start_tokens, end_token):
-        if isinstance(start_tokens, int):
-            self.need_convert_start_tokens = True
-            self.start_token_value = start_tokens
-        super(GreedyEmbeddingHelper, self).__init__(embedding_fn, start_tokens,
-                                                    end_token)
-        self.end_token = fluid.layers.create_global_var(
-            shape=[1], dtype="int64", value=end_token, persistable=True)
-
-    def initialize(self, batch_ref=None):
-        if getattr(self, "need_convert_start_tokens", False):
-            assert batch_ref is not None, (
-                "Need to give batch_ref to get batch size "
-                "to initialize the tensor for start tokens.")
-            self.start_tokens = fluid.layers.fill_constant_batch_size_like(
-                input=fluid.layers.utils.flatten(batch_ref)[0],
-                shape=[-1],
-                dtype="int64",
-                value=self.start_token_value,
-                input_dim_idx=0)
-        return super(GreedyEmbeddingHelper, self).initialize()
-
-
-class BasicDecoder(fluid.layers.BasicDecoder):
-    def initialize(self, initial_cell_states):
-        (initial_inputs,
-         initial_finished) = self.helper.initialize(initial_cell_states)
-        return initial_inputs, initial_cell_states, initial_finished
-
-
-class AttentionGreedyInferModel(AttentionModel):
-    def __init__(self,
-                 src_vocab_size,
-                 trg_vocab_size,
-                 embed_dim,
-                 hidden_size,
-                 num_layers,
-                 dropout_prob=0.,
-                 bos_id=0,
-                 eos_id=1,
-                 beam_size=1,
-                 max_out_len=256):
-        args = dict(locals())
-        args.pop("self")
-        args.pop("__class__", None)  # py3
-        args.pop("beam_size", None)
-        self.bos_id = args.pop("bos_id")
-        self.eos_id = args.pop("eos_id")
-        self.max_out_len = args.pop("max_out_len")
-        super(AttentionGreedyInferModel, self).__init__(**args)
-        # dynamic decoder for inference
-        decoder_helper = GreedyEmbeddingHelper(
-            start_tokens=bos_id,
-            end_token=eos_id,
-            embedding_fn=self.decoder.embedder)
-        decoder = BasicDecoder(
-            cell=self.decoder.lstm_attention.cell,
-            helper=decoder_helper,
-            output_fn=self.decoder.output_layer)
-        self.greedy_search_decoder = DynamicDecode(
-            decoder, max_step_num=max_out_len, is_test=True)
-
-    def forward(self, src, src_length):
-        # encoding
-        encoder_output, encoder_final_state = self.encoder(src, src_length)
-
-        # decoder initial states
-        decoder_initial_states = [
-            encoder_final_state,
-            self.decoder.lstm_attention.cell.get_initial_states(
-                batch_ref=encoder_output, shape=[self.hidden_size])
-        ]
-        # attention mask to avoid paying attention on padddings
-        src_mask = layers.sequence_mask(
-            src_length,
-            maxlen=layers.shape(src)[1],
-            dtype=encoder_output.dtype)
-        encoder_padding_mask = (src_mask - 1.0) * 1e9
-        encoder_padding_mask = layers.unsqueeze(encoder_padding_mask, [1])
-
-        # dynamic decoding with greedy search
-        rs, _ = self.greedy_search_decoder(
-            inits=decoder_initial_states,
-            encoder_output=encoder_output,
-            encoder_padding_mask=encoder_padding_mask)
-        return rs.sample_ids
diff --git a/seq2seq/seq2seq_base.py b/examples/seq2seq/seq2seq_base.py
similarity index 82%
rename from seq2seq/seq2seq_base.py
rename to examples/seq2seq/seq2seq_base.py
index 83fd187d75ce1e100a66729eef1112ca7724e8c7..c28e2dc52935526d69d78ae73bfd48c92528b93c 100644
--- a/seq2seq/seq2seq_base.py
+++ b/examples/seq2seq/seq2seq_base.py
@@ -18,8 +18,9 @@ from paddle.fluid import ParamAttr
 from paddle.fluid.initializer import UniformInitializer
 from paddle.fluid.dygraph import Embedding, Linear, Layer
 from paddle.fluid.layers import BeamSearchDecoder
-from text import DynamicDecode, RNN, BasicLSTMCell, RNNCell
-from model import Model, Loss
+
+from hapi.model import Model, Loss
+from hapi.text import DynamicDecode, RNN, BasicLSTMCell, RNNCell


 class CrossEntropyCriterion(Loss):
@@ -200,44 +201,3 @@
         # dynamic decoding with beam search
         rs, _ = self.beam_search_decoder(inits=encoder_final_states)
         return rs
-
-
-class BaseGreedyInferModel(BaseModel):
-    def __init__(self,
-                 src_vocab_size,
-                 trg_vocab_size,
-                 embed_dim,
-                 hidden_size,
-                 num_layers,
-                 dropout_prob=0.,
-                 bos_id=0,
-                 eos_id=1,
-                 beam_size=1,
-                 max_out_len=256):
-        args = dict(locals())
-        args.pop("self")
-        args.pop("__class__", None)  # py3
-        args.pop("beam_size", None)
-        self.bos_id = args.pop("bos_id")
-        self.eos_id = args.pop("eos_id")
-        self.max_out_len = args.pop("max_out_len")
-        super(BaseGreedyInferModel, self).__init__(**args)
-        # dynamic decoder for inference
-        decoder_helper = GreedyEmbeddingHelper(
-            start_tokens=bos_id,
-            end_token=eos_id,
-            embedding_fn=self.decoder.embedder)
-        decoder = BasicDecoder(
-            cell=self.decoder.stack_lstm.cell,
-            helper=decoder_helper,
-            output_fn=self.decoder.output_layer)
-        self.greedy_search_decoder = DynamicDecode(
-            decoder, max_step_num=max_out_len, is_test=True)
-
-    def forward(self, src, src_length):
-        # encoding
-        encoder_output, encoder_final_states = self.encoder(src, src_length)
-
-        # dynamic decoding with greedy search
-        rs, _ = self.greedy_search_decoder(inits=encoder_final_states)
-        return rs.sample_ids
diff --git a/seq2seq/train.py b/examples/seq2seq/train.py
similarity index 93%
rename from seq2seq/train.py
rename to examples/seq2seq/train.py
index 4502628737b1af357bbad476856c680c4c75ee9d..8a0190cee1865e1f16b3d4edd6cd2fd5e98906d4 100644
--- a/seq2seq/train.py
+++ b/examples/seq2seq/train.py
@@ -14,8 +14,6 @@
 import logging
 import os
-import sys
-sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

 import random
 from functools import partial
@@ -23,9 +21,7 @@ import numpy as np
 import paddle.fluid as fluid
 from paddle.fluid.io import DataLoader

-from model import Input, set_device
-from metrics import Metric
-from callbacks import ProgBarLogger
+from hapi.model import Input, set_device
 from args import parse_args
 from seq2seq_base import BaseModel, CrossEntropyCriterion
 from seq2seq_attn import AttentionModel
diff --git a/seq2seq/utility.py b/examples/seq2seq/utility.py
similarity index 96%
rename from seq2seq/utility.py
rename to examples/seq2seq/utility.py
index eb54e8c70e5a17adf2f4d5e9e6f53829297e3073..a1c1264eb195dfe92380bdea8beaaf2012fea3f5 100644
--- a/seq2seq/utility.py
+++ b/examples/seq2seq/utility.py
@@ -15,8 +15,8 @@
 import numpy as np
 import paddle.fluid as fluid

-from metrics import Metric
-from callbacks import ProgBarLogger
+from hapi.metrics import Metric
+from hapi.callbacks import ProgBarLogger


 class TrainCallback(ProgBarLogger):
diff --git a/hapi/text/text.py b/hapi/text/text.py
index e5be32bcb531b938c3cc8c21ec7caf2a4f40ee6e..319800d46597f1ad7cf6806843184534e7626807 100644
--- a/hapi/text/text.py
+++ b/hapi/text/text.py
@@ -238,8 +238,9 @@ class BasicLSTMCell(RNNCell):
         self._bias_attr = bias_attr
         self._gate_activation = gate_activation or layers.sigmoid
         self._activation = activation or layers.tanh
-        self._forget_bias = layers.fill_constant(
-            [1], dtype=dtype, value=forget_bias)
+        # TODO(guosheng): find better way to resolve constants in __init__
+        self._forget_bias = layers.create_global_var(
+            shape=[1], dtype=dtype, value=forget_bias, persistable=True)
         self._forget_bias.stop_gradient = False
         self._dtype = dtype
         self._input_size = input_size