Add CNN related apis in text.py

b3a1ddf8 · guosheng · 48d8a390 · b3a1ddf8 · b3a1ddf8 · b3a1ddf8
隐藏空白更改
内联并排

Showing 325 additions and 3 deletions

hapi/tests/test_text.py hapi/tests/test_text.py +34 -0

hapi/text/__init__.py hapi/text/__init__.py +13 -1

hapi/text/text.py hapi/text/text.py +278 -2

未找到文件。
--- a/hapi/tests/test_text.py
+++ b/hapi/tests/test_text.py
@@ -711,5 +711,39 @@ class TestBiGRU(ModuleApiTest):
        self.check_output()


+class TestCNNEncoder(ModuleApiTest):
+    def setUp(self):
+        shape = (2, 32, 8)  # [N, C, H]
+        self.inputs = [np.random.random(shape).astype("float32")]
+        self.outputs = None
+        self.attrs = {"num_channels": 32, "num_filters": 64, "num_layers": 2}
+        self.param_states = {}
+
+    @staticmethod
+    def model_init(self, num_channels, num_filters, num_layers):
+        self.cnn_encoder = CNNEncoder(
+            num_layers=2,
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=[2, 3],
+            pool_size=[7, 6])
+
+    @staticmethod
+    def model_forward(self, inputs):
+        return self.cnn_encoder(inputs)
+
+    def make_inputs(self):
+        inputs = [
+            Input(
+                [None, self.inputs[-1].shape[1], None],
+                "float32",
+                name="input"),
+        ]
+        return inputs
+
+    def test_check_output_merge0(self):
+        self.check_output()
+
+
 if __name__ == '__main__':
    unittest.main()
--- a/hapi/text/__init__.py
+++ b/hapi/text/__init__.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -16,15 +16,27 @@ from hapi.text.text import RNNCell as RNNCell
 from hapi.text.text import BasicLSTMCell as BasicLSTMCell
 from hapi.text.text import BasicGRUCell as BasicGRUCell
 from hapi.text.text import RNN as RNN
+from hapi.text.text import StackedLSTMCell as StackedLSTMCell
+from hapi.text.text import LSTM as LSTM
+from hapi.text.text import BidirectionalLSTM as BidirectionalLSTM
+from hapi.text.text import StackedGRUCell as StackedGRUCell
+from hapi.text.text import GRU as GRU
+from hapi.text.text import BidirectionalGRU as BidirectionalGRU
 from hapi.text.text import DynamicDecode as DynamicDecode
 from hapi.text.text import BeamSearchDecoder as BeamSearchDecoder
+
+from hapi.text.text import Conv1dPoolLayer as Conv1dPoolLayer
+from hapi.text.text import CNNEncoder as CNNEncoder
+
 from hapi.text.text import MultiHeadAttention as MultiHeadAttention
 from hapi.text.text import FFN as FFN
 from hapi.text.text import TransformerEncoderLayer as TransformerEncoderLayer
 from hapi.text.text import TransformerDecoderLayer as TransformerDecoderLayer
 from hapi.text.text import TransformerEncoder as TransformerEncoder
 from hapi.text.text import TransformerDecoder as TransformerDecoder
+from hapi.text.text import TransformerCell as TransformerCell
 from hapi.text.text import TransformerBeamSearchDecoder as TransformerBeamSearchDecoder
+
 from hapi.text.text import GRUCell as GRUCell
 from hapi.text.text import GRUEncoderCell as GRUEncoderCell
 from hapi.text.text import BiGRU as BiGRU

--- a/hapi/text/text.py
+++ b/hapi/text/text.py
@@ -37,7 +37,7 @@ import paddle
 import paddle.fluid as fluid
 import paddle.fluid.layers.utils as utils
 from paddle.fluid.layers.utils import map_structure, flatten, pack_sequence_as
-from paddle.fluid.dygraph import to_variable, Embedding, Linear, LayerNorm, GRUUnit
+from paddle.fluid.dygraph import Embedding, Linear, LayerNorm, GRUUnit, Conv2D, Pool2D
 from paddle.fluid.data_feeder import convert_dtype

 from paddle.fluid import layers
@@ -57,6 +57,8 @@ __all__ = [
    'BidirectionalGRU',
    'DynamicDecode',
    'BeamSearchDecoder',
+    'Conv1dPoolLayer',
+    'CNNEncoder',
    'MultiHeadAttention',
    'FFN',
    'TransformerEncoderLayer',
@@ -2171,7 +2173,7 @@ class DynamicDecode(Layer):

            import paddle
            import paddle.fluid as fluid
-            from paddle.incubate.hapi.text import StackedLSTMCell, RNN
+            from paddle.incubate.hapi.text import StackedLSTMCell, DynamicDecode

            vocab_size, d_model, = 100, 32
            encoder_output = paddle.rand((2, 4, d_model))
@@ -2344,6 +2346,280 @@ class DynamicDecode(Layer):
                **kwargs)


+class Conv1dPoolLayer(Layer):
+    """
+    This interface is used to construct a callable object of the ``Conv1DPoolLayer``
+    class. The ``Conv1DPoolLayer`` class does a ``Conv1D`` and a ``Pool1D`` .
+    For more details, refer to code examples.The ``Conv1DPoolLayer`` layer calculates
+    the output based on the input, filter and strides, paddings, dilations, groups,
+    global_pooling, pool_type, ceil_mode, exclusive parameters.
+
+    Parameters:
+        num_channels (int): The number of channels in the input data.
+        num_filters(int): The number of filters. It is the same as the output channels.
+        filter_size (int): The filter size of Conv1DPoolLayer.       
+        pool_size (int): The pooling size of Conv1DPoolLayer.
+        conv_stride (int): The stride size of the conv Layer in Conv1DPoolLayer.
+            Default: 1
+        pool_stride (int): The stride size of the pool layer in Conv1DPoolLayer.
+            Default: 1
+        conv_padding (int): The padding size of the conv Layer in Conv1DPoolLayer.
+            Default: 0
+        pool_padding (int): The padding of pool layer in Conv1DPoolLayer.
+            Default: 0
+        act (str): Activation type for conv layer, if it is set to None, activation
+            is not appended. Default: None.
+        pool_type (str): Pooling type can be `max` for max-pooling or `avg` for
+            average-pooling. Default: `max`
+        dilation (int): The dilation size of the conv Layer. Default: 1.
+        groups (int): The groups number of the conv Layer. According to grouped
+            convolution in Alex Krizhevsky's Deep CNN paper: when group=2, the
+            first half of the filters is only connected to the first half of the
+            input channels, while the second half of the filters is only connected
+            to the second half of the input channels. Default: 1.
+        global_pooling (bool): Whether to use the global pooling. If it is true, 
+                `pool_size` and `pool_padding` would be ignored. Default: False
+        ceil_mode (bool, optional): Whether to use the ceil function to calculate output 
+                height and width.False is the default. If it is set to False, the floor function 
+                will be used. Default: False.
+        exclusive (bool, optional): Whether to exclude padding points in average pooling mode. 
+                Default: True.
+        use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
+            library is installed. Default: False
+        param_attr (ParamAttr|None): The parameter attribute for learnable parameters/weights
+            of conv2d. If it is set to None or one attribute of ParamAttr, conv2d
+            will create ParamAttr as param_attr. If the Initializer of the param_attr
+            is not set, the parameter is initialized with :math:`Normal(0.0, std)`,
+            and the :math:`std` is :math:`(\\frac{2.0 }{filter\_elem\_num})^{0.5}`. Default: None.
+        bias_attr (ParamAttr|bool|None): The parameter attribute for the bias of conv2d.
+            If it is set to False, no bias will be added to the output units.
+            If it is set to None or one attribute of ParamAttr, conv2d
+            will create ParamAttr as bias_attr. If the Initializer of the bias_attr
+            is not set, the bias is initialized zero. Default: None.
+
+    Example:
+        .. code-block:: python
+
+            import paddle
+            import paddle.fluid as fluid
+            from paddle.incubate.hapi.text import Conv1dPoolLayer
+
+            # input: [batch_size, num_channels, sequence_length]
+            input = paddle.rand((2, 32, 4))
+            cov2d = Conv1dPoolLayer(num_channels=32,
+                                    num_filters=64,
+                                    filter_size=2,
+                                    pool_size=2)
+            output = cov2d(input)
+    """
+
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 pool_size,
+                 conv_stride=1,
+                 pool_stride=1,
+                 conv_padding=0,
+                 pool_padding=0,
+                 act=None,
+                 pool_type='max',
+                 global_pooling=False,
+                 dilation=1,
+                 groups=None,
+                 ceil_mode=False,
+                 exclusive=True,
+                 use_cudnn=False,
+                 param_attr=None,
+                 bias_attr=None):
+        super(Conv1dPoolLayer, self).__init__()
+        self._conv2d = Conv2D(
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=[filter_size, 1],
+            stride=[conv_stride, 1],
+            padding=[conv_padding, 0],
+            dilation=[dilation, 1],
+            groups=groups,
+            param_attr=param_attr,
+            bias_attr=bias_attr,
+            use_cudnn=use_cudnn,
+            act=act)
+        self._pool2d = Pool2D(
+            pool_size=[pool_size, 1],
+            pool_type=pool_type,
+            pool_stride=[pool_stride, 1],
+            pool_padding=[pool_padding, 0],
+            global_pooling=global_pooling,
+            use_cudnn=use_cudnn,
+            ceil_mode=ceil_mode,
+            exclusive=exclusive)
+
+    def forward(self, input):
+        """
+        Performs conv1d and pool1d on the input.
+
+        Parameters:
+            input (Variable): A 3-D Tensor, shape is [N, C, H] where N, C and H
+                representing `batch_size`, `num_channels` and `sequence_length`
+                separately. data type can be float32 or float64
+        
+        Returns:
+            Variable: The 3-D output tensor after conv and pool. It has the same \
+                data type as input.
+        """
+        x = fluid.layers.unsqueeze(input, axes=[-1])
+        x = self._conv2d(x)
+        x = self._pool2d(x)
+        x = fluid.layers.squeeze(x, axes=[-1])
+        return x
+
+
+class CNNEncoder(Layer):
+    """
+    This interface is used to construct a callable object of the ``CNNEncoder``
+    class. The ``CNNEncoder`` is composed of multiple ``Conv1dPoolLayer`` .
+    ``CNNEncoder`` can define every Conv1dPoolLayer with different or same parameters.
+    The ``Conv1dPoolLayer`` in ``CNNEncoder`` is parallel. The results of every 
+    ``Conv1dPoolLayer`` will concat at the channel dimension as the final output.
+
+    Parameters:
+        num_channels(int|list|tuple): The number of channels in the input data. If
+            `num_channels` is a list or tuple, the length of `num_channels` must
+            equal to `num_layers`. If `num_channels` is a int, all conv1dpoollayer's
+            `num_channels` are the value of `num_channels`. 
+        num_filters(int|list|tuple): The number of filters. It is the same as the
+            output channels. If `num_filters` is a list or tuple, the length of
+            `num_filters` must equal `num_layers`. If `num_filters` is a int,
+            all conv1dpoollayer's `num_filters` are the value of `num_filters`.
+        filter_size(int|list|tuple): The filter size of Conv1DPoolLayer in CNNEncoder.
+            If `filter_size` is a list or tuple, the length of `filter_size` must
+            equal `num_layers`. If `filter_size` is a int, all conv1dpoollayer's
+            `filter_size` are the value of `filter_size`. 
+        pool_size(int|list|tuple): The pooling size of Conv1DPoolLayer in CNNEncoder.
+            If `pool_size` is a list or tuple, the length of `pool_size` must equal
+            `num_layers`. If `pool_size` is a int, all conv1dpoollayer's `pool_size`
+            are the value of `pool_size`.
+        num_layers(int): The number of conv1dpoolLayer used in CNNEncoder.
+        conv_stride(int|list|tuple): The stride size of the conv Layer in Conv1DPoolLayer.
+            If `conv_stride` is a list or tuple, the length of `conv_stride` must
+            equal `num_layers`. If conv_stride is a int, all conv1dpoollayer's `conv_stride`
+            are the value of `conv_stride`. Default: 1
+        pool_stride(int|list|tuple): The stride size of the pool layer in Conv1DPoolLayer.
+            If `pool_stride` is a list or tuple, the length of `pool_stride` must
+            equal `num_layers`. If `pool_stride` is a int, all conv1dpoollayer's `pool_stride`
+            are the value of `pool_stride`. Default: 1
+        conv_padding(int|list|tuple): The padding size of the conv Layer in Conv1DPoolLayer.
+            If `conv_padding` is a list or tuple, the length of `conv_padding` must
+            equal `num_layers`. If `conv_padding` is a int, all conv1dpoollayer's `conv_padding`
+            are the value of `conv_padding`. Default: 0
+        pool_padding(int|list|tuple): The padding size of pool layer in Conv1DPoolLayer.
+            If `pool_padding` is a list or tuple, the length of `pool_padding` must
+            equal `num_layers`.If `pool_padding` is a int, all conv1dpoollayer's `pool_padding`
+            are the value of `pool_padding`. Default: 0
+        act (str|list|tuple): Activation type for `Conv1dPoollayer` layer, if it is set to None,
+            activation is not appended. Default: None.
+        pool_type (str): Pooling type can be `max` for max-pooling or `avg` for
+            average-pooling. Default: `max`
+        global_pooling (bool): Whether to use the global pooling. If it is true, 
+            `pool_size` and `pool_padding` would be ignored. Default: False
+        use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
+            library is installed. Default: False
+    
+    Example:
+        .. code-block:: python
+
+            import paddle
+            import paddle.fluid as fluid
+            from paddle.incubate.hapi.text import CNNEncoder
+
+            # input: [batch_size, num_channels, sequence_length]
+            input = paddle.rand((2, 32, 8))
+            cov_encoder = CNNEncoder(num_layers=2,
+                                     num_channels=32,
+                                     num_filters=64,
+                                     filter_size=[2, 3],
+                                     pool_size=[7, 6])
+            output = cov_encoder(input)  # [2, 128, 1]
+    """
+
+    def __init__(self,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 pool_size,
+                 num_layers=1,
+                 conv_stride=1,
+                 pool_stride=1,
+                 conv_padding=0,
+                 pool_padding=0,
+                 act=None,
+                 pool_type='max',
+                 global_pooling=False,
+                 use_cudnn=False):
+        super(CNNEncoder, self).__init__()
+        self.num_layers = num_layers
+        self.num_channels = num_channels
+        self.num_filters = num_filters
+        self.filter_size = filter_size
+        self.pool_size = pool_size
+        self.conv_stride = conv_stride
+        self.pool_stride = pool_stride
+        self.conv_padding = conv_padding
+        self.pool_padding = pool_padding
+        self.use_cudnn = use_cudnn
+        self.act = act
+        self.pool_type = pool_type
+        self.global_pooling = global_pooling
+        self.conv1d_pool_layers = fluid.dygraph.LayerList([
+            Conv1dPoolLayer(
+                num_channels=self.num_channels if
+                isinstance(self.num_channels, int) else self.num_channels[i],
+                num_filters=self.num_filters
+                if isinstance(self.num_channels, int) else self.num_filters[i],
+                filter_size=self.filter_size
+                if isinstance(self.filter_size, int) else self.filter_size[i],
+                pool_size=self.pool_size
+                if isinstance(self.pool_size, int) else self.pool_size[i],
+                conv_stride=self.conv_stride
+                if isinstance(self.conv_stride, int) else self.conv_stride[i],
+                pool_stride=self.pool_stride
+                if isinstance(self.pool_stride, int) else self.pool_stride[i],
+                conv_padding=self.conv_padding
+                if isinstance(self.conv_padding,
+                              int) else self.conv_padding[i],
+                pool_padding=self.pool_padding
+                if isinstance(self.pool_padding,
+                              int) else self.pool_padding[i],
+                act=self.act[i]
+                if isinstance(self.act, (list, tuple)) else self.act,
+                pool_type=self.pool_type,
+                global_pooling=self.global_pooling,
+                use_cudnn=self.use_cudnn) for i in range(num_layers)
+        ])
+
+    def forward(self, input):
+        """
+        Performs multiple parallel conv1d and pool1d, and concat the results of
+        them at the channel dimension to produce the final output.
+
+        Parameters:
+            input (Variable): A 3-D Tensor, shape is [N, C, H] where N, C and H
+                representing `batch_size`, `num_channels` and `sequence_length`
+                separately. data type can be float32 or float64
+        
+        Returns:
+            Variable: The 3-D output tensor produced by concatenating results of \
+                all Conv1dPoolLayer. It has the same data type as input.
+        """
+        res = [
+            conv1d_pool_layer(input)
+            for conv1d_pool_layer in self.conv1d_pool_layers
+        ]
+        out = fluid.layers.concat(input=res, axis=1)
+        return out
+
+
 class TransformerCell(Layer):
    """
    TransformerCell wraps a Transformer decoder producing logits from `inputs`