diff --git a/ocr/data.py b/ocr/data.py
index 23e676e2625d3c75be9bec9b00777a11c38e0e6e..30ab9e96f75843d7c38054bf7252bcb3b3491508 100644
--- a/ocr/data.py
+++ b/ocr/data.py
@@ -15,8 +15,7 @@ import logging
 logger = logging.getLogger(__name__)
 
 import paddle
-from paddle import fluid
-from paddle.fluid.dygraph.parallel import ParallelEnv
+from paddle.distributed import ParallelEnv
 
 DATA_MD5 = "7256b1d5420d8c3e74815196e58cdad5"
 DATA_URL = "http://paddle-ocr-data.bj.bcebos.com/data.tar.gz"
@@ -97,7 +96,7 @@ class PadTarget(object):
         return samples
 
 
-class BatchSampler(fluid.io.BatchSampler):
+class BatchSampler(paddle.io.BatchSampler):
     def __init__(self,
                  dataset,
                  batch_size,
diff --git a/ocr/eval.py b/ocr/eval.py
index f1e0f2bef6bc8361a34f7ee37513f202c6cc300c..9d70e85600224abb6c63a851a4183308ce7a0160 100644
--- a/ocr/eval.py
+++ b/ocr/eval.py
@@ -17,7 +16,6 @@ import argparse
 import functools
 
 import paddle
-import paddle.fluid as fluid
 from paddle.static import InputSpec as Input
 from paddle.vision.transforms import BatchCompose
 
@@ -47,7 +46,7 @@ add_arg('dynamic', bool, False, "Whether to use dygraph.")
 
 def main(FLAGS):
     device = paddle.set_device("gpu" if FLAGS.use_gpu else "cpu")
-    fluid.enable_dygraph(device) if FLAGS.dynamic else None
+    paddle.disable_static(device) if FLAGS.dynamic else None
 
     # yapf: disable
     inputs = [
@@ -79,7 +78,7 @@
         batch_size=FLAGS.batch_size,
         drop_last=False,
         shuffle=False)
-    test_loader = fluid.io.DataLoader(
+    test_loader = paddle.io.DataLoader(
         test_dataset,
         batch_sampler=test_sampler,
         places=device,
@@ -94,7 +93,7 @@
 
 def beam_search(FLAGS):
     device = set_device("gpu" if FLAGS.use_gpu else "cpu")
-    fluid.enable_dygraph(device) if FLAGS.dynamic else None
+    paddle.disable_static(device) if FLAGS.dynamic else None
 
     # yapf: disable
     inputs = [
@@ -128,7 +127,7 @@
         batch_size=FLAGS.batch_size,
         drop_last=False,
         shuffle=False)
-    test_loader = fluid.io.DataLoader(
+    test_loader = paddle.io.DataLoader(
         test_dataset,
         batch_sampler=test_sampler,
         places=device,
diff --git a/ocr/predict.py b/ocr/predict.py
index c2d3fab504b5b0415d11c3482f5bae634cb0fc38..69b69027fbfecbc22a3d23d89eaa1593244b5cfc 100644
--- a/ocr/predict.py
+++ b/ocr/predict.py
@@ -23,7 +22,6 @@ import functools
 from PIL import Image
 
 import paddle
-import paddle.fluid as fluid
 from paddle.static import InputSpec as Input
 from paddle.vision.datasets.folder import ImageFolder
 
@@ -53,7 +52,7 @@ add_arg('dynamic', bool, False, "Whether to use dygraph.")
 
 def main(FLAGS):
     device = paddle.set_device("gpu" if FLAGS.use_gpu else "cpu")
-    fluid.enable_dygraph(device) if FLAGS.dynamic else None
+    paddle.disable_static(device) if FLAGS.dynamic else None
 
     inputs = [Input([None, 1, 48, 384], "float32", name="pixel"), ]
     model = paddle.Model(
@@ -71,7 +70,7 @@
     fn = lambda p: Image.open(p).convert('L')
     test_dataset = ImageFolder(FLAGS.image_path, loader=fn)
     test_collate_fn = BatchCompose([data.Resize(), data.Normalize()])
-    test_loader = fluid.io.DataLoader(
+    test_loader = paddle.io.DataLoader(
         test_dataset,
         places=device,
         num_workers=0,
diff --git a/ocr/seq2seq_attn.py b/ocr/seq2seq_attn.py
index e0f19e2e4f8372ff700be0997fb0c19654437152..8a9e4eaf68c41cd8636bae8b3ac21ead0d48cf12 100644
--- a/ocr/seq2seq_attn.py
+++ b/ocr/seq2seq_attn.py
@@ -16,11 +16,10 @@ from __future__ import print_function
 import numpy as np
 
 import paddle
-import paddle.fluid as fluid
-import paddle.fluid.layers as layers
-from paddle.fluid.layers import BeamSearchDecoder
-
-from paddle.text import RNNCell, RNN, DynamicDecode
+import paddle.nn as nn
+import paddle.nn.functional as F
+#from paddle.text import RNNCell, RNN, DynamicDecode
+from paddle.text import DynamicDecode, BeamSearchDecoder
 
 
 class ConvBNPool(paddle.nn.Layer):
@@ -36,103 +35,99 @@ class ConvBNPool(paddle.nn.Layer):
 
         filter_size = 3
         std = (2.0 / (filter_size**2 * in_ch))**0.5
-        param_0 = fluid.ParamAttr(
-            initializer=fluid.initializer.Normal(0.0, std))
+        param_0 = paddle.ParamAttr(
+            initializer=paddle.nn.initializer.Normal(0.0, std))
 
         std = (2.0 / (filter_size**2 * out_ch))**0.5
-        param_1 = fluid.ParamAttr(
-            initializer=fluid.initializer.Normal(0.0, std))
-
-        self.conv0 = fluid.dygraph.Conv2D(
-            in_ch,
-            out_ch,
-            3,
-            padding=1,
-            param_attr=param_0,
-            bias_attr=False,
-            act=None,
-            use_cudnn=use_cudnn)
-        self.bn0 = fluid.dygraph.BatchNorm(out_ch, act=act)
-        self.conv1 = fluid.dygraph.Conv2D(
-            out_ch,
-            out_ch,
-            filter_size=3,
-            padding=1,
-            param_attr=param_1,
-            bias_attr=False,
-            act=None,
-            use_cudnn=use_cudnn)
-        self.bn1 = fluid.dygraph.BatchNorm(out_ch, act=act)
+        param_1 = paddle.ParamAttr(
+            initializer=paddle.nn.initializer.Normal(0.0, std))
+
+        net = [
+            nn.Conv2d(
+                in_ch,
+                out_ch,
+                3,
+                padding=1,
+                weight_attr=param_0,
+                bias_attr=False),
+            nn.BatchNorm2d(out_ch),
+        ]
+        if act == 'relu':
+            net += [nn.ReLU()]
+
+        net += [
+            nn.Conv2d(
+                out_ch,
+                out_ch,
+                kernel_size=3,
+                padding=1,
+                weight_attr=param_1,
+                bias_attr=False),
+            nn.BatchNorm2d(out_ch),
+        ]
+        if act == 'relu':
+            net += [nn.ReLU()]
 
         if self.pool:
-            self.pool = fluid.dygraph.Pool2D(
-                pool_size=2,
-                pool_type='max',
-                pool_stride=2,
-                use_cudnn=use_cudnn,
-                ceil_mode=True)
+            net += [nn.MaxPool2d(kernel_size=2, stride=2, ceil_mode=True)]
+        self.net = nn.Sequential(*net)
 
     def forward(self, inputs):
-        out = self.conv0(inputs)
-        out = self.bn0(out)
-        out = self.conv1(out)
-        out = self.bn1(out)
-        if self.pool:
-            out = self.pool(out)
-        return out
+        return self.net(inputs)
 
 
 class CNN(paddle.nn.Layer):
     def __init__(self, in_ch=1, is_test=False):
         super(CNN, self).__init__()
-        self.conv_bn1 = ConvBNPool(in_ch, 16)
-        self.conv_bn2 = ConvBNPool(16, 32)
-        self.conv_bn3 = ConvBNPool(32, 64)
-        self.conv_bn4 = ConvBNPool(64, 128, pool=False)
+        net = [
+            ConvBNPool(in_ch, 16),
+            ConvBNPool(16, 32),
+            ConvBNPool(32, 64),
+            ConvBNPool(
+                64, 128, pool=False),
+        ]
+        self.net = nn.Sequential(*net)
 
     def forward(self, inputs):
-        conv = self.conv_bn1(inputs)
-        conv = self.conv_bn2(conv)
-        conv = self.conv_bn3(conv)
-        conv = self.conv_bn4(conv)
-        return conv
-
-
-class GRUCell(RNNCell):
-    def __init__(self,
-                 input_size,
-                 hidden_size,
-                 param_attr=None,
-                 bias_attr=None,
-                 gate_activation='sigmoid',
-                 candidate_activation='tanh',
-                 origin_mode=False):
-        super(GRUCell, self).__init__()
-        self.hidden_size = hidden_size
-        self.fc_layer = fluid.dygraph.Linear(
-            input_size,
-            hidden_size * 3,
-            param_attr=param_attr,
-            bias_attr=False)
-
-        self.gru_unit = fluid.dygraph.GRUUnit(
-            hidden_size * 3,
-            param_attr=param_attr,
-            bias_attr=bias_attr,
-            activation=candidate_activation,
-            gate_activation=gate_activation,
-            origin_mode=origin_mode)
-
-    def forward(self, inputs, states):
-        # step_outputs, new_states = cell(step_inputs, states)
-        # for GRUCell, `step_outputs` and `new_states` both are hidden
-        x = self.fc_layer(inputs)
-        hidden, _, _ = self.gru_unit(x, states)
-        return hidden, hidden
-
-    @property
-    def state_shape(self):
-        return [self.hidden_size]
+        return self.net(inputs)
+
+
+#class GRUCell(RNNCell):
+#    def __init__(self,
+#                 input_size,
+#                 hidden_size,
+#                 param_attr=None,
+#                 bias_attr=None,
+#                 gate_activation='sigmoid',
+#                 candidate_activation='tanh',
+#                 origin_mode=False):
+#        super(GRUCell, self).__init__()
+#        self.hidden_size = hidden_size
+#        self.fc_layer = nn.Linear(
+#            input_size,
+#            hidden_size * 3,
+#            weight_attr=param_attr,
+#            bias_attr=False)
+#
+#        self.gru_unit = fluid.dygraph.GRUUnit(
+#            hidden_size * 3,
+#            param_attr=param_attr,
+#            bias_attr=bias_attr,
+#            activation=candidate_activation,
+#            gate_activation=gate_activation,
+#            origin_mode=origin_mode)
+#
+#    def forward(self, inputs, states):
+#        # step_outputs, new_states = cell(step_inputs, states)
+#        # for GRUCell, `step_outputs` and `new_states` both are hidden
+#        x = self.fc_layer(inputs)
+#        hidden, _, _ = self.gru_unit(x, states)
+#        return hidden, hidden
+#
+#    @property
+#    def state_shape(self):
+#        return [self.hidden_size]
+#
 
 
 class Encoder(paddle.nn.Layer):
@@ -147,41 +142,41 @@ class Encoder(paddle.nn.Layer):
 
         self.backbone = CNN(in_ch=in_channel, is_test=is_test)
 
-        para_attr = fluid.ParamAttr(
-            initializer=fluid.initializer.Normal(0.0, 0.02))
-        bias_attr = fluid.ParamAttr(
-            initializer=fluid.initializer.Normal(0.0, 0.02), learning_rate=2.0)
-        self.gru_fwd = RNN(cell=GRUCell(
-            input_size=128 * 6,
-            hidden_size=rnn_hidden_size,
-            param_attr=para_attr,
-            bias_attr=bias_attr,
-            candidate_activation='relu'),
-                           is_reverse=False,
-                           time_major=False)
-        self.gru_bwd = RNN(cell=GRUCell(
-            input_size=128 * 6,
-            hidden_size=rnn_hidden_size,
-            param_attr=para_attr,
-            bias_attr=bias_attr,
-            candidate_activation='relu'),
-                           is_reverse=True,
-                           time_major=False)
-        self.encoded_proj_fc = fluid.dygraph.Linear(
+        para_attr = paddle.ParamAttr(
+            initializer=paddle.nn.initializer.Normal(0.0, 0.02))
+        bias_attr = paddle.ParamAttr(
+            initializer=paddle.nn.initializer.Normal(0.0, 0.02),
+            learning_rate=2.0)
+        self.gru_fwd = nn.RNN(
+            cell=nn.GRUCell(
+                input_size=128 * 6, hidden_size=rnn_hidden_size),
+            # param_attr=para_attr,
+            # bias_attr=bias_attr,
+            # candidate_activation='relu'),
+            is_reverse=False,
+            time_major=False)
+        self.gru_bwd = nn.RNN(
+            cell=nn.GRUCell(
+                input_size=128 * 6, hidden_size=rnn_hidden_size),
+            # param_attr=para_attr,
+            # bias_attr=bias_attr,
+            # candidate_activation='relu'),
+            is_reverse=True,
+            time_major=False)
+        self.encoded_proj_fc = nn.Linear(
             rnn_hidden_size * 2, decoder_size, bias_attr=False)
 
     def forward(self, inputs):
         conv_features = self.backbone(inputs)
-        conv_features = fluid.layers.transpose(
-            conv_features, perm=[0, 3, 1, 2])
+        conv_features = paddle.transpose(conv_features, perm=[0, 3, 1, 2])
         n, w, c, h = conv_features.shape
-        seq_feature = fluid.layers.reshape(conv_features, [0, -1, c * h])
+        seq_feature = paddle.reshape(conv_features, [0, -1, c * h])
         gru_fwd, _ = self.gru_fwd(seq_feature)
         gru_bwd, _ = self.gru_bwd(seq_feature)
-        encoded_vector = fluid.layers.concat(input=[gru_fwd, gru_bwd], axis=2)
+        encoded_vector = paddle.concat([gru_fwd, gru_bwd], axis=2)
         encoded_proj = self.encoded_proj_fc(encoded_vector)
         return gru_bwd, encoded_vector, encoded_proj
@@ -194,39 +189,37 @@ class Attention(paddle.nn.Layer):
 
     def __init__(self, decoder_size):
         super(Attention, self).__init__()
-        self.fc1 = fluid.dygraph.Linear(
-            decoder_size, decoder_size, bias_attr=False)
-        self.fc2 = fluid.dygraph.Linear(decoder_size, 1, bias_attr=False)
+        self.fc1 = nn.Linear(decoder_size, decoder_size, bias_attr=False)
+        self.fc2 = nn.Linear(decoder_size, 1, bias_attr=False)
 
     def forward(self, encoder_vec, encoder_proj, decoder_state):
         # alignment model, single-layer multilayer perceptron
         decoder_state = self.fc1(decoder_state)
-        decoder_state = fluid.layers.unsqueeze(decoder_state, [1])
+        decoder_state = paddle.unsqueeze(decoder_state, [1])
 
-        e = fluid.layers.elementwise_add(encoder_proj, decoder_state)
-        e = fluid.layers.tanh(e)
+        e = paddle.add(encoder_proj, decoder_state)
+        e = paddle.tanh(e)
 
         att_scores = self.fc2(e)
-        att_scores = fluid.layers.squeeze(att_scores, [2])
-        att_scores = fluid.layers.softmax(att_scores)
+        att_scores = paddle.squeeze(att_scores, [2])
+        att_scores = F.softmax(att_scores)
 
-        context = fluid.layers.elementwise_mul(
-            x=encoder_vec, y=att_scores, axis=0)
-        context = fluid.layers.reduce_sum(context, dim=1)
+        context = paddle.multiply(encoder_vec, att_scores, axis=0)
+        context = paddle.reduce_sum(context, dim=1)
         return context
 
 
-class DecoderCell(RNNCell):
+class DecoderCell(nn.RNNCellBase):
     def __init__(self, encoder_size=200, decoder_size=128):
         super(DecoderCell, self).__init__()
         self.attention = Attention(decoder_size)
-        self.gru_cell = GRUCell(
+        self.gru_cell = nn.GRUCell(
             input_size=encoder_size * 2 + decoder_size,
             hidden_size=decoder_size)
 
     def forward(self, current_word, states, encoder_vec, encoder_proj):
         context = self.attention(encoder_vec, encoder_proj, states)
-        decoder_inputs = fluid.layers.concat([current_word, context], axis=1)
+        decoder_inputs = paddle.concat([current_word, context], axis=1)
         hidden, _ = self.gru_cell(decoder_inputs, states)
         return hidden, hidden
@@ -234,9 +227,9 @@ class DecoderCell(RNNCell):
 class Decoder(paddle.nn.Layer):
     def __init__(self, num_classes, emb_dim, encoder_size, decoder_size):
         super(Decoder, self).__init__()
-        self.decoder_attention = RNN(DecoderCell(encoder_size, decoder_size))
-        self.fc = fluid.dygraph.Linear(
-            decoder_size, num_classes + 2, act='softmax')
+        self.decoder_attention = nn.RNN(
+            DecoderCell(encoder_size, decoder_size))
+        self.fc = nn.Linear(decoder_size, num_classes + 2)
 
     def forward(self, target, initial_states, encoder_vec, encoder_proj):
         out, _ = self.decoder_attention(
@@ -258,13 +251,10 @@ class Seq2SeqAttModel(paddle.nn.Layer):
             num_classes=None, ):
         super(Seq2SeqAttModel, self).__init__()
         self.encoder = Encoder(in_channle, encoder_size, decoder_size)
-        self.fc = fluid.dygraph.Linear(
-            input_dim=encoder_size,
-            output_dim=decoder_size,
-            bias_attr=False,
-            act='relu')
-        self.embedding = fluid.dygraph.Embedding(
-            [num_classes + 2, emb_dim], dtype='float32')
+        self.fc = nn.Sequential(
+            nn.Linear(
+                encoder_size, decoder_size, bias_attr=False), nn.ReLU())
+        self.embedding = nn.Embedding(num_classes + 2, emb_dim)
         self.decoder = Decoder(num_classes, emb_dim, encoder_size,
                                decoder_size)
 
@@ -326,7 +316,10 @@ class WeightCrossEntropy(paddle.nn.Layer):
         super(WeightCrossEntropy, self).__init__()
 
     def forward(self, predict, label, mask):
-        loss = layers.cross_entropy(predict, label=label)
-        loss = layers.elementwise_mul(loss, mask, axis=0)
-        loss = layers.reduce_sum(loss)
+        predict = paddle.flatten(predict, start_axis=0, stop_axis=1)
+        label = paddle.reshape(label, shape=[-1, 1])
+        mask = paddle.reshape(mask, shape=[-1, 1])
+        # keep per-token losses so the padding mask can zero them out
+        loss = F.cross_entropy(predict, label=label, reduction='none')
+        loss = paddle.multiply(loss, mask, axis=0)
+        loss = paddle.sum(loss)
         return loss
diff --git a/ocr/train.py b/ocr/train.py
index 364f39e1360aa1e344915b9d66addd101a5d0740..c72f142527a291ceeafad07f2c815172e8fc3087 100644
--- a/ocr/train.py
+++ b/ocr/train.py
@@ -59,7 +59,7 @@ add_arg('dynamic', bool, False, "Whether to use dygraph.")
 
 def main(FLAGS):
     device = paddle.set_device("gpu" if FLAGS.use_gpu else "cpu")
-    fluid.enable_dygraph(device) if FLAGS.dynamic else None
+    paddle.disable_static(device) if FLAGS.dynamic else None
 
     # yapf: disable
     inputs = [
@@ -100,7 +100,7 @@
         [data.Resize(), data.Normalize(), data.PadTarget()])
     train_sampler = data.BatchSampler(
         train_dataset, batch_size=FLAGS.batch_size, shuffle=True)
-    train_loader = fluid.io.DataLoader(
+    train_loader = paddle.io.DataLoader(
         train_dataset,
         batch_sampler=train_sampler,
         places=device,
@@ -115,7 +115,7 @@
         batch_size=FLAGS.batch_size,
         drop_last=False,
         shuffle=False)
-    test_loader = fluid.io.DataLoader(
+    test_loader = paddle.io.DataLoader(
         test_dataset,
         batch_sampler=test_sampler,
         places=device,
diff --git a/ocr/utility.py b/ocr/utility.py
index b3c6230ee603b451feb06cc69a5996cde701a588..078ad03d0bffe556dab58f302e86d8ad4e6ead0a 100644
--- a/ocr/utility.py
+++ b/ocr/utility.py
@@ -21,7 +20,6 @@ import numpy as np
 import six
 
 import paddle
-import paddle.fluid as fluid
 from paddle.metric import Metric
 
 
@@ -74,8 +73,8 @@ class SeqAccuracy(Metric):
         self.reset()
 
     def compute(self, output, label, mask, *args, **kwargs):
-        pred = fluid.layers.flatten(output, axis=2)
-        score, topk = fluid.layers.topk(pred, 1)
+        pred = paddle.flatten(output, start_axis=0, stop_axis=1)
+        score, topk = paddle.topk(pred, 1)
         return topk, label, mask
 
     def update(self, topk, label, mask, *args, **kwargs):
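
A minimal, self-contained sketch of the masked sequence loss that the migrated WeightCrossEntropy computes, assuming a Paddle 2.x dygraph environment. This is an illustration, not part of the patch; the shapes, sizes, and variable names below are invented for the example:

    import paddle
    import paddle.nn.functional as F

    # Toy shapes: batch of 2 sequences, 5 decoding steps, 10 target classes.
    N, T, C = 2, 5, 10
    logits = paddle.randn([N, T, C])             # unnormalized decoder scores
    labels = paddle.randint(0, C, [N, T])        # int64 class ids
    mask = paddle.ones([N, T], dtype='float32')  # 0.0 would mark padded steps

    # Same reshaping as WeightCrossEntropy.forward above.
    flat_logits = paddle.flatten(logits, start_axis=0, stop_axis=1)  # [N*T, C]
    flat_labels = paddle.reshape(labels, [-1, 1])                    # [N*T, 1]
    flat_mask = paddle.reshape(mask, [-1, 1])                        # [N*T, 1]

    # reduction='none' keeps one loss per token so padding can be masked out
    # before summation; the default 'mean' would collapse to a scalar first.
    loss = F.cross_entropy(flat_logits, flat_labels, reduction='none')
    loss = paddle.sum(loss * flat_mask)
    print(float(loss))

One portability caveat on Attention.forward: paddle.multiply(encoder_vec, att_scores, axis=0) and paddle.reduce_sum(context, dim=1) match the 2.0-era API this patch targets, but later Paddle releases dropped both the axis argument and the reduce_* aliases; an equivalent broadcasting form there would be paddle.sum(encoder_vec * paddle.unsqueeze(att_scores, [2]), axis=1).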