Unverified commit 2a3b9976 authored by qingqing01, committed by GitHub

Merge pull request #89 from qingqing01/cyclegan_ocr_update

Update OCR model
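This change replaces the experimental `paddle.incubate.hapi` high-level API with its Paddle 2.0 counterparts throughout the OCR model: `Input`/`set_device` become `paddle.static.InputSpec`/`paddle.set_device`, datasets and transforms move to `paddle.vision`, metrics to `paddle.metric`, and the network is wrapped in `paddle.Model` instead of subclassing the hapi `Model`. The sketch below is a minimal, hedged illustration of the new workflow; the toy network, shapes, and names are placeholders, not this repo's `Seq2SeqAttModel`.

```python
# Hedged sketch of the Paddle 2.0 high-level API used after this commit.
# The network and specs here are illustrative stand-ins only.
import paddle
from paddle.static import InputSpec

paddle.set_device("cpu")  # replaces hapi's set_device(); use "gpu" if available

net = paddle.nn.Sequential(          # stand-in for Seq2SeqAttModel
    paddle.nn.Flatten(),
    paddle.nn.Linear(1 * 48 * 384, 95))

# InputSpec lists now go to the Model constructor, not to prepare().
inputs = [InputSpec([None, 1, 48, 384], "float32", name="pixel")]
labels = [InputSpec([None, 1], "int64", name="label")]
model = paddle.Model(net, inputs, labels)

# prepare() keeps only optimizer/loss/metrics; device and specs are gone.
model.prepare(
    optimizer=paddle.optimizer.Adam(parameters=model.parameters()),
    loss=paddle.nn.CrossEntropyLoss(),
    metrics=paddle.metric.Accuracy())
```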
@@ -16,11 +16,11 @@ from __future__ import print_function
 import argparse
 import functools
-import paddle.fluid.profiler as profiler
+import paddle
 import paddle.fluid as fluid
-from paddle.incubate.hapi.model import Input, set_device
-from paddle.incubate.hapi.vision.transforms import BatchCompose
+from paddle.static import InputSpec as Input
+from paddle.vision.transforms import BatchCompose
 from utility import add_arguments, print_arguments
 from utility import SeqAccuracy, LoggerCallBack, SeqBeamAccuracy
@@ -46,13 +46,8 @@ add_arg('dynamic', bool, False, "Whether to use dygraph.")
 def main(FLAGS):
-    device = set_device("gpu" if FLAGS.use_gpu else "cpu")
+    device = paddle.set_device("gpu" if FLAGS.use_gpu else "cpu")
     fluid.enable_dygraph(device) if FLAGS.dynamic else None
-    model = Seq2SeqAttModel(
-        encoder_size=FLAGS.encoder_size,
-        decoder_size=FLAGS.decoder_size,
-        emb_dim=FLAGS.embedding_dim,
-        num_classes=FLAGS.num_classes)
     # yapf: disable
     inputs = [
@@ -64,13 +59,16 @@ def main(FLAGS):
         Input([None, None], "float32", name="mask")
     ]
     # yapf: enable
-    model.prepare(
-        loss_function=WeightCrossEntropy(),
-        metrics=SeqAccuracy(),
-        inputs=inputs,
-        labels=labels,
-        device=device)
+    model = paddle.Model(
+        Seq2SeqAttModel(
+            encoder_size=FLAGS.encoder_size,
+            decoder_size=FLAGS.decoder_size,
+            emb_dim=FLAGS.embedding_dim,
+            num_classes=FLAGS.num_classes),
+        inputs=inputs,
+        labels=labels)
+    model.prepare(loss=WeightCrossEntropy(), metrics=SeqAccuracy())
     model.load(FLAGS.init_model)
     test_dataset = data.test()
@@ -97,29 +95,29 @@ def main(FLAGS):
 def beam_search(FLAGS):
     device = set_device("gpu" if FLAGS.use_gpu else "cpu")
     fluid.enable_dygraph(device) if FLAGS.dynamic else None
-    model = Seq2SeqAttInferModel(
-        encoder_size=FLAGS.encoder_size,
-        decoder_size=FLAGS.decoder_size,
-        emb_dim=FLAGS.embedding_dim,
-        num_classes=FLAGS.num_classes,
-        beam_size=FLAGS.beam_size)
+    # yapf: disable
     inputs = [
-        Input(
-            [None, 1, 48, 384], "float32", name="pixel"), Input(
-                [None, None], "int64", name="label_in")
+        Input([None, 1, 48, 384], "float32", name="pixel"),
+        Input([None, None], "int64", name="label_in")
     ]
     labels = [
-        Input(
-            [None, None], "int64", name="label_out"), Input(
-                [None, None], "float32", name="mask")
+        Input([None, None], "int64", name="label_out"),
+        Input([None, None], "float32", name="mask")
     ]
-    model.prepare(
-        loss_function=None,
-        metrics=SeqBeamAccuracy(),
-        inputs=inputs,
-        labels=labels,
-        device=device)
+    # yapf: enable
+    model = paddle.Model(
+        Seq2SeqAttInferModel(
+            encoder_size=FLAGS.encoder_size,
+            decoder_size=FLAGS.decoder_size,
+            emb_dim=FLAGS.embedding_dim,
+            num_classes=FLAGS.num_classes,
+            beam_size=FLAGS.beam_size),
+        inputs=inputs,
+        labels=labels)
+    model.prepare(loss_function=None, metrics=SeqBeamAccuracy())
     model.load(FLAGS.init_model)
     test_dataset = data.test()
......
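With the hunks above applied, evaluation builds the `paddle.Model` after the `InputSpec` lists and calls `prepare(loss=..., metrics=...)` without `inputs`, `labels`, or `device` arguments; the pass over the data then goes through `model.evaluate()`. The following is a hedged, self-contained sketch with a toy dataset; the real script uses the repo's OCR data pipeline and `SeqAccuracy`.

```python
# Hedged sketch of model.evaluate() in the Paddle 2.0 API; the dataset,
# network and metric are stand-ins for the repo's OCR pipeline.
import numpy as np
import paddle
from paddle.static import InputSpec

class ToyEvalSet(paddle.io.Dataset):
    def __len__(self):
        return 8
    def __getitem__(self, idx):
        image = np.random.rand(1, 48, 384).astype("float32")
        label = np.array([idx % 10], dtype="int64")
        return image, label

net = paddle.nn.Sequential(paddle.nn.Flatten(), paddle.nn.Linear(48 * 384, 10))
model = paddle.Model(net,
                     inputs=[InputSpec([None, 1, 48, 384], "float32", name="pixel")],
                     labels=[InputSpec([None, 1], "int64", name="label")])
model.prepare(loss=paddle.nn.CrossEntropyLoss(), metrics=paddle.metric.Accuracy())
result = model.evaluate(ToyEvalSet(), batch_size=4)
print(result)  # dict keyed by 'loss' and the metric name, e.g. 'acc'
```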
@@ -22,12 +22,12 @@ import argparse
 import functools
 from PIL import Image
-import paddle.fluid.profiler as profiler
+import paddle
 import paddle.fluid as fluid
-from paddle.incubate.hapi.model import Input, set_device
-from paddle.incubate.hapi.datasets.folder import ImageFolder
-from paddle.incubate.hapi.vision.transforms import BatchCompose
+from paddle.static import InputSpec as Input
+from paddle.vision.datasets.folder import ImageFolder
+from paddle.vision.transforms import BatchCompose
 from utility import add_arguments, print_arguments
 from utility import postprocess, index2word
@@ -52,18 +52,20 @@ add_arg('dynamic', bool, False, "Whether to use dygraph.")
 def main(FLAGS):
-    device = set_device("gpu" if FLAGS.use_gpu else "cpu")
+    device = paddle.set_device("gpu" if FLAGS.use_gpu else "cpu")
     fluid.enable_dygraph(device) if FLAGS.dynamic else None
-    model = Seq2SeqAttInferModel(
-        encoder_size=FLAGS.encoder_size,
-        decoder_size=FLAGS.decoder_size,
-        emb_dim=FLAGS.embedding_dim,
-        num_classes=FLAGS.num_classes,
-        beam_size=FLAGS.beam_size)
     inputs = [Input([None, 1, 48, 384], "float32", name="pixel"), ]
-    model.prepare(inputs=inputs, device=device)
+    model = paddle.Model(
+        Seq2SeqAttInferModel(
+            encoder_size=FLAGS.encoder_size,
+            decoder_size=FLAGS.decoder_size,
+            emb_dim=FLAGS.embedding_dim,
+            num_classes=FLAGS.num_classes,
+            beam_size=FLAGS.beam_size),
+        inputs)
+    model.prepare()
     model.load(FLAGS.init_model)
     fn = lambda p: Image.open(p).convert('L')
......
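In the prediction script the inference network is likewise wrapped in `paddle.Model(..., inputs)`, and `prepare()` is called without arguments since no loss, metric, or device is needed for pure inference; results come from `model.predict()`. Below is a hedged sketch with stand-in data.

```python
# Hedged sketch of model.predict(); the layer and data are stand-ins, not the
# repo's Seq2SeqAttInferModel or ImageFolder pipeline.
import numpy as np
import paddle
from paddle.static import InputSpec

class ToyImages(paddle.io.Dataset):
    def __len__(self):
        return 4
    def __getitem__(self, idx):
        # Predict-mode datasets return inputs only, no labels.
        return (np.random.rand(1, 48, 384).astype("float32"),)

net = paddle.nn.Sequential(paddle.nn.Flatten(), paddle.nn.Linear(48 * 384, 10))
model = paddle.Model(net, [InputSpec([None, 1, 48, 384], "float32", name="pixel")])
model.prepare()                            # nothing to configure for inference
outputs = model.predict(ToyImages(), batch_size=2)
print(len(outputs), outputs[0][0].shape)   # one output head, per-batch arrays
```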
...@@ -15,16 +15,15 @@ from __future__ import print_function ...@@ -15,16 +15,15 @@ from __future__ import print_function
import numpy as np import numpy as np
import paddle
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.layers as layers import paddle.fluid.layers as layers
from paddle.fluid.layers import BeamSearchDecoder from paddle.fluid.layers import BeamSearchDecoder
from paddle.incubate.hapi.text import RNNCell, RNN, DynamicDecode from paddle.text import RNNCell, RNN, DynamicDecode
from paddle.incubate.hapi.model import Model
from paddle.incubate.hapi.loss import Loss
class ConvBNPool(fluid.dygraph.Layer): class ConvBNPool(paddle.nn.Layer):
def __init__(self, def __init__(self,
in_ch, in_ch,
out_ch, out_ch,
...@@ -83,7 +82,7 @@ class ConvBNPool(fluid.dygraph.Layer): ...@@ -83,7 +82,7 @@ class ConvBNPool(fluid.dygraph.Layer):
return out return out
class CNN(fluid.dygraph.Layer): class CNN(paddle.nn.Layer):
def __init__(self, in_ch=1, is_test=False): def __init__(self, in_ch=1, is_test=False):
super(CNN, self).__init__() super(CNN, self).__init__()
self.conv_bn1 = ConvBNPool(in_ch, 16) self.conv_bn1 = ConvBNPool(in_ch, 16)
...@@ -136,7 +135,7 @@ class GRUCell(RNNCell): ...@@ -136,7 +135,7 @@ class GRUCell(RNNCell):
return [self.hidden_size] return [self.hidden_size]
class Encoder(fluid.dygraph.Layer): class Encoder(paddle.nn.Layer):
def __init__( def __init__(
self, self,
in_channel=1, in_channel=1,
...@@ -187,7 +186,7 @@ class Encoder(fluid.dygraph.Layer): ...@@ -187,7 +186,7 @@ class Encoder(fluid.dygraph.Layer):
return gru_bwd, encoded_vector, encoded_proj return gru_bwd, encoded_vector, encoded_proj
class Attention(fluid.dygraph.Layer): class Attention(paddle.nn.Layer):
""" """
Neural Machine Translation by Jointly Learning to Align and Translate. Neural Machine Translation by Jointly Learning to Align and Translate.
https://arxiv.org/abs/1409.0473 https://arxiv.org/abs/1409.0473
...@@ -232,7 +231,7 @@ class DecoderCell(RNNCell): ...@@ -232,7 +231,7 @@ class DecoderCell(RNNCell):
return hidden, hidden return hidden, hidden
class Decoder(fluid.dygraph.Layer): class Decoder(paddle.nn.Layer):
def __init__(self, num_classes, emb_dim, encoder_size, decoder_size): def __init__(self, num_classes, emb_dim, encoder_size, decoder_size):
super(Decoder, self).__init__() super(Decoder, self).__init__()
self.decoder_attention = RNN(DecoderCell(encoder_size, decoder_size)) self.decoder_attention = RNN(DecoderCell(encoder_size, decoder_size))
...@@ -249,7 +248,7 @@ class Decoder(fluid.dygraph.Layer): ...@@ -249,7 +248,7 @@ class Decoder(fluid.dygraph.Layer):
return pred return pred
class Seq2SeqAttModel(Model): class Seq2SeqAttModel(paddle.nn.Layer):
def __init__( def __init__(
self, self,
in_channle=1, in_channle=1,
...@@ -322,12 +321,11 @@ class Seq2SeqAttInferModel(Seq2SeqAttModel): ...@@ -322,12 +321,11 @@ class Seq2SeqAttInferModel(Seq2SeqAttModel):
return rs return rs
class WeightCrossEntropy(Loss): class WeightCrossEntropy(paddle.nn.Layer):
def __init__(self): def __init__(self):
super(WeightCrossEntropy, self).__init__(average=False) super(WeightCrossEntropy, self).__init__()
def forward(self, outputs, labels): def forward(self, predict, label, mask):
predict, (label, mask) = outputs[0], labels
loss = layers.cross_entropy(predict, label=label) loss = layers.cross_entropy(predict, label=label)
loss = layers.elementwise_mul(loss, mask, axis=0) loss = layers.elementwise_mul(loss, mask, axis=0)
loss = layers.reduce_sum(loss) loss = layers.reduce_sum(loss)
......
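The hapi `Loss` base class is gone, so `WeightCrossEntropy` is now an ordinary `paddle.nn.Layer` whose `forward` takes the prediction, label, and mask tensors directly instead of unpacking `outputs`/`labels` lists. Below is a self-contained sketch of that masked-loss pattern written with `paddle.nn.functional`; note the repo's version still uses `fluid.layers` and its model outputs softmax probabilities rather than the logits used here.

```python
# Hedged sketch of a masked cross-entropy as a plain paddle.nn.Layer, matching
# the new forward(predict, label, mask) signature; shapes are illustrative.
import paddle
import paddle.nn.functional as F

class MaskedCrossEntropy(paddle.nn.Layer):
    def forward(self, predict, label, mask):
        # predict: [N, C] logits, label: [N, 1] int64, mask: [N] float32
        loss = F.cross_entropy(predict, label, reduction="none")
        loss = loss.reshape([-1]) * mask      # zero out padded time steps
        return loss.sum()

pred = paddle.rand([6, 95])
label = paddle.randint(0, 95, [6, 1])
mask = paddle.to_tensor([1.0, 1.0, 1.0, 1.0, 0.0, 0.0])
print(MaskedCrossEntropy()(pred, label, mask))
```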
@@ -21,11 +21,11 @@ import numpy as np
 import argparse
 import functools
-import paddle.fluid.profiler as profiler
+import paddle
 import paddle.fluid as fluid
-from paddle.incubate.hapi.model import Input, set_device
-from paddle.incubate.hapi.vision.transforms import BatchCompose
+from paddle.static import InputSpec as Input
+from paddle.vision.transforms import BatchCompose
 from utility import add_arguments, print_arguments
 from utility import SeqAccuracy, LoggerCallBack
@@ -58,14 +58,28 @@ add_arg('dynamic', bool, False, "Whether to use dygraph.")
 def main(FLAGS):
-    device = set_device("gpu" if FLAGS.use_gpu else "cpu")
+    device = paddle.set_device("gpu" if FLAGS.use_gpu else "cpu")
     fluid.enable_dygraph(device) if FLAGS.dynamic else None
-    model = Seq2SeqAttModel(
-        encoder_size=FLAGS.encoder_size,
-        decoder_size=FLAGS.decoder_size,
-        emb_dim=FLAGS.embedding_dim,
-        num_classes=FLAGS.num_classes)
+    # yapf: disable
+    inputs = [
+        Input([None,1,48,384], "float32", name="pixel"),
+        Input([None, None], "int64", name="label_in"),
+    ]
+    labels = [
+        Input([None, None], "int64", name="label_out"),
+        Input([None, None], "float32", name="mask"),
+    ]
+    # yapf: enable
+    model = paddle.Model(
+        Seq2SeqAttModel(
+            encoder_size=FLAGS.encoder_size,
+            decoder_size=FLAGS.decoder_size,
+            emb_dim=FLAGS.embedding_dim,
+            num_classes=FLAGS.num_classes),
+        inputs,
+        labels)
     lr = FLAGS.lr
     if FLAGS.lr_decay_strategy == "piecewise_decay":
@@ -79,23 +93,7 @@ def main(FLAGS):
         parameter_list=model.parameters(),
         grad_clip=grad_clip)
-    # yapf: disable
-    inputs = [
-        Input([None,1,48,384], "float32", name="pixel"),
-        Input([None, None], "int64", name="label_in"),
-    ]
-    labels = [
-        Input([None, None], "int64", name="label_out"),
-        Input([None, None], "float32", name="mask"),
-    ]
-    # yapf: enable
-    model.prepare(
-        optimizer,
-        WeightCrossEntropy(),
-        SeqAccuracy(),
-        inputs=inputs,
-        labels=labels)
+    model.prepare(optimizer, WeightCrossEntropy(), SeqAccuracy())
     train_dataset = data.train()
     train_collate_fn = BatchCompose(
......
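Training now defines the `InputSpec` lists up front, passes them to `paddle.Model` positionally, and configures everything with a single positional `prepare(optimizer, loss, metric)` call; the loop itself is driven by `model.fit()`. The sketch below is a hedged, self-contained illustration with a toy network and dataset standing in for the repo's OCR data, `WeightCrossEntropy`, and `SeqAccuracy`.

```python
# Hedged sketch of prepare()/fit() with paddle.Model; all pieces are stand-ins.
import numpy as np
import paddle
from paddle.static import InputSpec

class ToyTrainSet(paddle.io.Dataset):
    def __len__(self):
        return 32
    def __getitem__(self, idx):
        image = np.random.rand(1, 48, 384).astype("float32")
        label = np.array([idx % 10], dtype="int64")
        return image, label

net = paddle.nn.Sequential(paddle.nn.Flatten(), paddle.nn.Linear(48 * 384, 10))
model = paddle.Model(net,
                     [InputSpec([None, 1, 48, 384], "float32", name="pixel")],
                     [InputSpec([None, 1], "int64", name="label")])
model.prepare(
    paddle.optimizer.Adam(learning_rate=1e-3, parameters=model.parameters()),
    paddle.nn.CrossEntropyLoss(),
    paddle.metric.Accuracy())
model.fit(ToyTrainSet(), epochs=1, batch_size=8, verbose=1)
```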
@@ -18,11 +18,12 @@ from __future__ import division
 from __future__ import print_function
 import distutils.util
 import numpy as np
-import paddle.fluid as fluid
 import six
-from paddle.incubate.hapi.metrics import Metric
-from paddle.incubate.hapi.callbacks import ProgBarLogger
+import paddle
+import paddle.fluid as fluid
+from paddle.metric import Metric
 def print_arguments(args):
@@ -72,7 +73,7 @@ class SeqAccuracy(Metric):
         self._name = 'seq_acc'
         self.reset()
-    def add_metric_op(self, output, label, mask, *args, **kwargs):
+    def compute(self, output, label, mask, *args, **kwargs):
         pred = fluid.layers.flatten(output, axis=2)
         score, topk = fluid.layers.topk(pred, 1)
         return topk, label, mask
@@ -102,7 +103,7 @@ class SeqAccuracy(Metric):
         return self._name
-class LoggerCallBack(ProgBarLogger):
+class LoggerCallBack(paddle.callbacks.ProgBarLogger):
     def __init__(self, log_freq=1, verbose=2, train_bs=None, eval_bs=None):
         super(LoggerCallBack, self).__init__(log_freq, verbose)
         self.train_bs = train_bs
@@ -153,7 +154,7 @@ class SeqBeamAccuracy(Metric):
         self._name = 'seq_acc'
         self.reset()
-    def add_metric_op(self, output, label, mask, *args, **kwargs):
+    def compute(self, output, label, mask, *args, **kwargs):
         return output, label, mask
     def update(self, preds, labels, masks, *args, **kwargs):
......
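On the utility side, the metrics now subclass `paddle.metric.Metric` and the tensor-side hook is renamed from `add_metric_op` to `compute`; `compute` runs in the graph on the model outputs and labels, while `update` receives its results as numpy arrays. Below is a hedged sketch of that interface with a much simpler accuracy than the repo's `SeqAccuracy`.

```python
# Hedged sketch of a paddle.metric.Metric subclass using the renamed compute()
# hook; the element-wise accuracy below is simpler than the repo's SeqAccuracy.
import paddle

class SimpleAccuracy(paddle.metric.Metric):
    def __init__(self, name="simple_acc"):
        super().__init__()
        self._name = name
        self.reset()

    def compute(self, output, label, *args):
        # Graph side: reduce raw outputs to class ids before update() is called.
        return paddle.argmax(output, axis=-1), label

    def update(self, pred_ids, label, *args):
        # Numpy side: accumulate running counts across batches.
        self.correct += int((pred_ids.reshape(-1) == label.reshape(-1)).sum())
        self.total += int(label.size)

    def reset(self):
        self.correct, self.total = 0, 0

    def accumulate(self):
        return self.correct / max(self.total, 1)

    def name(self):
        return self._name
```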