Unverified commit 358ca21d authored by Bai Yifan, committed by GitHub

Add deep mutual learning (#302)

Parent e9c8d9bf
# Deep Mutual Learning (DML)
This example shows how to train models with PaddleSlim's Deep Mutual Learning (DML) method. For the algorithm itself, please refer to the paper [Deep Mutual Learning](https://arxiv.org/abs/1706.00384).
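For reference, with a cohort of $K$ peer models, each model $i$ is optimized with its own cross-entropy loss plus the average KL divergence from every other peer's predicted distribution $p_j$ to its own distribution $p_i$; this is the loss implemented in `dml.py` below:

$$L_i = L_{CE}(y, p_i) + \frac{1}{K - 1}\sum_{j \neq i} D_{KL}(p_j \,\|\, p_i)$$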
## Dataset
This example trains on the CIFAR-100 dataset. You can either let it be downloaded automatically when training starts,
or download the [dataset](https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz) yourself and place it under `./dataset/cifar100` in the current directory.
## Launch commands
Single-GPU training, using GPU 0 as an example:
```bash
CUDA_VISIBLE_DEVICES=0 python dml_train.py
```
Multi-GPU training, using GPUs 0-3 as an example:
```bash
python -m paddle.distributed.launch --selected_gpus=0,1,2,3 --log_dir ./mylog dml_train.py --use_parallel=True
```
## Results
The following results can be reproduced with the default training configuration (learning rate, optimizer, etc.); only the combination of models trained jointly with DML was varied.
To improve the results further, you can try [more training tricks](https://arxiv.org/abs/1812.01187) or increase the number of models trained together in one DML run.
| Dataset | Models | Accuracy (trained independently) | Accuracy (with DML) |
| ------ | ------ | ------ | ------ |
| CIFAR100 | MobileNet X 2 | 73.65% | 76.34% (+2.69%) |
| CIFAR100 | MobileNet X 4 | 73.65% | 76.56% (+2.91%) |
| CIFAR100 | MobileNet + ResNet50 | 73.65%/76.52% | 76.00%/77.80% (+2.35%/+1.28%) |
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from PIL import Image, ImageOps
import os
import math
import random
import tarfile
import functools
import numpy as np
import paddle
# for python2/python3 compatibility
try:
    import cPickle
except ImportError:
    import _pickle as cPickle
IMAGE_SIZE = 32
IMAGE_DEPTH = 3
CIFAR_MEAN = [0.5070751592371323, 0.48654887331495095, 0.4409178433670343]
CIFAR_STD = [0.2673342858792401, 0.2564384629170883, 0.27615047132568404]
URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/'
CIFAR100_URL = URL_PREFIX + 'cifar-100-python.tar.gz'
CIFAR100_MD5 = 'eb9058c3a382ffc7106e4002c42a8d85'
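# download datasets into ./dataset/ instead of paddle's default cache directory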
paddle.dataset.common.DATA_HOME = "dataset/"
def preprocess(sample, is_training):
image_array = sample.reshape(IMAGE_DEPTH, IMAGE_SIZE, IMAGE_SIZE)
rgb_array = np.transpose(image_array, (1, 2, 0))
img = Image.fromarray(rgb_array, 'RGB')
if is_training:
        # pad, random crop, random horizontal flip, random rotation
img = ImageOps.expand(img, (4, 4, 4, 4), fill=0)
left_top = np.random.randint(8, size=2)
img = img.crop((left_top[1], left_top[0], left_top[1] + IMAGE_SIZE,
left_top[0] + IMAGE_SIZE))
if np.random.randint(2):
img = img.transpose(Image.FLIP_LEFT_RIGHT)
random_angle = np.random.randint(-15, 15)
img = img.rotate(random_angle, Image.NEAREST)
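    # scale to [0, 1], normalize with the per-channel CIFAR statistics, and convert HWC -> CHW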
img = np.array(img).astype(np.float32)
img_float = img / 255.0
img = (img_float - CIFAR_MEAN) / CIFAR_STD
img = np.transpose(img, (2, 0, 1))
return img
def reader_generator(datasets, batch_size, is_training, is_shuffle):
def read_batch(datasets):
if is_shuffle:
random.shuffle(datasets)
for im, label in datasets:
im = preprocess(im, is_training)
yield im, [int(label)]
def reader():
batch_data = []
batch_label = []
for data in read_batch(datasets):
batch_data.append(data[0])
batch_label.append(data[1])
if len(batch_data) == batch_size:
batch_data = np.array(batch_data, dtype='float32')
batch_label = np.array(batch_label, dtype='int64')
batch_out = [batch_data, batch_label]
yield batch_out
batch_data = []
batch_label = []
return reader
def cifar100_reader(file_name, data_name, is_shuffle):
with tarfile.open(file_name, mode='r') as f:
names = [
each_item.name for each_item in f if data_name in each_item.name
]
names.sort()
datasets = []
for name in names:
print("Reading file " + name)
            try:
                batch = cPickle.load(
                    f.extractfile(name), encoding='iso-8859-1')
            except TypeError:
                # python2's cPickle.load does not accept the encoding argument
                batch = cPickle.load(f.extractfile(name))
data = batch['data']
labels = batch.get('labels', batch.get('fine_labels', None))
assert labels is not None
dataset = zip(data, labels)
datasets.extend(dataset)
if is_shuffle:
random.shuffle(datasets)
return datasets
def train_valid(batch_size, is_train, is_shuffle):
name = 'train' if is_train else 'test'
datasets = cifar100_reader(
paddle.dataset.common.download(CIFAR100_URL, 'cifar', CIFAR100_MD5),
name, is_shuffle)
reader = reader_generator(datasets, batch_size, is_train, is_shuffle)
return reader
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import argparse
import functools
import logging
import paddle.fluid as fluid
from paddle.fluid.dygraph.base import to_variable
from paddleslim.common import AvgrageMeter, get_logger
from paddleslim.dist import DML
from paddleslim.models.dygraph import MobileNetV1
import cifar100_reader as reader
sys.path[0] = os.path.join(os.path.dirname(__file__), os.path.pardir)
from utility import add_arguments, print_arguments
logger = get_logger(__name__, level=logging.INFO)
parser = argparse.ArgumentParser(description=__doc__)
add_arg = functools.partial(add_arguments, argparser=parser)
# yapf: disable
add_arg('log_freq', int, 100, "Log frequency.")
add_arg('batch_size', int, 256, "Minibatch size.")
add_arg('init_lr', float, 0.1, "The start learning rate.")
add_arg('use_gpu', bool, True, "Whether to use GPU.")
add_arg('epochs', int, 200, "Epoch number.")
add_arg('class_num', int, 100, "Class number of dataset.")
add_arg('trainset_num', int, 50000, "Images number of trainset.")
add_arg('model_save_dir', str, 'saved_models', "The path to save model.")
add_arg('use_multiprocess', bool, True, "Whether to use a multiprocess reader.")
add_arg('use_parallel', bool, False, "Whether to use data parallel mode to train the model.")
# yapf: enable
def create_optimizer(models, args):
device_num = fluid.dygraph.parallel.Env().nranks
step = int(args.trainset_num / (args.batch_size * device_num))
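    # drop the learning rate by 10x at epochs 60, 120 and 180 (PiecewiseDecay boundaries are in steps)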
epochs = [60, 120, 180]
bd = [step * e for e in epochs]
lr = [args.init_lr * (0.1**i) for i in range(len(bd) + 1)]
optimizers = []
for cur_model in models:
learning_rate = fluid.dygraph.PiecewiseDecay(bd, lr, 0)
opt = fluid.optimizer.MomentumOptimizer(
learning_rate,
0.9,
parameter_list=cur_model.parameters(),
use_nesterov=True,
regularization=fluid.regularizer.L2DecayRegularizer(5e-4))
optimizers.append(opt)
return optimizers
def create_reader(place, args):
train_reader = reader.train_valid(
batch_size=args.batch_size, is_train=True, is_shuffle=True)
valid_reader = reader.train_valid(
batch_size=args.batch_size, is_train=False, is_shuffle=False)
if args.use_parallel:
train_reader = fluid.contrib.reader.distributed_batch_reader(
train_reader)
train_loader = fluid.io.DataLoader.from_generator(
capacity=1024,
return_list=True,
use_multiprocess=args.use_multiprocess)
valid_loader = fluid.io.DataLoader.from_generator(
capacity=1024,
return_list=True,
use_multiprocess=args.use_multiprocess)
train_loader.set_batch_generator(train_reader, places=place)
valid_loader.set_batch_generator(valid_reader, places=place)
return train_loader, valid_loader
def train(train_loader, dml_model, dml_optimizer, args):
dml_model.train()
costs = [AvgrageMeter() for i in range(dml_model.model_num)]
accs = [AvgrageMeter() for i in range(dml_model.model_num)]
for step_id, (images, labels) in enumerate(train_loader):
images, labels = to_variable(images), to_variable(labels)
batch_size = images.shape[0]
logits = dml_model.forward(images)
precs = [
fluid.layers.accuracy(
input=l, label=labels, k=1) for l in logits
]
losses = dml_model.loss(logits, labels)
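        # each model's loss is its cross-entropy plus the averaged KL divergence
        # to the other models' predictions (see DML.loss in dml.py)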
dml_optimizer.minimize(losses)
for i in range(dml_model.model_num):
accs[i].update(precs[i].numpy(), batch_size)
costs[i].update(losses[i].numpy(), batch_size)
model_names = dml_model.full_name()
if step_id % args.log_freq == 0:
log_msg = "Train Step {}".format(step_id)
for model_id, (cost, acc) in enumerate(zip(costs, accs)):
log_msg += ", {} loss: {:.6f} acc: {:.6f}".format(
model_names[model_id], cost.avg[0], acc.avg[0])
logger.info(log_msg)
return costs, accs
def valid(valid_loader, dml_model, args):
dml_model.eval()
costs = [AvgrageMeter() for i in range(dml_model.model_num)]
accs = [AvgrageMeter() for i in range(dml_model.model_num)]
for step_id, (images, labels) in enumerate(valid_loader):
images, labels = to_variable(images), to_variable(labels)
batch_size = images.shape[0]
logits = dml_model.forward(images)
precs = [
fluid.layers.accuracy(
input=l, label=labels, k=1) for l in logits
]
losses = dml_model.loss(logits, labels)
for i in range(dml_model.model_num):
accs[i].update(precs[i].numpy(), batch_size)
costs[i].update(losses[i].numpy(), batch_size)
model_names = dml_model.full_name()
if step_id % args.log_freq == 0:
log_msg = "Valid Step{} ".format(step_id)
for model_id, (cost, acc) in enumerate(zip(costs, accs)):
log_msg += ", {} loss: {:.6f} acc: {:.6f}".format(
model_names[model_id], cost.avg[0], acc.avg[0])
logger.info(log_msg)
return costs, accs
def main(args):
if not args.use_gpu:
place = fluid.CPUPlace()
elif not args.use_parallel:
place = fluid.CUDAPlace(0)
else:
place = fluid.CUDAPlace(fluid.dygraph.parallel.Env().dev_id)
with fluid.dygraph.guard(place):
# 1. Define data reader
train_loader, valid_loader = create_reader(place, args)
# 2. Define neural network
models = [
MobileNetV1(class_dim=args.class_num),
MobileNetV1(class_dim=args.class_num)
]
optimizers = create_optimizer(models, args)
# 3. Use PaddleSlim DML strategy
dml_model = DML(models, args.use_parallel)
dml_optimizer = dml_model.opt(optimizers)
# 4. Train your network
save_parameters = (not args.use_parallel) or (
args.use_parallel and fluid.dygraph.parallel.Env().local_rank == 0)
best_valid_acc = [0] * dml_model.model_num
for epoch_id in range(args.epochs):
current_step_lr = dml_optimizer.get_lr()
lr_msg = "Epoch {}".format(epoch_id)
for model_id, lr in enumerate(current_step_lr):
lr_msg += ", {} lr: {:.6f}".format(
dml_model.full_name()[model_id], lr)
logger.info(lr_msg)
train_losses, train_accs = train(train_loader, dml_model,
dml_optimizer, args)
valid_losses, valid_accs = valid(valid_loader, dml_model, args)
for i in range(dml_model.model_num):
if valid_accs[i].avg[0] > best_valid_acc[i]:
best_valid_acc[i] = valid_accs[i].avg[0]
if save_parameters:
fluid.save_dygraph(
models[i].state_dict(),
os.path.join(args.model_save_dir,
dml_model.full_name()[i],
"best_model"))
                summary_msg = "Epoch {} {}: valid_loss {:.6f}, valid_acc {:.6f}, best_valid_acc {:.6f}"
                logger.info(
                    summary_msg.format(epoch_id,
                                       dml_model.full_name()[i],
                                       valid_losses[i].avg[0],
                                       valid_accs[i].avg[0],
                                       best_valid_acc[i]))
if __name__ == '__main__':
args = parser.parse_args()
print_arguments(args)
main(args)
@@ -13,3 +13,4 @@
# limitations under the License.
from .single_distiller import merge, fsp_loss, l2_loss, soft_label_loss, loss
from .dml import DML
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
import paddle.fluid as fluid
import paddle.nn.functional as F
class DML(fluid.dygraph.Layer):
def __init__(self, model, use_parallel):
super(DML, self).__init__()
self.model = model
self.use_parallel = use_parallel
self.model_num = len(self.model)
if self.use_parallel:
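            # wrap each peer model with DataParallel; its gradients are scaled and
            # all-reduced in DMLOptimizers.minimize via scale_loss/apply_collective_grads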
strategy = fluid.dygraph.parallel.prepare_context()
self.model = [
fluid.dygraph.parallel.DataParallel(m, strategy)
for m in self.model
]
def full_name(self):
return [m.full_name() for m in self.model]
def forward(self, input):
return [m(input) for m in self.model]
def opt(self, optimizer):
assert len(
optimizer
) == self.model_num, "The number of optimizers must match the number of models"
optimizer = DMLOptimizers(self.model, optimizer, self.use_parallel)
return optimizer
def ce_loss(self, logits, labels):
assert len(
logits
) == self.model_num, "The number of logits must match the number of models"
ce_losses = []
for i in range(self.model_num):
ce_losses.append(
fluid.layers.mean(
fluid.layers.softmax_with_cross_entropy(logits[i],
labels)))
return ce_losses
def kl_loss(self, logits):
assert len(
logits
) == self.model_num, "The number of logits must match the number of models"
if self.model_num == 1:
return []
kl_losses = []
for i in range(self.model_num):
cur_kl_loss = 0
for j in range(self.model_num):
if i != j:
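                    # fluid.layers.kldiv_loss(x, target) computes target * (log(target) - x),
                    # i.e. the KL divergence from peer j's distribution to model i's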
x = F.log_softmax(logits[i], axis=1)
y = fluid.layers.softmax(logits[j], axis=1)
cur_kl_loss += fluid.layers.kldiv_loss(
x, y, reduction='batchmean')
kl_losses.append(cur_kl_loss / (self.model_num - 1))
return kl_losses
def loss(self, logits, labels):
gt_losses = self.ce_loss(logits, labels)
kl_losses = self.kl_loss(logits)
if self.model_num > 1:
return [a + b for a, b in zip(gt_losses, kl_losses)]
else:
return gt_losses
def acc(self, logits, labels, k):
accs = [
fluid.layers.accuracy(
input=l, label=labels, k=k) for l in logits
]
return accs
def train(self):
for m in self.model:
m.train()
def eval(self):
for m in self.model:
m.eval()
class DMLOptimizers(object):
def __init__(self, model, optimizer, use_parallel):
self.model = model
self.optimizer = optimizer
self.use_parallel = use_parallel
def minimize(self, losses):
assert len(losses) == len(
self.optimizer
), "The number of losses must match the number of optimizers"
for i in range(len(losses)):
if self.use_parallel:
losses[i] = self.model[i].scale_loss(losses[i])
losses[i].backward()
self.model[i].apply_collective_grads()
else:
losses[i].backward()
self.optimizer[i].minimize(losses[i])
self.model[i].clear_gradients()
def get_lr(self):
current_step_lr = [opt.current_step_lr() for opt in self.optimizer]
return current_step_lr
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from .mobilenet import MobileNetV1
from .resnet import ResNet
__all__ = ["MobileNetV1", "ResNet"]
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# order: standard library, third party, local library
import os
import time
import sys
import math
import numpy as np
import argparse
import paddle
import paddle.fluid as fluid
from paddle.fluid.initializer import MSRA
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid import framework
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self,
num_channels,
filter_size,
num_filters,
stride,
padding,
channels=None,
num_groups=1,
act='relu',
use_cudnn=True,
name=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=ParamAttr(
initializer=MSRA(), name=self.full_name() + "_weights"),
bias_attr=False)
self._batch_norm = BatchNorm(
num_filters,
act=act,
param_attr=ParamAttr(name=self.full_name() + "_bn" + "_scale"),
bias_attr=ParamAttr(name=self.full_name() + "_bn" + "_offset"),
moving_mean_name=self.full_name() + "_bn" + '_mean',
moving_variance_name=self.full_name() + "_bn" + '_variance')
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class DepthwiseSeparable(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters1,
num_filters2,
num_groups,
stride,
scale,
name=None):
super(DepthwiseSeparable, self).__init__()
self._depthwise_conv = ConvBNLayer(
num_channels=num_channels,
num_filters=int(num_filters1 * scale),
filter_size=3,
stride=stride,
padding=1,
num_groups=int(num_groups * scale),
use_cudnn=False)
self._pointwise_conv = ConvBNLayer(
num_channels=int(num_filters1 * scale),
filter_size=1,
num_filters=int(num_filters2 * scale),
stride=1,
padding=0)
def forward(self, inputs):
y = self._depthwise_conv(inputs)
y = self._pointwise_conv(y)
return y
class MobileNetV1(fluid.dygraph.Layer):
def __init__(self, scale=1.0, class_dim=100):
super(MobileNetV1, self).__init__()
self.scale = scale
self.dwsl = []
self.conv1 = ConvBNLayer(
num_channels=3,
filter_size=3,
channels=3,
num_filters=int(32 * scale),
stride=1,
padding=1)
dws21 = self.add_sublayer(
sublayer=DepthwiseSeparable(
num_channels=int(32 * scale),
num_filters1=32,
num_filters2=64,
num_groups=32,
stride=1,
scale=scale),
name="conv2_1")
self.dwsl.append(dws21)
dws22 = self.add_sublayer(
sublayer=DepthwiseSeparable(
num_channels=int(64 * scale),
num_filters1=64,
num_filters2=128,
num_groups=64,
stride=1,
scale=scale),
name="conv2_2")
self.dwsl.append(dws22)
dws31 = self.add_sublayer(
sublayer=DepthwiseSeparable(
num_channels=int(128 * scale),
num_filters1=128,
num_filters2=128,
num_groups=128,
stride=1,
scale=scale),
name="conv3_1")
self.dwsl.append(dws31)
dws32 = self.add_sublayer(
sublayer=DepthwiseSeparable(
num_channels=int(128 * scale),
num_filters1=128,
num_filters2=256,
num_groups=128,
stride=2,
scale=scale),
name="conv3_2")
self.dwsl.append(dws32)
dws41 = self.add_sublayer(
sublayer=DepthwiseSeparable(
num_channels=int(256 * scale),
num_filters1=256,
num_filters2=256,
num_groups=256,
stride=1,
scale=scale),
name="conv4_1")
self.dwsl.append(dws41)
dws42 = self.add_sublayer(
sublayer=DepthwiseSeparable(
num_channels=int(256 * scale),
num_filters1=256,
num_filters2=512,
num_groups=256,
stride=2,
scale=scale),
name="conv4_2")
self.dwsl.append(dws42)
for i in range(5):
tmp = self.add_sublayer(
sublayer=DepthwiseSeparable(
num_channels=int(512 * scale),
num_filters1=512,
num_filters2=512,
num_groups=512,
stride=1,
scale=scale),
name="conv5_" + str(i + 1))
self.dwsl.append(tmp)
dws56 = self.add_sublayer(
sublayer=DepthwiseSeparable(
num_channels=int(512 * scale),
num_filters1=512,
num_filters2=1024,
num_groups=512,
stride=2,
scale=scale),
name="conv5_6")
self.dwsl.append(dws56)
dws6 = self.add_sublayer(
sublayer=DepthwiseSeparable(
num_channels=int(1024 * scale),
num_filters1=1024,
num_filters2=1024,
num_groups=1024,
stride=1,
scale=scale),
name="conv6")
self.dwsl.append(dws6)
self.pool2d_avg = Pool2D(pool_type='avg', global_pooling=True)
self.out = Linear(
int(1024 * scale),
class_dim,
param_attr=ParamAttr(
initializer=MSRA(), name=self.full_name() + "fc7_weights"),
bias_attr=ParamAttr(name=self.full_name() + "fc7_offset"))
def forward(self, inputs):
y = self.conv1(inputs)
for dws in self.dwsl:
y = dws(y)
y = self.pool2d_avg(y)
y = fluid.layers.reshape(y, shape=[-1, 1024])
y = self.out(y)
return y
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import math

import paddle
import paddle.fluid as fluid
from paddle.fluid.layer_helper import LayerHelper
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, BatchNorm, Linear
class ConvBNLayer(fluid.dygraph.Layer):
def __init__(self,
num_channels,
num_filters,
filter_size,
stride=1,
groups=1,
act=None):
super(ConvBNLayer, self).__init__()
self._conv = Conv2D(
num_channels=num_channels,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=(filter_size - 1) // 2,
groups=groups,
act=None,
bias_attr=False)
self._batch_norm = BatchNorm(num_filters, act=act)
def forward(self, inputs):
y = self._conv(inputs)
y = self._batch_norm(y)
return y
class BottleneckBlock(fluid.dygraph.Layer):
def __init__(self, num_channels, num_filters, stride, shortcut=True):
super(BottleneckBlock, self).__init__()
self.conv0 = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters,
filter_size=1,
act='relu')
self.conv1 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters,
filter_size=3,
stride=stride,
act='relu')
self.conv2 = ConvBNLayer(
num_channels=num_filters,
num_filters=num_filters * 4,
filter_size=1,
act=None)
if not shortcut:
self.short = ConvBNLayer(
num_channels=num_channels,
num_filters=num_filters * 4,
filter_size=1,
stride=stride)
self.shortcut = shortcut
self._num_channels_out = num_filters * 4
def forward(self, inputs):
y = self.conv0(inputs)
conv1 = self.conv1(y)
conv2 = self.conv2(conv1)
if self.shortcut:
short = inputs
else:
short = self.short(inputs)
y = fluid.layers.elementwise_add(x=short, y=conv2)
layer_helper = LayerHelper(self.full_name(), act='relu')
return layer_helper.append_activation(y)
class ResNet(fluid.dygraph.Layer):
def __init__(self, layers=50, class_dim=100):
super(ResNet, self).__init__()
self.layers = layers
        supported_layers = [50, 101, 152]
        assert layers in supported_layers, \
            "supported layers are {} but input layer is {}".format(supported_layers, layers)
if layers == 50:
depth = [3, 4, 6, 3]
elif layers == 101:
depth = [3, 4, 23, 3]
elif layers == 152:
depth = [3, 8, 36, 3]
num_channels = [64, 256, 512, 1024]
num_filters = [64, 128, 256, 512]
self.conv = ConvBNLayer(
num_channels=3,
num_filters=64,
filter_size=7,
stride=1,
act='relu')
self.pool2d_max = Pool2D(
pool_size=3, pool_stride=2, pool_padding=1, pool_type='max')
self.bottleneck_block_list = []
for block in range(len(depth)):
shortcut = False
for i in range(depth[block]):
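                # the first block of each stage uses a projection shortcut, and
                # downsamples (stride 2) in every stage except the first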
bottleneck_block = self.add_sublayer(
'bb_%d_%d' % (block, i),
BottleneckBlock(
num_channels=num_channels[block]
if i == 0 else num_filters[block] * 4,
num_filters=num_filters[block],
stride=2 if i == 0 and block != 0 else 1,
shortcut=shortcut))
self.bottleneck_block_list.append(bottleneck_block)
shortcut = True
self.pool2d_avg = Pool2D(
pool_size=7, pool_type='avg', global_pooling=True)
        # global average pooling leaves a 1 x 1 spatial map with num_filters[-1] * 4 channels
        self.pool2d_avg_output = num_filters[-1] * 4 * 1 * 1
        stdv = 1.0 / math.sqrt(self.pool2d_avg_output * 1.0)
self.out = Linear(
self.pool2d_avg_output,
class_dim,
param_attr=fluid.param_attr.ParamAttr(
initializer=fluid.initializer.Uniform(-stdv, stdv)))
def forward(self, inputs):
y = self.conv(inputs)
for bottleneck_block in self.bottleneck_block_list:
y = bottleneck_block(y)
y = self.pool2d_avg(y)
y = fluid.layers.reshape(y, shape=[-1, self.pool2d_avg_output])
y = self.out(y)
return y