# model.py
#  Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
#
# Based on:
# --------------------------------------------------------
# DARTS
# Copyright (c) 2018, Hanxiao Liu.
# Licensed under the Apache License, Version 2.0;
# --------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import numpy as np
import time
import functools
import paddle
import paddle.fluid as fluid
from operations import *


class Cell(object):
    """A single cell whose wiring is read from a genotype.

    The genotype supplies, for either the normal or the reduction variant,
    a sequence of ``(op_name, input_index)`` pairs plus the list of state
    indices to concatenate as the cell output. Ops are consumed two at a
    time: each step sums the outputs of two ops and appends the sum to the
    list of states.

    Attributes set by the constructor:
        preprocess0, preprocess1: partials applied to the two cell inputs.
        multiplier: number of states concatenated into the cell output.
    """

    def __init__(self, genotype, C_prev_prev, C_prev, C, reduction,
                 reduction_prev):
        """Bind the preprocessing layers and the ops named by ``genotype``.

        Args:
            genotype: object exposing ``normal``/``normal_concat`` and
                ``reduce``/``reduce_concat``.
            C_prev_prev: channel count of the first input (only logged here).
            C_prev: channel count of the second input (only logged here).
            C: channel count used by every op in this cell.
            reduction: whether this cell uses the ``reduce`` genotype.
            reduction_prev: whether the previous cell was a reduction cell;
                selects ``FactorizedReduce`` for the first input.
        """
        print(C_prev_prev, C_prev, C)

        if reduction_prev:
            self.preprocess0 = functools.partial(FactorizedReduce, C_out=C)
        else:
            self.preprocess0 = functools.partial(
                ReLUConvBN, C_out=C, kernel_size=1, stride=1, padding=0)
        self.preprocess1 = functools.partial(
            ReLUConvBN, C_out=C, kernel_size=1, stride=1, padding=0)
        if reduction:
            op_names, indices = zip(*genotype.reduce)
            concat = genotype.reduce_concat
        else:
            op_names, indices = zip(*genotype.normal)
            concat = genotype.normal_concat
        print(op_names, indices, concat, reduction)
        self._compile(C, op_names, indices, concat, reduction)

    def _compile(self, C, op_names, indices, concat, reduction):
        """Turn op names into partially-applied op builders.

        Args:
            C: channel count passed to every op.
            op_names: keys into ``OPS``, one per edge.
            indices: for each op, the index of the state it reads.
            concat: indices of the states concatenated as the cell output.
            reduction: when true, ops reading one of the two cell inputs
                (index < 2) are built with stride 2.
        """
        assert len(op_names) == len(indices)
        self._steps = len(op_names) // 2
        self._concat = concat
        self.multiplier = len(concat)

        self._ops = []
        for name, index in zip(op_names, indices):
            stride = 2 if reduction and index < 2 else 1
            self._ops.append(
                functools.partial(OPS[name], C=C, stride=stride, affine=True))
        self._indices = indices

    def _drop_path(self, h_out, h_in, drop_prob):
        """Apply dropout to ``h_out`` unless the op returned its input as-is.

        The check is a Python identity test on purpose: ``!=`` on framework
        variables may be overloaded to build an elementwise comparison op,
        whose result is not a usable Python boolean.
        """
        if h_out is h_in:
            return h_out
        return fluid.layers.dropout(
            h_out, drop_prob, dropout_implementation='upscale_in_train')

    def forward(self, s0, s1, drop_prob, is_train, name):
        """Build the cell's ops on top of the two input states.

        Args:
            s0: first input state.
            s1: second input state.
            drop_prob: dropout probability applied to op outputs in training.
            is_train: whether the graph is being built for training.
            name: prefix for the names handed to the sub-layers.

        Returns:
            The states selected by the genotype's concat list, concatenated
            along axis 1.
        """
        self.training = is_train
        s0 = self.preprocess0(s0, name=name + 'preprocess0.')
        s1 = self.preprocess1(s1, name=name + 'preprocess1.')
        out = [s0, s1]
        for i in range(self._steps):
            first, second = 2 * i, 2 * i + 1
            h1 = out[self._indices[first]]
            h2 = out[self._indices[second]]
            h3 = self._ops[first](h1, name=name + '_ops.' + str(first) + '.')
            h4 = self._ops[second](
                h2, name=name + '_ops.' + str(second) + '.')
            if self.training and drop_prob > 0.:
                h3 = self._drop_path(h3, h1, drop_prob)
                h4 = self._drop_path(h4, h2, drop_prob)
            out.append(h3 + h4)
        return fluid.layers.concat(
            [out[i] for i in self._concat], axis=1, name=name + 'concat')
J
jerrywgz 已提交
98 99 100


def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'):
    """Build the auxiliary classifier head used by the CIFAR network.

    Layer sequence: ReLU, 5x5 average pooling with stride 3, a bias-free
    1x1 convolution to 128 channels with batch norm + ReLU, a bias-free
    2x2 convolution to 768 channels with batch norm + ReLU, and a fully
    connected layer with ``num_classes`` outputs.

    Args:
        input: feature map the head is attached to.
        num_classes: output size of the final fully connected layer.
        aux_name: prefix for every layer and parameter name in the head.

    Returns:
        The output of the fully connected layer (no softmax is applied).
    """
    relu_a = fluid.layers.relu(input)
    pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', pool_stride=3)
    conv2d_a = fluid.layers.conv2d(
        pool_a,
        128,
        1,
        name=aux_name + '.features.2',
        param_attr=ParamAttr(
            initializer=Xavier(
                uniform=False, fan_in=0),
            name=aux_name + '.features.2.weight'),
        bias_attr=False)
    bn_a_name = aux_name + '.features.3'
    bn_a = fluid.layers.batch_norm(
        conv2d_a,
        act='relu',
        name=bn_a_name,
        param_attr=ParamAttr(
            initializer=Constant(1.), name=bn_a_name + '.weight'),
        bias_attr=ParamAttr(
            initializer=Constant(0.), name=bn_a_name + '.bias'),
        moving_mean_name=bn_a_name + '.running_mean',
        moving_variance_name=bn_a_name + '.running_var')
    conv2d_b = fluid.layers.conv2d(
        bn_a,
        768,
        2,
        name=aux_name + '.features.5',
        param_attr=ParamAttr(
            initializer=Xavier(
                uniform=False, fan_in=0),
            name=aux_name + '.features.5.weight'),
        bias_attr=False)
    bn_b_name = aux_name + '.features.6'
    bn_b = fluid.layers.batch_norm(
        conv2d_b,
        act='relu',
        name=bn_b_name,
        param_attr=ParamAttr(
            initializer=Constant(1.), name=bn_b_name + '.weight'),
        bias_attr=ParamAttr(
            initializer=Constant(0.), name=bn_b_name + '.bias'),
        moving_mean_name=bn_b_name + '.running_mean',
        moving_variance_name=bn_b_name + '.running_var')
    fc_name = aux_name + '.classifier'
    fc = fluid.layers.fc(bn_b,
                         num_classes,
                         name=fc_name,
                         param_attr=ParamAttr(
                             initializer=Normal(scale=1e-3),
                             name=fc_name + '.weight'),
                         bias_attr=ParamAttr(
                             initializer=Constant(0.), name=fc_name + '.bias'))
    return fc


def StemConv(input, C_out, kernel_size, padding):
    """Build the CIFAR stem: a bias-free convolution followed by batch norm.

    Args:
        input: image tensor fed to the network.
        C_out: number of convolution filters.
        kernel_size: convolution filter size.
        padding: convolution padding.

    Returns:
        The batch-normalized convolution output (no activation applied).
    """
    conv_weight = ParamAttr(
        initializer=Xavier(
            uniform=False, fan_in=0), name='stem.0.weight')
    stem_conv = fluid.layers.conv2d(
        input,
        C_out,
        kernel_size,
        padding=padding,
        param_attr=conv_weight,
        bias_attr=False)

    bn_scale = ParamAttr(initializer=Constant(1.), name='stem.1.weight')
    bn_shift = ParamAttr(initializer=Constant(0.), name='stem.1.bias')
    return fluid.layers.batch_norm(
        stem_conv,
        param_attr=bn_scale,
        bias_attr=bn_shift,
        moving_mean_name='stem.1.running_mean',
        moving_variance_name='stem.1.running_var')


class NetworkCIFAR(object):
    """CIFAR network built from stacked ``Cell`` objects.

    The constructor only records the cell configuration. The graph itself
    is built by ``train_model`` / ``test_model``, which read a batch from a
    py_reader and then call ``forward``.
    """

    # The stem convolution produces this many times the initial channels.
    _STEM_MULTIPLIER = 3

    def __init__(self, C, class_num, layers, auxiliary, genotype):
        """Create the cell stack.

        Args:
            C: initial channel count of the cells.
            class_num: number of output classes.
            layers: number of cells; the cells at ``layers // 3`` and
                ``2 * layers // 3`` are reduction cells that double the
                channel count.
            auxiliary: whether ``forward`` attaches the auxiliary head when
                building the training graph.
            genotype: cell description forwarded to every ``Cell``.
        """
        self._layers = layers
        self._auxiliary = auxiliary
        self.class_num = class_num
        self.drop_path_prob = 0

        C_curr = self._STEM_MULTIPLIER * C
        C_prev_prev, C_prev, C_curr = C_curr, C_curr, C
        self.cells = []
        reduction_prev = False
        for i in range(layers):
            if i in [layers // 3, 2 * layers // 3]:
                C_curr *= 2
                reduction = True
            else:
                reduction = False
            cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction,
                        reduction_prev)
            reduction_prev = reduction
            self.cells += [cell]
            C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr

    def build_input(self, image_shape, is_train):
        """Create the py_reader that feeds the network.

        The training reader has seven slots, unpacked by ``train_model`` as
        image, ya, yb, lam, label_reshape, non_label_reshape and rad_var.
        The test reader has two slots: image and label.

        Args:
            image_shape: per-sample image shape as a list (batch dimension
                excluded).
            is_train: selects the training or the test reader.

        Returns:
            The created py_reader.
        """
        if is_train:
            py_reader = fluid.layers.py_reader(
                capacity=64,
                shapes=[[-1] + image_shape, [-1, 1], [-1, 1], [-1, 1], [-1, 1],
                        [-1, 1], [50, -1, self.class_num - 1]],
                lod_levels=[0, 0, 0, 0, 0, 0, 0],
                dtypes=[
                    "float32", "int64", "int64", "float32", "int32", "int32",
                    "float32"
                ],
                use_double_buffer=True,
                name='train_reader')
        else:
            py_reader = fluid.layers.py_reader(
                capacity=64,
                shapes=[[-1] + image_shape, [-1, 1]],
                lod_levels=[0, 0],
                dtypes=["float32", "int64"],
                use_double_buffer=True,
                name='test_reader')
        return py_reader

    def forward(self, init_channel, is_train):
        """Build stem, cells, optional auxiliary head and the classifier.

        Reads ``self.image``, which ``train_model`` / ``test_model`` set
        before calling this method.

        Args:
            init_channel: initial channel count; the stem outputs
                ``init_channel * 3`` channels.
            is_train: whether the graph is being built for training.

        Returns:
            Tuple ``(logits, logits_aux)``. ``logits_aux`` is ``None``
            unless the auxiliary head was requested and ``is_train`` is
            true.
        """
        self.training = is_train
        self.logits_aux = None
        num_channel = init_channel * self._STEM_MULTIPLIER
        s0 = s1 = StemConv(self.image, num_channel, kernel_size=3, padding=1)
        for i, cell in enumerate(self.cells):
            name = 'cells.' + str(i) + '.'
            s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train,
                                      name)
            if i == int(2 * self._layers // 3):
                if self._auxiliary and self.training:
                    self.logits_aux = AuxiliaryHeadCIFAR(s1, self.class_num)
        out = fluid.layers.adaptive_pool2d(s1, (1, 1), "avg")
        self.logits = fluid.layers.fc(out,
                                      size=self.class_num,
                                      param_attr=ParamAttr(
                                          initializer=Normal(scale=1e-3),
                                          name='classifier.weight'),
                                      bias_attr=ParamAttr(
                                          initializer=Constant(0),
                                          name='classifier.bias'))
        return self.logits, self.logits_aux

    def train_model(self, py_reader, init_channels, aux, aux_w, loss_lambda):
        """Build the training graph and return the mixup loss.

        Args:
            py_reader: reader created by ``build_input(..., True)``.
            init_channels: forwarded to ``forward``.
            aux: whether the auxiliary-head loss is added.
            aux_w: weight of the auxiliary-head loss.
            loss_lambda: accepted for interface compatibility; the visible
                code does not use it.

        Returns:
            The mixup loss variable (also stored as ``self.loss``).
        """
        (self.image, self.ya, self.yb, self.lam, self.label_reshape,
         self.non_label_reshape,
         self.rad_var) = fluid.layers.read_file(py_reader)
        self.logits, self.logits_aux = self.forward(init_channels, True)
        # Stored under its own attribute so the ``mixup_loss`` method is not
        # replaced by its result and stays callable on this instance.
        self.loss = self.mixup_loss(aux, aux_w)
        return self.loss

    def test_model(self, py_reader, init_channels):
        """Build the evaluation graph.

        Args:
            py_reader: reader created by ``build_input(..., False)``.
            init_channels: forwarded to ``forward``.

        Returns:
            Tuple ``(prob, acc_1, acc_5)``: softmax output and the top-1 and
            top-5 accuracy variables.
        """
        self.image, self.ya = fluid.layers.read_file(py_reader)
        self.logits, _ = self.forward(init_channels, False)
        prob = fluid.layers.softmax(self.logits, use_cudnn=False)
        # NOTE(review): this cross-entropy op is added to the program but its
        # output is not returned; kept so the built program is unchanged.
        loss = fluid.layers.cross_entropy(prob, self.ya)
        acc_1 = fluid.layers.accuracy(self.logits, self.ya, k=1)
        acc_5 = fluid.layers.accuracy(self.logits, self.ya, k=5)
        return prob, acc_1, acc_5

    def _mixed_cross_entropy(self, prob):
        """Blend the mean cross-entropy against ``ya`` and ``yb`` by ``lam``."""
        loss_a = fluid.layers.cross_entropy(prob, self.ya)
        loss_b = fluid.layers.cross_entropy(prob, self.yb)
        loss_a_mean = fluid.layers.reduce_mean(loss_a)
        loss_b_mean = fluid.layers.reduce_mean(loss_b)
        return self.lam * loss_a_mean + (1 - self.lam) * loss_b_mean

    def mixup_loss(self, auxiliary, auxiliary_weight):
        """Compute the mixup loss, optionally adding the auxiliary-head term.

        Args:
            auxiliary: when true, the loss on ``self.logits_aux`` is added.
            auxiliary_weight: multiplier for the auxiliary-head loss.

        Returns:
            ``loss`` when ``auxiliary`` is false, otherwise
            ``loss + auxiliary_weight * loss_aux``.
        """
        prob = fluid.layers.softmax(self.logits, use_cudnn=False)
        loss = self._mixed_cross_entropy(prob)
        if not auxiliary:
            # Without the auxiliary head there is no second term to add.
            return loss
        prob_aux = fluid.layers.softmax(self.logits_aux, use_cudnn=False)
        loss_aux = self._mixed_cross_entropy(prob_aux)
        return loss + auxiliary_weight * loss_aux

    def lrc_loss(self):
        """Build the LRC loss from logits gathered at label/non-label indices.

        Reads ``self.logits``, ``self.label_reshape``,
        ``self.non_label_reshape`` and ``self.rad_var`` (set by
        ``train_model``).

        Returns:
            The mean of the per-column absolute sums, divided by the product
            of the first two dimensions of the weighted difference tensor.
        """
        y_diff_reshape = fluid.layers.reshape(self.logits, shape=(-1, 1))
        label_reshape = fluid.layers.squeeze(self.label_reshape, axes=[1])
        non_label_reshape = fluid.layers.squeeze(
            self.non_label_reshape, axes=[1])
        label_reshape.stop_gradient = True
        non_label_reshape.stop_gradient = True

        y_diff_label_reshape = fluid.layers.gather(y_diff_reshape,
                                                   label_reshape)
        y_diff_non_label_reshape = fluid.layers.gather(y_diff_reshape,
                                                       non_label_reshape)
        y_diff_label = fluid.layers.reshape(
            y_diff_label_reshape, shape=(1, -1, 1))
        y_diff_non_label = fluid.layers.reshape(
            y_diff_non_label_reshape, shape=(1, -1, self.class_num - 1))
        y_diff_ = y_diff_non_label - y_diff_label

        y_diff_ = fluid.layers.transpose(y_diff_, perm=[1, 2, 0])
        rad_var_trans = fluid.layers.transpose(self.rad_var, perm=[1, 2, 0])
        rad_y_diff_trans = rad_var_trans * y_diff_
        lrc_loss_sum = fluid.layers.reduce_sum(rad_y_diff_trans, dim=[0, 1])
        shape_nbc = fluid.layers.shape(rad_y_diff_trans)
        shape_nb = fluid.layers.slice(shape_nbc, axes=[0], starts=[0], ends=[2])
        num = fluid.layers.reduce_prod(shape_nb)
        num.stop_gradient = True
        lrc_loss_ = fluid.layers.abs(lrc_loss_sum) / num
        lrc_loss_mean = fluid.layers.reduce_mean(lrc_loss_)

        return lrc_loss_mean
R
root 已提交
315

J
jerrywgz 已提交
316

R
root 已提交
317
def AuxiliaryHeadImageNet(input, num_classes, aux_name='auxiliary_head'):
    """Build the auxiliary classifier head used by the ImageNet network.

    Layer sequence: ReLU, 5x5 average pooling with stride 3, a bias-free
    1x1 convolution to 128 channels with batch norm + ReLU, a bias-free
    2x2 convolution to 768 channels with ReLU (no batch norm), and a fully
    connected layer with ``num_classes`` outputs.

    Args:
        input: feature map the head is attached to.
        num_classes: output size of the final fully connected layer.
        aux_name: prefix for every layer and parameter name in the head.

    Returns:
        The output of the fully connected layer (no softmax is applied).
    """
    relu_a = fluid.layers.relu(input)
    pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', pool_stride=3)
    conv2d_a = fluid.layers.conv2d(
        pool_a,
        128,
        1,
        name=aux_name + '.features.2',
        param_attr=ParamAttr(
            initializer=Xavier(
                uniform=False, fan_in=0),
            name=aux_name + '.features.2.weight'),
        bias_attr=False)
    bn_a_name = aux_name + '.features.3'
    bn_a = fluid.layers.batch_norm(
        conv2d_a,
        act='relu',
        name=bn_a_name,
        param_attr=ParamAttr(
            initializer=Constant(1.), name=bn_a_name + '.weight'),
        bias_attr=ParamAttr(
            initializer=Constant(0.), name=bn_a_name + '.bias'),
        moving_mean_name=bn_a_name + '.running_mean',
        moving_variance_name=bn_a_name + '.running_var')
    conv2d_b = fluid.layers.conv2d(
        bn_a,
        768,
        2,
        act='relu',
        name=aux_name + '.features.5',
        param_attr=ParamAttr(
            initializer=Xavier(
                uniform=False, fan_in=0),
            name=aux_name + '.features.5.weight'),
        bias_attr=False)
    fc_name = aux_name + '.classifier'
    fc = fluid.layers.fc(conv2d_b,
                         num_classes,
                         name=fc_name,
                         param_attr=ParamAttr(
                             initializer=Normal(scale=1e-3),
                             name=fc_name + '.weight'),
                         bias_attr=ParamAttr(
                             initializer=Constant(0.), name=fc_name + '.bias'))
    return fc


def Stem0Conv(input, C_out):
    """Build the first ImageNet stem: two stride-2 3x3 conv + batch norm pairs.

    The first convolution outputs ``C_out // 2`` channels and its batch norm
    applies ReLU; the second outputs ``C_out`` channels and its batch norm
    has no activation. Both convolutions are bias-free.

    Args:
        input: image tensor fed to the network.
        C_out: channel count of the stem output.

    Returns:
        The output of the second batch norm.
    """
    conv_a = fluid.layers.conv2d(
        input,
        C_out // 2,
        3,
        stride=2,
        padding=1,
        param_attr=ParamAttr(
            initializer=Xavier(
                uniform=False, fan_in=0), name='stem0.0.weight'),
        bias_attr=False)
    relu_a = fluid.layers.batch_norm(
        conv_a,
        param_attr=ParamAttr(
            initializer=Constant(1.), name='stem0.1.weight'),
        bias_attr=ParamAttr(
            initializer=Constant(0.), name='stem0.1.bias'),
        moving_mean_name='stem0.1.running_mean',
        moving_variance_name='stem0.1.running_var',
        act='relu')
    conv_b = fluid.layers.conv2d(
        relu_a,
        C_out,
        3,
        stride=2,
        padding=1,
        param_attr=ParamAttr(
            initializer=Xavier(
                uniform=False, fan_in=0), name='stem0.3.weight'),
        bias_attr=False)
    bn_b = fluid.layers.batch_norm(
        conv_b,
        param_attr=ParamAttr(
            initializer=Constant(1.), name='stem0.4.weight'),
        bias_attr=ParamAttr(
            initializer=Constant(0.), name='stem0.4.bias'),
        moving_mean_name='stem0.4.running_mean',
        moving_variance_name='stem0.4.running_var')

    return bn_b

J
jerrywgz 已提交
405

R
root 已提交
406
def Stem1Conv(input, C_out):
    """Build the second ImageNet stem: ReLU, stride-2 3x3 conv, batch norm.

    Args:
        input: tensor to process (``NetworkImageNet.forward`` passes the
            output of the first stem).
        C_out: number of convolution filters.

    Returns:
        The batch-normalized convolution output (no activation applied).
    """
    relu_a = fluid.layers.relu(input)
    conv_a = fluid.layers.conv2d(
        relu_a,
        C_out,
        3,
        stride=2,
        padding=1,
        param_attr=ParamAttr(
            initializer=Xavier(
                uniform=False, fan_in=0), name='stem1.1.weight'),
        bias_attr=False)
    bn_a = fluid.layers.batch_norm(
        conv_a,
        param_attr=ParamAttr(
            initializer=Constant(1.), name='stem1.2.weight'),
        bias_attr=ParamAttr(
            initializer=Constant(0.), name='stem1.2.bias'),
        moving_mean_name='stem1.2.running_mean',
        moving_variance_name='stem1.2.running_var')
    return bn_a

J
jerrywgz 已提交
428

R
root 已提交
429
class NetworkImageNet(object):
    """ImageNet network built from two stems and stacked ``Cell`` objects.

    The constructor only records the cell configuration. The graph itself
    is built by ``train_model`` / ``test_model``, which read a batch from a
    py_reader and then call ``forward``.
    """

    def __init__(self, C, class_num, layers, genotype):
        """Create the cell stack and bind the two stem builders.

        Args:
            C: initial channel count of the cells and output channel count
                of both stems.
            class_num: number of output classes.
            layers: number of cells; the cells at ``layers // 3`` and
                ``2 * layers // 3`` are reduction cells that double the
                channel count.
            genotype: cell description forwarded to every ``Cell``.
        """
        self.class_num = class_num
        self._layers = layers

        self.drop_path_prob = 0

        C_prev_prev, C_prev, C_curr = C, C, C
        self.cells = []
        # The first cell is built as if it followed a reduction cell.
        reduction_prev = True
        for i in range(layers):
            if i in [layers // 3, 2 * layers // 3]:
                C_curr *= 2
                reduction = True
            else:
                reduction = False
            cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction,
                        reduction_prev)
            reduction_prev = reduction
            self.cells += [cell]
            C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr
        self.stem0 = functools.partial(Stem0Conv, C_out=C)
        self.stem1 = functools.partial(Stem1Conv, C_out=C)

    def build_input(self, image_shape, is_train):
        """Create the py_reader that feeds the network.

        Both readers have two slots, image (float32) and label (int64);
        they differ only in name.

        Args:
            image_shape: per-sample image shape as a list (batch dimension
                excluded).
            is_train: selects the training or the test reader.

        Returns:
            The created py_reader.
        """
        if is_train:
            py_reader = fluid.layers.py_reader(
                capacity=64,
                shapes=[[-1] + image_shape, [-1, 1]],
                lod_levels=[0, 0],
                dtypes=["float32", "int64"],
                use_double_buffer=True,
                name='train_reader')
        else:
            py_reader = fluid.layers.py_reader(
                capacity=64,
                shapes=[[-1] + image_shape, [-1, 1]],
                lod_levels=[0, 0],
                dtypes=["float32", "int64"],
                use_double_buffer=True,
                name='test_reader')
        return py_reader

    def forward(self, is_train):
        """Build stems, cells, the auxiliary head (training) and classifier.

        Reads ``self.image``, which ``train_model`` / ``test_model`` set
        before calling this method.

        Args:
            is_train: whether the graph is being built for training.

        Returns:
            Tuple ``(logits, logits_aux)``. ``logits_aux`` is ``None`` when
            ``is_train`` is false.
        """
        self.training = is_train
        self.logits_aux = None
        s0 = self.stem0(self.image)
        s1 = self.stem1(s0)
        for i, cell in enumerate(self.cells):
            name = 'cells.' + str(i) + '.'
            s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train,
                                      name)
            if i == int(2 * self._layers // 3):
                if self.training:
                    self.logits_aux = AuxiliaryHeadImageNet(s1, self.class_num)
        out = fluid.layers.pool2d(s1, 7, "avg", pool_stride=7)
        self.logits = fluid.layers.fc(out,
                                      size=self.class_num,
                                      param_attr=ParamAttr(
                                          initializer=Normal(scale=1e-3),
                                          name='classifier.weight'),
                                      bias_attr=ParamAttr(
                                          initializer=Constant(0),
                                          name='classifier.bias'))
        return self.logits, self.logits_aux

    def calc_loss(self, auxiliary_weight):
        """Compute the classification loss plus the weighted auxiliary loss.

        Reads ``self.logits``, ``self.logits_aux`` and ``self.label``, so it
        must run after ``forward(True)``.

        Args:
            auxiliary_weight: multiplier for the auxiliary-head loss.

        Returns:
            ``mean(loss) + auxiliary_weight * mean(loss_aux)``.
        """
        prob = fluid.layers.softmax(self.logits, use_cudnn=False)
        loss = fluid.layers.cross_entropy(prob, self.label)

        loss_mean = fluid.layers.reduce_mean(loss)
        prob_aux = fluid.layers.softmax(self.logits_aux, use_cudnn=False)
        loss_aux = fluid.layers.cross_entropy(prob_aux, self.label)
        loss_aux_mean = fluid.layers.reduce_mean(loss_aux)
        return loss_mean + auxiliary_weight * loss_aux_mean

    def train_model(self, py_reader, aux_w):
        """Build the training graph and return the loss.

        Args:
            py_reader: reader created by ``build_input(..., True)``.
            aux_w: weight of the auxiliary-head loss.

        Returns:
            The loss variable (also stored as ``self.loss``).
        """
        self.image, self.label = fluid.layers.read_file(py_reader)
        self.logits, self.logits_aux = self.forward(True)
        self.loss = self.calc_loss(aux_w)
        return self.loss

    def test_model(self, py_reader):
        """Build the evaluation graph.

        Args:
            py_reader: reader created by ``build_input(..., False)``.

        Returns:
            Tuple ``(prob, acc_1, acc_5)``: softmax output and the top-1 and
            top-5 accuracy variables.
        """
        self.image, self.label = fluid.layers.read_file(py_reader)
        self.logits, _ = self.forward(False)
        prob = fluid.layers.softmax(self.logits, use_cudnn=False)
        # NOTE(review): this cross-entropy op is added to the program but its
        # output is not returned; kept so the built program is unchanged.
        loss = fluid.layers.cross_entropy(prob, self.label)
        acc_1 = fluid.layers.accuracy(self.logits, self.label, k=1)
        acc_5 = fluid.layers.accuracy(self.logits, self.label, k=5)
        return prob, acc_1, acc_5