learning_rate.py
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Based on:
# --------------------------------------------------------
# DARTS
# Copyright (c) 2018, Hanxiao Liu.
# Licensed under the Apache License, Version 2.0;
# --------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers.ops as ops
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
import math
from paddle.fluid.initializer import init_on_cpu


def cosine_decay(learning_rate, num_epoch, steps_one_epoch):
    """Applies cosine decay to the learning rate.
    lr = 0.5 * (math.cos(epoch * (math.pi / 120)) + 1)
    """
    global_step = _decay_step_counter()

    with init_on_cpu():
        decayed_lr = learning_rate * \
            (ops.cos(fluid.layers.floor(global_step / steps_one_epoch)
                     * math.pi / num_epoch) + 1) / 2
    return decayed_lr
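

# Illustrative usage sketch (not part of the original module): one plausible
# way to feed the decayed learning-rate variable to a fluid optimizer. The
# optimizer choice and the numeric values below are assumptions made for
# demonstration only.
def _example_cosine_decay_usage():
    # Build the schedule inside the current default program and attach it to
    # a Momentum optimizer; any fluid optimizer accepting a Variable works.
    lr = cosine_decay(learning_rate=0.025, num_epoch=120, steps_one_epoch=391)
    return fluid.optimizer.Momentum(learning_rate=lr, momentum=0.9)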


def cosine_with_warmup_decay(learning_rate, lr_min, steps_one_epoch,
                             warmup_epochs, total_epoch, num_gpu):
    """Linear warmup for `warmup_epochs` epochs, then cosine decay to `lr_min`."""
    global_step = _decay_step_counter()
    epoch_idx = fluid.layers.floor(global_step / steps_one_epoch)

    lr = fluid.layers.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")

    warmup_epoch_var = fluid.layers.fill_constant(
        shape=[1], dtype='float32', value=float(warmup_epochs), force_cpu=True)
    num_gpu_var = fluid.layers.fill_constant(
        shape=[1], dtype='float32', value=float(num_gpu), force_cpu=True)
    # Index of the current mini-batch within the current epoch.
    batch_idx = global_step - steps_one_epoch * epoch_idx

    with fluid.layers.control_flow.Switch() as switch:
        # Warmup phase: ramp the learning rate linearly from `learning_rate`
        # to `learning_rate * num_gpu` over the first `warmup_epochs` epochs.
        with switch.case(epoch_idx < warmup_epoch_var):
            epoch_ = epoch_idx + (batch_idx + 1) / steps_one_epoch
            factor = 1 / num_gpu_var * (
                epoch_ * (num_gpu_var - 1) / warmup_epoch_var + 1)
            decayed_lr = learning_rate * factor * num_gpu_var
            fluid.layers.assign(decayed_lr, lr)
        # Cosine decay (default branch): progress in fractional epochs over the run.
        epoch_ = epoch_idx + (batch_idx + 1) / steps_one_epoch
        m = epoch_ / total_epoch
        frac = (1 + ops.cos(math.pi * m)) / 2
        cosine_lr = (lr_min + (learning_rate - lr_min) * frac) * num_gpu_var
        with switch.default():
            fluid.layers.assign(cosine_lr, lr)

    return lr
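

# Illustrative usage sketch (not part of the original module): wiring the
# warmup-plus-cosine schedule into training. The hyper-parameter values and
# the choice of a Momentum optimizer are assumptions for demonstration.
def _example_warmup_cosine_usage():
    lr = cosine_with_warmup_decay(
        learning_rate=0.1,
        lr_min=0.0001,
        steps_one_epoch=391,
        warmup_epochs=5,
        total_epoch=120,
        num_gpu=4)
    # The returned global variable is updated every step by the Switch ops
    # above, so it can be passed directly as the optimizer's learning rate.
    return fluid.optimizer.Momentum(learning_rate=lr, momentum=0.9)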