learning_rate.py
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Based on:
# --------------------------------------------------------
# DARTS
# Copyright (c) 2018, Hanxiao Liu.
# Licensed under the Apache License, Version 2.0;
# --------------------------------------------------------

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import paddle
import paddle.fluid as fluid
import paddle.fluid.layers.ops as ops
from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter
import math
from paddle.fluid.initializer import init_on_cpu


def cosine_decay(learning_rate, num_epoch, steps_one_epoch):
    """Applies cosine decay to the learning rate.
    lr = 0.5 * (math.cos(epoch * (math.pi / 120)) + 1)
    """
    global_step = _decay_step_counter()

    with init_on_cpu():
        decayed_lr = learning_rate * \
            (ops.cos(fluid.layers.floor(global_step / steps_one_epoch)
                     * math.pi / num_epoch) + 1) / 2
    return decayed_lr
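

# Illustrative usage sketch (not part of the original module): one plausible
# way to feed the decayed learning-rate variable to a fluid optimizer. The
# optimizer choice and the numeric values below are assumptions made for
# demonstration only.
def _example_cosine_decay_usage():
    # Build the schedule inside the current default program and attach it to
    # a Momentum optimizer; any fluid optimizer accepting a Variable works.
    lr = cosine_decay(learning_rate=0.025, num_epoch=120, steps_one_epoch=391)
    return fluid.optimizer.Momentum(learning_rate=lr, momentum=0.9)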


def cosine_with_warmup_decay(learning_rate, lr_min, steps_one_epoch,
                             warmup_epochs, total_epoch, num_gpu):
    """Linear warmup for `warmup_epochs` epochs, then cosine decay to `lr_min`."""
    global_step = _decay_step_counter()
    epoch_idx = fluid.layers.floor(global_step / steps_one_epoch)

    lr = fluid.layers.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")

    warmup_epoch_var = fluid.layers.fill_constant(
        shape=[1], dtype='float32', value=float(warmup_epochs), force_cpu=True)
    num_gpu_var = fluid.layers.fill_constant(
        shape=[1], dtype='float32', value=float(num_gpu), force_cpu=True)
    # Index of the current mini-batch within the current epoch.
    batch_idx = global_step - steps_one_epoch * epoch_idx

    with fluid.layers.control_flow.Switch() as switch:
        # Warmup phase: ramp the learning rate linearly from `learning_rate`
        # to `learning_rate * num_gpu` over the first `warmup_epochs` epochs.
        with switch.case(epoch_idx < warmup_epoch_var):
            epoch_ = epoch_idx + (batch_idx + 1) / steps_one_epoch
            factor = 1 / num_gpu_var * (
                epoch_ * (num_gpu_var - 1) / warmup_epoch_var + 1)
            decayed_lr = learning_rate * factor * num_gpu_var
            fluid.layers.assign(decayed_lr, lr)
        # Cosine decay (default branch): progress in fractional epochs over the run.
        epoch_ = epoch_idx + (batch_idx + 1) / steps_one_epoch
        m = epoch_ / total_epoch
        frac = (1 + ops.cos(math.pi * m)) / 2
        cosine_lr = (lr_min + (learning_rate - lr_min) * frac) * num_gpu_var
        with switch.default():
            fluid.layers.assign(cosine_lr, lr)

    return lr
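

# Illustrative usage sketch (not part of the original module): wiring the
# warmup-plus-cosine schedule into training. The hyper-parameter values and
# the choice of a Momentum optimizer are assumptions for demonstration.
def _example_warmup_cosine_usage():
    lr = cosine_with_warmup_decay(
        learning_rate=0.1,
        lr_min=0.0001,
        steps_one_epoch=391,
        warmup_epochs=5,
        total_epoch=120,
        num_gpu=4)
    # The returned global variable is updated every step by the Switch ops
    # above, so it can be passed directly as the optimizer's learning rate.
    return fluid.optimizer.Momentum(learning_rate=lr, momentum=0.9)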