Source code for mindflow.common.lr_scheduler

# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
# pylint: disable=C1801
"""lr scheduler"""
import bisect
import math

import numpy as np

from ..utils.check_func import check_lr_param_type_value, check_param_type


def get_poly_lr(global_step, lr_init, lr_end, lr_max, warmup_steps, total_steps, poly_power):
    r"""
    Generate a polynomial-decay learning rate array. The learning rate decays polynomially as
    training proceeds. During warmup it follows
    :math:`lr = lr\_init + step * (lr\_max - lr\_init) / warmup\_steps`, then
    :math:`lr = lr\_end + (lr\_max - lr\_end) * [1 - (step - warmup\_steps)/(total\_steps - warmup\_steps)]^{poly\_power}`.

    Args:
        global_step (int): current step number, non-negative int value.
        lr_init (float): init learning rate, positive float value.
        lr_end (float): end learning rate, non-negative float value.
        lr_max (float): max learning rate, positive float value.
        warmup_steps (int): number of warmup steps, non-negative int value.
        total_steps (int): total number of training steps, positive int value.
        poly_power (float): poly learning rate power, positive float value.

    Returns:
        Numpy.array, learning rate array.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Examples:
        >>> from mindflow.common import get_poly_lr
        >>> learning_rate = get_poly_lr(100, 0.001, 0.1, 0.0001, 1000, 10000, 0.5)
        >>> print(learning_rate.shape)
        (9900,)
    """
    check_lr_param_type_value(global_step, "global_step", int, thresh_hold=0, restrict=False, exclude=bool)
    check_lr_param_type_value(lr_init, "lr_init", float, thresh_hold=0.0, restrict=True)
    check_lr_param_type_value(lr_end, "lr_end", float, thresh_hold=0.0, restrict=False)
    check_lr_param_type_value(lr_max, "lr_max", float, thresh_hold=0.0, restrict=True)
    check_lr_param_type_value(warmup_steps, "warmup_steps", int, thresh_hold=0, restrict=False, exclude=bool)
    check_lr_param_type_value(total_steps, "total_steps", int, thresh_hold=0, restrict=True, exclude=bool)
    check_lr_param_type_value(poly_power, "poly_power", float, thresh_hold=0.0, restrict=True)

    lr_each_step = []
    if warmup_steps != 0:
        inc_each_step = (float(lr_max) - float(lr_init)) / float(warmup_steps)
    else:
        inc_each_step = 0
    for i in range(total_steps):
        if i < warmup_steps:
            lr = float(lr_init) + inc_each_step * float(i)
        else:
            base = (1.0 - (float(i) - float(warmup_steps)) / (float(total_steps) - float(warmup_steps)))
            lr = float(lr_max - lr_end) * (base ** poly_power)
            lr = lr + lr_end
            if lr < 0.0:
                lr = 0.0
        lr_each_step.append(lr)

    learning_rate = np.array(lr_each_step).astype(np.float32)
    current_step = global_step
    learning_rate = learning_rate[current_step:]
    return learning_rate
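
# A minimal usage sketch (illustrative only; ``net`` and the hyper-parameter values below are
# assumptions, not part of this module). Because the returned array is sliced by ``global_step``,
# the same call can be reused when resuming training: pass the step you stopped at and hand the
# remaining schedule to a MindSpore optimizer as a per-step dynamic learning rate.
#
#     from mindspore import nn, Tensor
#     lr = get_poly_lr(global_step=0, lr_init=1e-4, lr_end=1e-6, lr_max=1e-3,
#                      warmup_steps=500, total_steps=10000, poly_power=2.0)
#     optimizer = nn.Adam(net.trainable_params(), learning_rate=Tensor(lr))
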
def get_multi_step_lr(lr_init, milestones, gamma, steps_per_epoch, last_epoch):
    r"""
    Generate a piecewise-constant decay learning rate array: the learning rate is multiplied by
    `gamma` once the number of epochs reaches one of the milestones.

    Calculate the learning rate from the given `milestones` and `lr_init`. Let the value of
    `milestones` be :math:`(M_1, M_2, ..., M_t, ..., M_N)`, where N is the length of `milestones`,
    and let :math:`x_t = lr\_init * gamma^{t-1}`. Let the output learning rate be `y`, then for
    the i-th epoch, the formula of computing decayed_learning_rate[i] is:

    .. math::
        y[i] = x_t,\ for\ i \in [M_{t-1}, M_t)

    where :math:`M_0 = 0`. Each epoch contributes `steps_per_epoch` identical entries to the
    returned array.

    Args:
        lr_init (float): init learning rate, positive float value.
        milestones (Union[list[int], tuple[int]]): list of epoch indices, each element in the list
            must be greater than 0.
        gamma (float): multiplicative factor of learning rate decay.
        steps_per_epoch (int): number of steps in each epoch, positive int value.
        last_epoch (int): total epoch of training, positive int value.

    Returns:
        Numpy.array, learning rate array.

    Raises:
        TypeError: If `lr_init` or `gamma` is not a float.
        TypeError: If `steps_per_epoch` or `last_epoch` is not an int.
        TypeError: If `milestones` is neither a tuple nor a list.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> from mindflow import get_multi_step_lr
        >>> lr_init = 0.001
        >>> milestones = [2, 4]
        >>> gamma = 0.1
        >>> steps_per_epoch = 3
        >>> last_epoch = 5
        >>> lr = get_multi_step_lr(lr_init, milestones, gamma, steps_per_epoch, last_epoch)
        >>> print(lr)
        [1.e-03 1.e-03 1.e-03 1.e-03 1.e-03 1.e-03 1.e-04 1.e-04 1.e-04 1.e-04
         1.e-04 1.e-04 1.e-05 1.e-05 1.e-05]
    """
    check_lr_param_type_value(lr_init, "lr_init", float, thresh_hold=0.0, restrict=True)
    check_lr_param_type_value(gamma, "gamma", float, thresh_hold=0.0, restrict=True)
    check_lr_param_type_value(steps_per_epoch, "steps_per_epoch", int, thresh_hold=0, restrict=True)
    check_lr_param_type_value(last_epoch, "last_epoch", int, thresh_hold=0, restrict=True)
    check_param_type(milestones, "milestones", [list, tuple])

    ordered_milestones = sorted(milestones)
    idx = bisect.bisect_left(ordered_milestones, last_epoch)
    new_milestones = ordered_milestones[:idx]
    new_milestones.append(last_epoch)
    step_milestones = [it * steps_per_epoch for it in new_milestones]
    lr = []
    last_item = 0
    last_lr = lr_init / gamma
    for item in step_milestones:
        cur_lr = last_lr * gamma
        lr += [cur_lr] * (item - last_item)
        last_item = item
        last_lr = cur_lr
    return np.array(lr).astype(np.float32)
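
# A small sanity-check sketch (the values mirror the docstring example above): the returned array
# has ``steps_per_epoch * last_epoch`` entries, and the learning rate is multiplied by ``gamma``
# at every milestone epoch, so with milestones [2, 4] the schedule has three constant plateaus.
#
#     lr = get_multi_step_lr(lr_init=0.001, milestones=[2, 4], gamma=0.1,
#                            steps_per_epoch=3, last_epoch=5)
#     assert lr.shape == (15,)           # 3 steps/epoch * 5 epochs
#     assert np.isclose(lr[0], 1e-3)     # epochs [0, 2): lr_init
#     assert np.isclose(lr[6], 1e-4)     # epochs [2, 4): lr_init * gamma
#     assert np.isclose(lr[-1], 1e-5)    # epochs [4, 5): lr_init * gamma**2
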
def _get_linear_warmup_lr(warmup_steps, lr_end, lr_init=0.0):
    """Linearly increase the learning rate from `lr_init` to `lr_end` over `warmup_steps` steps."""
    lr_inc = (float(lr_end) - float(lr_init)) / float(warmup_steps)
    lr = [float(lr_init) + lr_inc * (i + 1) for i in range(warmup_steps)]
    return lr


def _get_cosine_annealing_lr(lr_init, steps_per_epoch, last_epoch, eta_min=1e-6):
    """Cosine-annealing learning rate: decay from `lr_init` towards `eta_min` over `last_epoch` epochs."""
    total_steps = last_epoch * steps_per_epoch
    delta = 0.5 * (lr_init - eta_min)
    lr = []
    for i in range(total_steps):
        tmp_epoch = min(math.floor(i / steps_per_epoch), last_epoch)
        lr.append(eta_min + delta * (1 + math.cos(math.pi * tmp_epoch / last_epoch)))
    return lr
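
# A brief sketch of how these helpers behave (argument values are illustrative assumptions):
# the linear ramp ends exactly at ``lr_end``, while the cosine curve starts at ``lr_init`` and
# approaches ``eta_min`` as the epoch index approaches ``last_epoch``.
#
#     warmup = _get_linear_warmup_lr(warmup_steps=5, lr_end=1e-3)
#     assert math.isclose(warmup[-1], 1e-3)
#     cosine = _get_cosine_annealing_lr(lr_init=1e-3, steps_per_epoch=10, last_epoch=20)
#     assert math.isclose(cosine[0], 1e-3)
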
def get_warmup_cosine_annealing_lr(lr_init, steps_per_epoch, last_epoch,
                                   warmup_epochs=0, warmup_lr_init=0.0, eta_min=1e-6):
    r"""
    Calculate the learning rate based on a cosine decay function. If warmup epochs are specified,
    the learning rate in those epochs is warmed up by linear annealing.

    For the i-th step, the formula of computing the cosine decayed_learning_rate[i] is:

    .. math::
        decayed\_learning\_rate[i] = eta\_min + 0.5 * (lr\_init - eta\_min) *
        (1 + cos(\frac{current\_epoch}{last\_epoch}\pi))

    Where :math:`current\_epoch = floor(\frac{i}{steps\_per\_epoch})`.

    If warmup epochs are specified, for the i-th step in the warmup epochs, the formula of
    computing warmup_learning_rate[i] is:

    .. math::
        warmup\_learning\_rate[i] = (lr\_init - warmup\_lr\_init) * i / warmup\_steps + warmup\_lr\_init

    Args:
        lr_init (float): init learning rate, positive float value.
        steps_per_epoch (int): number of steps in each epoch, positive int value.
        last_epoch (int): total epoch of training, positive int value.
        warmup_epochs (int): total number of warmup epochs, default: ``0``.
        warmup_lr_init (float): warmup init learning rate, default: ``0.0``.
        eta_min (float): minimum learning rate, default: ``1e-6``.

    Returns:
        Numpy.array, learning rate array.

    Raises:
        TypeError: If `lr_init` or `warmup_lr_init` or `eta_min` is not a float.
        TypeError: If `steps_per_epoch` or `warmup_epochs` or `last_epoch` is not an int.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> from mindflow import get_warmup_cosine_annealing_lr
        >>> lr_init = 0.001
        >>> steps_per_epoch = 3
        >>> last_epoch = 5
        >>> warmup_epochs = 1
        >>> lr = get_warmup_cosine_annealing_lr(lr_init, steps_per_epoch, last_epoch, warmup_epochs=warmup_epochs)
        >>> print(lr)
        [3.3333333e-04 6.6666666e-04 1.0000000e-03 9.0460398e-04 9.0460398e-04
         9.0460398e-04 6.5485400e-04 6.5485400e-04 6.5485400e-04 3.4614600e-04
         3.4614600e-04 3.4614600e-04 9.6396012e-05 9.6396012e-05 9.6396012e-05]
    """
    check_lr_param_type_value(lr_init, "lr_init", float, thresh_hold=0.0, restrict=True)
    check_lr_param_type_value(warmup_lr_init, "warmup_lr_init", float, thresh_hold=0.0, restrict=False)
    check_lr_param_type_value(eta_min, "eta_min", float, thresh_hold=0.0, restrict=False)
    check_lr_param_type_value(warmup_epochs, "warmup_epochs", int, thresh_hold=0, restrict=False)
    check_lr_param_type_value(steps_per_epoch, "steps_per_epoch", int, thresh_hold=0, restrict=True)
    check_lr_param_type_value(last_epoch, "last_epoch", int, thresh_hold=0, restrict=True)

    warmup_steps = warmup_epochs * steps_per_epoch
    warmup_lr_list = []
    if warmup_epochs != 0:
        warmup_lr_list += _get_linear_warmup_lr(warmup_steps, lr_init, warmup_lr_init)

    cosine_lr_list = _get_cosine_annealing_lr(lr_init, steps_per_epoch, last_epoch, eta_min=eta_min)
    lr_each_step = warmup_lr_list + cosine_lr_list[warmup_steps:]
    return np.array(lr_each_step).astype(np.float32)
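
# A minimal end-to-end sketch (``model_params`` and the values below are assumptions, not part
# of this module): the warmup + cosine schedule contains one entry per training step and is
# typically handed to a MindSpore optimizer as a per-step dynamic learning rate.
#
#     from mindspore import nn, Tensor
#     lr = get_warmup_cosine_annealing_lr(lr_init=1e-3, steps_per_epoch=100, last_epoch=50,
#                                         warmup_epochs=2)
#     assert lr.shape == (100 * 50,)
#     optimizer = nn.Adam(model_params, learning_rate=Tensor(lr))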