Source code for mindspore.experimental.optim.lr_scheduler

# Copyright 2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""LRScheduler."""
from collections import Counter
from bisect import bisect_right
import math
from mindspore import ops, Tensor, Parameter
from mindspore.experimental.optim.optimizer import Optimizer
from mindspore.common.api import jit_class
import mindspore.common.dtype as mstype
from mindspore.ops import functional as F
from mindspore.ops import operations as P
from mindspore import _checkparam as Validator


# Public names of this module: the list below is what `from ... import *` exposes.
__all__ = ['StepLR', 'LinearLR', 'LRScheduler', 'ExponentialLR', 'PolynomialLR', 'ChainedScheduler',
           'MultiplicativeLR', 'ConstantLR', 'MultiStepLR', 'LambdaLR', 'SequentialLR', 'ReduceLROnPlateau',
           'CyclicLR', 'CosineAnnealingWarmRestarts', 'CosineAnnealingLR']


@jit_class
class LRScheduler:
    r"""
    Basic class of learning rate schedule.

    .. warning::
        This is an experimental lr scheduler module that is subject to change.
        This module must be used with optimizers in `Experimental Optimizer
        <https://www.mindspore.cn/docs/en/r2.2/api_python/mindspore.experimental.html#experimental-optimizer>`_ .

    Args:
        optimizer (:class:`mindspore.experimental.optim.Optimizer`): The optimizer instance.
        last_epoch (int, optional): The index of the last epoch. Default: ``-1``.

    Raises:
        TypeError: If `optimizer` is not an Optimizer.
        TypeError: If `last_epoch` is not int.
        ValueError: If `last_epoch` is less than -1.
        KeyError: If `last_epoch` != -1 and ``'initial_lr'`` not in param groups.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> from mindspore import nn
        >>> from mindspore.experimental import optim
        >>>
        >>> class ConstantLR(optim.lr_scheduler.LRScheduler):
        ...     def __init__(self, optimizer, factor=0.5, total_iters=3, last_epoch=-1):
        ...         self.factor = factor
        ...         self.total_iters = total_iters
        ...         super(ConstantLR, self).__init__(optimizer, last_epoch)
        ...
        ...     def get_lr(self):
        ...         if self.last_epoch == 0:
        ...             return [lr * self.factor for lr in self._last_lr]
        ...         if self.last_epoch != self.total_iters:
        ...             return [lr * 1. for lr in self._last_lr]
        ...         return [lr / self.factor for lr in self._last_lr]
        >>>
        >>> net = nn.Dense(8, 2)
        >>> optimizer = optim.SGD(net.trainable_params(), 0.01)
        >>> scheduler = ConstantLR(optimizer)
        >>> for i in range(4):
        ...     scheduler.step()
        ...     current_lr = scheduler.get_last_lr()
        ...     print(current_lr)
        [Tensor(shape=[], dtype=Float32, value= 0.005)]
        [Tensor(shape=[], dtype=Float32, value= 0.005)]
        [Tensor(shape=[], dtype=Float32, value= 0.01)]
        [Tensor(shape=[], dtype=Float32, value= 0.01)]
    """

    def __init__(self, optimizer, last_epoch=-1):
        if not isinstance(optimizer, Optimizer):
            raise TypeError('{} is not an Optimizer'.format(
                type(optimizer).__name__))
        Validator.check_value_type("last_epoch", last_epoch, [int])
        if last_epoch < -1:
            raise ValueError("Invalid last_epoch: {}".format(last_epoch))

        param_groups = optimizer.param_groups
        if last_epoch != -1:
            # Resuming: every group must already carry the lr it started from.
            for i, group in enumerate(param_groups):
                if 'initial_lr' not in group:
                    raise KeyError(f"param 'initial_lr' is not specified "
                                   f"in param_groups[{i}] when resuming an optimizer")
        else:
            # Fresh start: remember the current lr value as the initial lr.
            for group in param_groups:
                group.setdefault('initial_lr', group['lr'].value())

        self.optimizer = optimizer
        self.groups_num = len(param_groups)
        self.base_lrs = [group['initial_lr'] for group in param_groups]
        # These are the optimizer's own lr objects, so they follow the updates made in `step`.
        self._last_lr = [group['lr'] for group in param_groups]
        self.last_epoch = Parameter(Tensor(last_epoch, dtype=mstype.float32),
                                    name='last_epoch_' + self.__class__.__name__)
        self.increase_tensor = Tensor(1, mstype.int32)
        # Run one step right away so that `last_epoch` advances and the first lr is applied.
        self.step()

    @staticmethod
    def get_lr():
        """Compute the new learning rates; subclasses must override this method."""
        raise NotImplementedError

    def get_last_lr(self):
        """
        Return last computed learning rate by current scheduler.
        """
        return [lr.value() for lr in self._last_lr]

    def step(self, epoch=None):
        """
        Get the current learning rate and change the learning rate.

        Args:
            epoch (int, optional): The index of the last epoch. Default: ``None``.
        """
        if epoch is not None:
            # Jump to the requested epoch, using the closed form when the subclass has one.
            ops.assign(self.last_epoch, epoch)
            if hasattr(self, "_get_closed_form_lr"):
                new_lrs = self._get_closed_form_lr()
            else:
                new_lrs = self.get_lr()
        else:
            ops.assign_add(self.last_epoch, self.increase_tensor)
            new_lrs = self.get_lr()

        for group_idx in range(self.groups_num):
            ops.assign(self.optimizer.param_groups[group_idx]["lr"], new_lrs[group_idx])

        return True
@jit_class
class StepLR(LRScheduler):
    """Decays the learning rate of each parameter group by gamma every step_size epochs.
    Notice that such decay can happen simultaneously with other changes to the learning rate
    from outside this scheduler.

    .. warning::
        This is an experimental lr scheduler module that is subject to change.
        This module must be used with optimizers in `Experimental Optimizer
        <https://www.mindspore.cn/docs/en/r2.2/api_python/mindspore.experimental.html#experimental-optimizer>`_ .

    Args:
        optimizer (:class:`mindspore.experimental.optim.Optimizer`): Wrapped optimizer.
        step_size (int): Period of learning rate decay.
        gamma (float, optional): Multiplicative factor of learning rate decay.
            Default: ``0.1``.
        last_epoch (int, optional): The index of the last epoch. Default: ``-1``.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> from mindspore import nn
        >>> from mindspore.experimental import optim
        >>> # Define the network structure of LeNet5. Refer to
        >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
        >>> net = LeNet5()
        >>> loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
        >>> optimizer = optim.Adam(net.trainable_params(), lr=0.05)
        >>> # Assuming optimizer uses lr = 0.05 for all groups
        >>> # lr = 0.05     if epoch < 2
        >>> # lr = 0.005    if 2 <= epoch < 4
        >>> # lr = 0.0005   if 4 <= epoch < 6
        >>> scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)
        >>> def forward_fn(data, label):
        ...     logits = net(data)
        ...     loss = loss_fn(logits, label)
        ...     return loss, logits
        >>> grad_fn = mindspore.value_and_grad(forward_fn, None, optimizer.parameters, has_aux=True)
        >>> def train_step(data, label):
        ...     (loss, _), grads = grad_fn(data, label)
        ...     optimizer(grads)
        ...     return loss
        >>> for epoch in range(6):
        ...     # Create the dataset taking MNIST as an example. Refer to
        ...     # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/mnist.py
        ...     for data, label in create_dataset():
        ...         train_step(data, label)
        ...     scheduler.step()
        ...     current_lr = scheduler.get_last_lr()
    """

    def __init__(self, optimizer, step_size, gamma=0.1, last_epoch=-1):
        # Hyper-parameters are stored first because the base constructor calls `step()`.
        self.step_size = step_size
        self.gamma = gamma
        super(StepLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the current lrs, multiplied by `gamma` when `last_epoch` is a non-zero multiple of `step_size`."""
        lrs = [lr.value() for lr in self._last_lr]
        if self.last_epoch == 0 or self.last_epoch % self.step_size != 0:
            return lrs
        return [lr * self.gamma for lr in lrs]

    def _get_closed_form_lr(self):
        """Return the lrs computed directly from the base lrs for the current `last_epoch`."""
        return [base_lr * self.gamma ** (self.last_epoch // self.step_size)
                for base_lr in self.base_lrs]
@jit_class
class LinearLR(LRScheduler):
    """Decays the learning rate of each parameter group by linearly changing small
    multiplicative factor until the number of epoch reaches a pre-defined milestone: total_iters.
    Notice that such decay can happen simultaneously with other changes to the learning rate
    from outside this scheduler.

    .. warning::
        This is an experimental lr scheduler module that is subject to change.
        This module must be used with optimizers in `Experimental Optimizer
        <https://www.mindspore.cn/docs/en/r2.2/api_python/mindspore.experimental.html#experimental-optimizer>`_ .

    Args:
        optimizer (:class:`mindspore.experimental.optim.Optimizer`): Wrapped optimizer.
        start_factor (float, optional): The number we multiply learning rate in the first epoch.
            The multiplication factor changes towards `end_factor` in the following epochs.
            Default: ``1.0 /3``.
        end_factor (float, optional): The number we multiply learning rate at the end of linear changing
            process. Default: ``1.0``.
        total_iters (int, optional): The number of iterations that multiplicative factor reaches to 1.
            Default: ``5``.
        last_epoch (int, optional): The index of the last epoch. Default: ``-1``.

    Raises:
        ValueError: If `start_factor` is not in the range of (0, 1].
        ValueError: If `end_factor` is not in the range of [0, 1].

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import mindspore
        >>> from mindspore import nn
        >>> from mindspore.experimental import optim
        >>> # Define the network structure of LeNet5. Refer to
        >>> # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/lenet.py
        >>> net = LeNet5()
        >>> loss_fn = nn.SoftmaxCrossEntropyWithLogits(sparse=True)
        >>> optimizer = optim.Adam(net.trainable_params(), lr=0.05)
        >>> # Assuming optimizer uses lr = 0.05 for all groups
        >>> # lr = 0.025    if epoch == 0
        >>> # lr = 0.03125  if epoch == 1
        >>> # lr = 0.0375   if epoch == 2
        >>> # lr = 0.04375  if epoch == 3
        >>> # lr = 0.05    if epoch >= 4
        >>> scheduler = optim.lr_scheduler.LinearLR(optimizer, start_factor=0.5, total_iters=4)
        >>> def forward_fn(data, label):
        ...     logits = net(data)
        ...     loss = loss_fn(logits, label)
        ...     return loss, logits
        >>> grad_fn = mindspore.value_and_grad(forward_fn, None, optimizer.parameters, has_aux=True)
        >>> def train_step(data, label):
        ...     (loss, _), grads = grad_fn(data, label)
        ...     optimizer(grads)
        ...     return loss
        >>> for epoch in range(5):
        ...     # Create the dataset taking MNIST as an example. Refer to
        ...     # https://gitee.com/mindspore/docs/blob/r2.2/docs/mindspore/code/mnist.py
        ...     for data, label in create_dataset():
        ...         train_step(data, label)
        ...     scheduler.step()
        ...     current_lr = scheduler.get_last_lr()
    """

    def __init__(self, optimizer, start_factor=1.0 / 3, end_factor=1.0, total_iters=5, last_epoch=-1):
        if start_factor <= 0 or start_factor > 1.0:
            raise ValueError('Starting multiplicative factor expected to be greater than 0 and '
                             'less than or equal to 1.')
        if end_factor < 0 or end_factor > 1.0:
            raise ValueError('Ending multiplicative factor expected to be between 0 and 1.')

        # Attributes must exist before the base constructor runs, because it calls `step()`.
        self.total_iters = total_iters
        self.start_factor = start_factor
        self.end_factor = end_factor
        super(LinearLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the lrs for the current epoch, derived from the previously applied lrs."""
        current = [param.value() for param in self._last_lr]

        if self.last_epoch == 0:
            return [value * self.start_factor for value in current]

        if self.last_epoch > self.total_iters:
            return current

        span = self.end_factor - self.start_factor
        factor = 1. + span / (
            self.total_iters * self.start_factor + (self.last_epoch - 1) * span)
        return [value * factor for value in current]

    def _get_closed_form_lr(self):
        """Return the lrs computed directly from the base lrs for the current `last_epoch`."""
        scale = (self.start_factor +
                 (self.end_factor - self.start_factor) * min(self.total_iters, self.last_epoch)
                 / self.total_iters)
        return [base_lr * scale for base_lr in self.base_lrs]
@jit_class
class ExponentialLR(LRScheduler):
    r"""
    For each epoch, the learning rate decays exponentially, multiplied by gamma.
    Notice that such decay can happen simultaneously with other changes to the learning rate
    from outside this scheduler.

    .. warning::
        This is an experimental lr scheduler module that is subject to change.
        This module must be used with optimizers in `Experimental Optimizer
        <https://www.mindspore.cn/docs/en/r2.2/api_python/mindspore.experimental.html#experimental-optimizer>`_ .

    Args:
        optimizer (:class:`mindspore.experimental.optim.Optimizer`): Wrapped optimizer.
        gamma (float): Learning rate scaling factor.
        last_epoch (int, optional): The index of the last epoch. Default: ``-1``.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> from mindspore import nn
        >>> from mindspore.experimental import optim
        >>> class Net(nn.Cell):
        ...     def __init__(self):
        ...         super(Net, self).__init__()
        ...         self.fc = nn.Dense(16 * 5 * 5, 120)
        ...     def construct(self, x):
        ...         return self.fc(x)
        >>> net = Net()
        >>> optimizer = optim.Adam(net.trainable_params(), 0.01)
        >>> scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.5)
        >>> for i in range(3):
        ...     scheduler.step()
        ...     current_lr = scheduler.get_last_lr()
        ...     print(current_lr)
        [Tensor(shape=[], dtype=Float32, value= 0.005)]
        [Tensor(shape=[], dtype=Float32, value= 0.0025)]
        [Tensor(shape=[], dtype=Float32, value= 0.00125)]
    """

    def __init__(self, optimizer, gamma, last_epoch=-1):
        # `gamma` must be set before the base constructor runs, because it calls `step()`.
        self.gamma = gamma
        super(ExponentialLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the previous lrs, scaled by `gamma` on every epoch except epoch 0."""
        current = [param.value() for param in self._last_lr]
        if self.last_epoch != 0:
            return [value * self.gamma for value in current]
        return current

    def _get_closed_form_lr(self):
        """Return the lrs computed directly from the base lrs for the current `last_epoch`."""
        return [initial * self.gamma ** self.last_epoch
                for initial in self.base_lrs]
@jit_class
class PolynomialLR(LRScheduler):
    r"""
    For each epoch, the learning rate is adjusted by polynomial fitting.
    When the epoch is greater than or equal to `total_iters` , the learning rate is ``0`` .
    Notice that such decay can happen simultaneously with other changes to the learning rate
    from outside this scheduler.

    The polynomial formula for learning rate calculation is as follows:

    .. math::
        \begin{split}
        &factor = (\frac{1.0 - \frac{last\_epoch}{total\_iters}}{1.0 - \frac{last\_epoch - 1.0}{total\_iters}})
        ^{power}\\
        &lr = lr \times factor
        \end{split}

    .. warning::
        This is an experimental lr scheduler module that is subject to change.
        This module must be used with optimizers in `Experimental Optimizer
        <https://www.mindspore.cn/docs/en/r2.2/api_python/mindspore.experimental.html#experimental-optimizer>`_ .

    Args:
        optimizer (:class:`mindspore.experimental.optim.Optimizer`): Wrapped optimizer.
        total_iters (int, optional): The number of iterations adjusting learning rate by polynomial fitting.
            Default: ``5``.
        power (float, optional): Power of polynomial. Default: ``1.0``.
        last_epoch (int, optional): The index of the last epoch. Default: ``-1``.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> from mindspore import nn
        >>> from mindspore.experimental import optim
        >>> class Net(nn.Cell):
        ...     def __init__(self):
        ...         super(Net, self).__init__()
        ...         self.fc = nn.Dense(16 * 5 * 5, 120)
        ...     def construct(self, x):
        ...         return self.fc(x)
        >>> net = Net()
        >>> optimizer = optim.Adam(net.trainable_params(), 0.01)
        >>> scheduler = optim.lr_scheduler.PolynomialLR(optimizer)
        >>> for i in range(6):
        ...     scheduler.step()
        ...     current_lr = scheduler.get_last_lr()
        ...     print(current_lr)
        [Tensor(shape=[], dtype=Float32, value= 0.008)]
        [Tensor(shape=[], dtype=Float32, value= 0.006)]
        [Tensor(shape=[], dtype=Float32, value= 0.004)]
        [Tensor(shape=[], dtype=Float32, value= 0.002)]
        [Tensor(shape=[], dtype=Float32, value= 0)]
        [Tensor(shape=[], dtype=Float32, value= 0)]
    """

    def __init__(self, optimizer, total_iters=5, power=1.0, last_epoch=-1):
        # Everything is set before the base constructor runs, because it calls `step()`.
        self.total_iters = total_iters
        self.power = power
        self.min = P.Minimum()
        self.cast = P.Cast()
        super(PolynomialLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the lrs for the current epoch, derived from the previously applied lrs."""
        current = [param.value() for param in self._last_lr]

        # No change at epoch 0 or once `last_epoch` has passed `total_iters`.
        if self.last_epoch == 0 or self.last_epoch > self.total_iters:
            return current

        remaining_now = 1.0 - self.last_epoch / self.total_iters
        remaining_before = 1.0 - (self.last_epoch - 1) / self.total_iters
        factor = (remaining_now / remaining_before) ** self.power
        return [value * factor for value in current]

    def _get_closed_form_lr(self):
        """Return the lrs computed directly from the base lrs for the current `last_epoch`."""
        return [
            (initial * (1.0 - self.min(self.total_iters, self.last_epoch) / self.total_iters) ** self.power)
            for initial in self.base_lrs
        ]
@jit_class
class ChainedScheduler:
    r"""
    Save the learning rate scheduler chain list of multiple learning rate schedulers,
    and call the step() function to execute the step() function of each learning rate scheduler.

    .. warning::
        This is an experimental lr scheduler module that is subject to change.
        This module must be used with optimizers in `Experimental Optimizer
        <https://www.mindspore.cn/docs/en/r2.2/api_python/mindspore.experimental.html#experimental-optimizer>`_ .

    Args:
        schedulers (list[:class:`mindspore.experimental.optim.lr_scheduler.LRScheduler`]):
            List of learning rate schedulers.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> from mindspore import nn
        >>> from mindspore.experimental import optim
        >>> class Net(nn.Cell):
        ...     def __init__(self):
        ...         super(Net, self).__init__()
        ...         self.fc = nn.Dense(16 * 5 * 5, 120)
        ...     def construct(self, x):
        ...         return self.fc(x)
        >>> net = Net()
        >>> optimizer = optim.Adam(net.trainable_params(), 0.01)
        >>> scheduler1 = optim.lr_scheduler.PolynomialLR(optimizer)
        >>> scheduler2 = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.5)
        >>> scheduler = optim.lr_scheduler.ChainedScheduler([scheduler1, scheduler2])
        >>> for i in range(6):
        ...     scheduler.step()
        ...     current_lr = scheduler.get_last_lr()
        ...     print(current_lr)
        [Tensor(shape=[], dtype=Float32, value= 0.004)]
        [Tensor(shape=[], dtype=Float32, value= 0.0015)]
        [Tensor(shape=[], dtype=Float32, value= 0.0005)]
        [Tensor(shape=[], dtype=Float32, value= 0.000125)]
        [Tensor(shape=[], dtype=Float32, value= 0)]
        [Tensor(shape=[], dtype=Float32, value= 0)]
    """

    def __init__(self, schedulers):
        self._schedulers = list(schedulers)
        # Index the stored copy rather than the argument, so that any iterable
        # (not only subscriptable sequences) is accepted.
        self.optimizer = self._schedulers[0].optimizer
        self._last_lr = list(self._schedulers[-1]._last_lr)  # pylint: disable=W0212

    def step(self):
        """
        Sequential execution of the saved learning rate scheduler's step() function.
        """
        for scheduler in self._schedulers:
            scheduler.step()

    def get_last_lr(self):
        """
        Return last computed learning rate by current scheduler.
        """
        return [lr.value() for lr in self._last_lr]
@jit_class
class LambdaLR(LRScheduler):
    """Sets the learning rate of each parameter group to the initial lr
    times a given function. When last_epoch=-1, sets initial lr as lr.

    .. warning::
        This is an experimental lr scheduler module that is subject to change.
        This module must be used with optimizers in `Experimental Optimizer
        <https://www.mindspore.cn/docs/en/r2.2/api_python/mindspore.experimental.html#experimental-optimizer>`_ .

    Args:
        optimizer (:class:`mindspore.experimental.optim.Optimizer`): Wrapped optimizer.
        lr_lambda (Union(function, list)): A function which computes a multiplicative
            factor given a parameter `last_epoch`, or a list of such
            functions, one for each group in `optimizer.param_groups`.
        last_epoch (int, optional): The index of the last epoch. Default: ``-1``.

    Raises:
        ValueError: If the length of `lr_lambda` is not equal to the number of param groups.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> from mindspore import nn
        >>> from mindspore.experimental import optim
        >>> net = nn.Dense(2, 3)
        >>> optimizer = optim.Adam(net.trainable_params(), 0.01)
        >>> lmbda = lambda epoch: 0.9 ** epoch
        >>> scheduler = optim.lr_scheduler.LambdaLR(optimizer, lr_lambda=[lmbda])
        >>> for i in range(3):
        ...     scheduler.step()
        ...     current_lr = scheduler.get_last_lr()
        ...     print(current_lr)
        [Tensor(shape=[], dtype=Float32, value= 0.009)]
        [Tensor(shape=[], dtype=Float32, value= 0.0081)]
        [Tensor(shape=[], dtype=Float32, value= 0.00729)]
    """

    def __init__(self, optimizer, lr_lambda, last_epoch=-1):
        group_count = len(optimizer.param_groups)
        if isinstance(lr_lambda, (list, tuple)):
            # One callable per param group is required.
            if len(lr_lambda) != group_count:
                raise ValueError("Expected {} lr_lambdas, but got {}".format(
                    group_count, len(lr_lambda)))
            self.lr_lambdas = list(lr_lambda)
        else:
            # A single callable is shared by every param group.
            self.lr_lambdas = [lr_lambda] * group_count
        super(LambdaLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return each base lr multiplied by its lambda evaluated at `last_epoch`."""
        return [initial * factor_fn(self.last_epoch)
                for factor_fn, initial in zip(self.lr_lambdas, self.base_lrs)]
@jit_class
class MultiplicativeLR(LRScheduler):
    """Multiply the learning rate of each parameter group by the factor given
    in the specified function. When last_epoch=-1, sets initial lr as lr.

    .. warning::
        This is an experimental lr scheduler module that is subject to change.
        This module must be used with optimizers in `Experimental Optimizer
        <https://www.mindspore.cn/docs/en/r2.2/api_python/mindspore.experimental.html#experimental-optimizer>`_ .

    Args:
        optimizer (:class:`mindspore.experimental.optim.Optimizer`): Wrapped optimizer.
        lr_lambda (Union(function, list)): A function which computes a multiplicative
            factor given an integer parameter epoch, or a list of such
            functions, one for each group in optimizer.param_groups.
        last_epoch (int, optional): The index of the last epoch. Default: ``-1``.

    Raises:
        ValueError: If the length of `lr_lambda` is not equal to the number of param groups.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> from mindspore import nn
        >>> from mindspore.experimental import optim
        >>> net = nn.Dense(2, 3)
        >>> optimizer = optim.Adam(net.trainable_params(), 0.01)
        >>> lmbda = lambda epoch: 0.95
        >>> scheduler = optim.lr_scheduler.MultiplicativeLR(optimizer, lr_lambda=lmbda)
        >>> for i in range(3):
        ...     scheduler.step()
        ...     current_lr = scheduler.get_last_lr()
        ...     print(current_lr)
        [Tensor(shape=[], dtype=Float32, value= 0.0095)]
        [Tensor(shape=[], dtype=Float32, value= 0.009025)]
        [Tensor(shape=[], dtype=Float32, value= 0.00857375)]
    """

    def __init__(self, optimizer, lr_lambda, last_epoch=-1):
        group_count = len(optimizer.param_groups)
        if isinstance(lr_lambda, (list, tuple)):
            # One callable per param group is required.
            if len(lr_lambda) != group_count:
                raise ValueError("Expected {} lr_lambdas, but got {}".format(
                    group_count, len(lr_lambda)))
            self.lr_lambdas = list(lr_lambda)
        else:
            # A single callable is shared by every param group.
            self.lr_lambdas = [lr_lambda] * group_count
        super(MultiplicativeLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the previous lrs, each multiplied by its lambda once `last_epoch` is above 0."""
        current = [param.value() for param in self._last_lr]
        if self.last_epoch > 0:
            return [value * factor_fn(self.last_epoch)
                    for factor_fn, value in zip(self.lr_lambdas, current)]
        return current
@jit_class
class MultiStepLR(LRScheduler):
    """Multiply the learning rate of each parameter group by gamma once the
    number of epoch reaches one of the milestones. Notice that such change can
    happen simultaneously with other changes to the learning rate from outside
    this scheduler. When last_epoch=-1, sets initial lr as lr.

    .. warning::
        This is an experimental lr scheduler module that is subject to change.
        This module must be used with optimizers in `Experimental Optimizer
        <https://www.mindspore.cn/docs/en/r2.2/api_python/mindspore.experimental.html#experimental-optimizer>`_ .

    Args:
        optimizer (:class:`mindspore.experimental.optim.Optimizer`): Wrapped optimizer.
        milestones (list): List of epoch indices. When `last_epoch` reach the milestone,
            multiply the learning rate of each parameter group by `gamma`. A milestone that
            appears several times applies `gamma` that many times.
        gamma (float, optional): Multiplicative factor of learning rate decay.
            Default: ``0.1``.
        last_epoch (int, optional): The index of the last epoch. Default: ``-1``.

    Raises:
        TypeError: If the `milestones` is not list.
        TypeError: If elements of the `milestones` are not int.
        TypeError: If the `gamma` is neither float nor int.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> from mindspore import nn
        >>> from mindspore.experimental import optim
        >>> net = nn.Dense(2, 3)
        >>> optimizer = optim.Adam(net.trainable_params(), 0.05)
        >>> # Assuming optimizer uses lr = 0.05 for all groups
        >>> # lr = 0.05     if epoch < 2
        >>> # lr = 0.005    if 2 <= epoch < 4
        >>> # lr = 0.0005   if epoch >= 4
        >>> scheduler = optim.lr_scheduler.MultiStepLR(optimizer, milestones=[2,4], gamma=0.1)
        >>> for i in range(6):
        ...     scheduler.step()
        ...     current_lr = scheduler.get_last_lr()
        ...     print(current_lr)
        [Tensor(shape=[], dtype=Float32, value= 0.05)]
        [Tensor(shape=[], dtype=Float32, value= 0.005)]
        [Tensor(shape=[], dtype=Float32, value= 0.005)]
        [Tensor(shape=[], dtype=Float32, value= 0.0005)]
        [Tensor(shape=[], dtype=Float32, value= 0.0005)]
        [Tensor(shape=[], dtype=Float32, value= 0.0005)]
    """

    def __init__(self, optimizer, milestones, gamma=0.1, last_epoch=-1):
        Validator.check_value_type('milestones', milestones, [list])
        for milestone in milestones:
            if not isinstance(milestone, int):
                raise TypeError(f"For 'MultiStepLR', elements of the 'milestones' must be type of int, "
                                f"but got one element of 'milestones' type: {type(milestone)}.")
        Validator.check_value_type('gamma', gamma, [float, int])
        # Counter maps each milestone to how many times it was listed.
        self.milestones = Counter(milestones)
        self.milestones_keys = list(self.milestones.keys())
        self.milestones_values = list(self.milestones.values())
        # Sorted copy (duplicates kept) so the closed form can count passed milestones by bisection.
        self.milestones_sorted = sorted(milestones)
        self.gamma = gamma
        # The base constructor calls `step()`, so it must run after the attributes above exist.
        super(MultiStepLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the previous lrs, decayed by `gamma` when `last_epoch` equals a milestone."""
        lrs = [lr.value() for lr in self._last_lr]
        tmp_epoch = int(self.last_epoch.value())

        for i in range(len(self.milestones_keys)):
            if tmp_epoch == self.milestones_keys[i]:
                # A milestone listed several times applies gamma that many times.
                value = self.milestones_values[i]
                return [lr * self.gamma ** value for lr in lrs]
        return lrs

    def _get_closed_form_lr(self):
        """Return the lrs computed directly from the base lrs for the current `last_epoch`.

        The exponent of `gamma` is the number of milestones (duplicates included) that
        are less than or equal to `last_epoch`.
        """
        tmp_epoch = int(self.last_epoch.value())
        passed = bisect_right(self.milestones_sorted, tmp_epoch)
        return [base_lr * self.gamma ** passed for base_lr in self.base_lrs]
@jit_class
class ConstantLR(LRScheduler):
    """Decays the learning rate of each parameter group by a small constant factor until the
    number of epoch reaches a pre-defined milestone: total_iters. Notice that such decay can
    happen simultaneously with other changes to the learning rate from outside this scheduler.

    .. warning::
        This is an experimental lr scheduler module that is subject to change.
        This module must be used with optimizers in `Experimental Optimizer
        <https://www.mindspore.cn/docs/en/r2.2/api_python/mindspore.experimental.html#experimental-optimizer>`_ .

    Args:
        optimizer (:class:`mindspore.experimental.optim.Optimizer`): Wrapped optimizer.
        factor (float, optional): The factor number multiplied learning rate. Default: ``1./3``.
        total_iters (int, optional): The number of steps that the scheduler decays the learning rate,
            when the `last_epoch` reach `total_iters`, restore the learning rate. Default: ``5``.
        last_epoch (int, optional): The index of the last epoch. Default: ``-1``.

    Raises:
        ValueError: If `factor` is not in the range of [0, 1].

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> from mindspore import nn
        >>> from mindspore.experimental import optim
        >>> net = nn.Dense(2, 3)
        >>> optimizer = optim.Adam(net.trainable_params(), 0.05)
        >>> # Assuming optimizer uses lr = 0.05 for all groups
        >>> # lr = 0.025   if epoch <4
        >>> # lr = 0.05    if epoch >= 4
        >>> scheduler = optim.lr_scheduler.ConstantLR(optimizer, factor=0.5, total_iters=4)
        >>> for i in range(6):
        ...     scheduler.step()
        ...     current_lr = scheduler.get_last_lr()
        ...     print(current_lr)
        [Tensor(shape=[], dtype=Float32, value= 0.025)]
        [Tensor(shape=[], dtype=Float32, value= 0.025)]
        [Tensor(shape=[], dtype=Float32, value= 0.025)]
        [Tensor(shape=[], dtype=Float32, value= 0.05)]
        [Tensor(shape=[], dtype=Float32, value= 0.05)]
        [Tensor(shape=[], dtype=Float32, value= 0.05)]
    """

    def __init__(self, optimizer, factor=1.0 / 3, total_iters=5, last_epoch=-1):
        if factor < 0 or factor > 1.0:
            raise ValueError('Constant multiplicative factor expected to be between 0 and 1.')

        # Attributes must exist before the base constructor runs, because it calls `step()`.
        self.total_iters = total_iters
        self.factor = factor
        super(ConstantLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Return the lrs for the current epoch, derived from the previously applied lrs."""
        current = [param.value() for param in self._last_lr]

        if self.last_epoch == 0:
            # Epoch 0: apply the factor.
            return [value * self.factor for value in current]

        if self.last_epoch == self.total_iters:
            # Milestone reached: undo the factor.
            return [value / self.factor for value in current]

        return current

    def _get_closed_form_lr(self):
        """Return the lrs computed directly from the base lrs for the current `last_epoch`."""
        scale = self.factor + (self.last_epoch >= self.total_iters) * (1 - self.factor)
        return [initial * scale for initial in self.base_lrs]
@jit_class
class SequentialLR:
    r"""
    Receives the list of schedulers that is expected to be called sequentially during
    optimization process and milestone points that provides exact intervals to reflect
    which scheduler is supposed to be called at a given epoch.

    .. warning::
        This is an experimental lr scheduler module that is subject to change.
        This module must be used with optimizers in `Experimental Optimizer
        <https://www.mindspore.cn/docs/en/r2.2/api_python/mindspore.experimental.html#experimental-optimizer>`_ .

    Args:
        optimizer (:class:`mindspore.experimental.optim.Optimizer`): Wrapped optimizer.
        schedulers (list[:class:`mindspore.experimental.optim.lr_scheduler.LRScheduler`]):
            List of learning rate schedulers.
        milestones (list): List of integers that reflects milestone points.
        last_epoch (int, optional): The index of the last epoch. Default: ``-1``.

    Raises:
        ValueError: The optimizer in `schedulers` is different from the `optimizer` passed in.
        ValueError: The optimizer in `schedulers` is different from the optimizer of `schedulers[0]`.
        ValueError: Length of `milestones` is not equal to length of `schedulers` minus 1.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> from mindspore.experimental import optim
        >>> from mindspore import nn
        >>> net = nn.Dense(3, 2)
        >>> optimizer = optim.Adam(net.trainable_params(), 0.1)
        >>> scheduler1 = optim.lr_scheduler.ConstantLR(optimizer, factor=0.1, total_iters=2)
        >>> scheduler2 = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9)
        >>> scheduler = optim.lr_scheduler.SequentialLR(optimizer, schedulers=[scheduler1, scheduler2], milestones=[2])
        >>> for i in range(6):
        ...     scheduler.step()
        ...     current_lr = scheduler.get_last_lr()
        ...     print(current_lr)
        [Tensor(shape=[], dtype=Float32, value= 0.01)]
        [Tensor(shape=[], dtype=Float32, value= 0.1)]
        [Tensor(shape=[], dtype=Float32, value= 0.09)]
        [Tensor(shape=[], dtype=Float32, value= 0.081)]
        [Tensor(shape=[], dtype=Float32, value= 0.0729)]
        [Tensor(shape=[], dtype=Float32, value= 0.06561)]
    """

    def __init__(self, optimizer, schedulers, milestones, last_epoch=-1):
        for sched_idx, candidate in enumerate(schedulers):
            if candidate.optimizer != optimizer:
                raise ValueError(
                    "Sequential Schedulers expects all schedulers to belong to the same optimizer, but "
                    f"got scheduler at index {sched_idx} is different from the optimizer passed in.")

            if candidate.optimizer != schedulers[0].optimizer:
                raise ValueError(
                    "Sequential Schedulers expects all schedulers to belong to the same optimizer, but "
                    f"got schedulers at index {0} and {sched_idx} are different.")

        if len(milestones) != len(schedulers) - 1:
            raise ValueError(
                "Sequential Schedulers expects number of schedulers provided to be one more "
                "than the number of milestone points, but got number of schedulers {} and the "
                "number of milestones {}".format(len(schedulers), len(milestones)))

        self.optimizer = optimizer
        self._schedulers = schedulers
        self.milestones = milestones
        self.milestones_len = len(milestones)
        self.increase_tensor = Tensor(1, mstype.int32)
        self.last_epoch = Parameter(Tensor(last_epoch+1, dtype=mstype.float32),
                                    name='last_epoch_' + self.__class__.__name__)

        # Put every group back on its initial lr.
        for group in self.optimizer.param_groups:
            ops.assign(group["lr"], group["initial_lr"])

        # Take one epoch off each wrapped scheduler's counter.
        for wrapped in self._schedulers:
            ops.assign_sub(wrapped.last_epoch, self.increase_tensor)

        # Only the first scheduler is stepped at construction time.
        self._schedulers[0].step()
        self._last_lr = schedulers[0]._last_lr  # pylint: disable=W0212

    def step(self):
        """
        Get the current learning rate and change the learning rate.
        """
        ops.assign_add(self.last_epoch, self.increase_tensor)
        epoch_now = int(self.last_epoch)
        # Number of milestones <= epoch_now is the index of the scheduler to use.
        active_idx = bisect_right(self.milestones, epoch_now)
        active = self._schedulers[active_idx]
        at_milestone = active_idx > 0 and self.milestones[active_idx - 1] == epoch_now
        if not at_milestone:
            active.step()
        else:
            # Exactly on a milestone: start the newly selected scheduler at epoch 0.
            active.step(0)

    def get_last_lr(self):
        """
        Return last computed learning rate by current scheduler.
        """
        return [lr.value() for lr in self._last_lr]
@jit_class
class ReduceLROnPlateau:
    """
    Reduce learning rate when a metric has stopped improving.
    Models often benefit from reducing the learning rate by a factor
    of 2-10 once learning stagnates. This scheduler reads a metrics
    quantity and if no improvement is seen for a 'patience' number
    of epochs, the learning rate is reduced.

    .. warning::
        This is an experimental lr scheduler module that is subject to change.
        This module must be used with optimizers in `Experimental Optimizer
        <https://www.mindspore.cn/docs/en/r2.2/api_python/mindspore.experimental.html#experimental-optimizer>`_ .

    Args:
        optimizer (:class:`mindspore.experimental.optim.Optimizer`): Wrapped optimizer.
        mode (str, optional): One of `min`, `max`. In `min` mode, lr will
            be reduced when the quantity monitored has stopped
            decreasing; in `max` mode it will be reduced when the
            quantity monitored has stopped increasing. Default: ``'min'``.
        factor (float, optional): Factor by which the learning rate will be
            reduced. Default: ``0.1``.
        patience (int, optional): Number of epochs with no improvement after
            which learning rate will be reduced. For example, if
            `patience = 2`, then we will ignore the first 2 epochs
            with no improvement, and will only decrease the LR after the
            3rd epoch if the loss still hasn't improved then.
            Default: ``10``.
        threshold (float, optional): Threshold for measuring the new optimum,
            to only focus on significant changes. Default: ``1e-4``.
        threshold_mode (str, optional): One of `rel`, `abs`. Given dynamic_threshold is the benchmark to
            define whether the current metric is improvement,
            in ``'rel'`` mode, dynamic_threshold = best * ( 1 + threshold ) in ``'max'`` mode
            or best * ( 1 - threshold ) in ``'min'`` mode.
            In ``'abs'`` mode, dynamic_threshold = best + threshold in ``'max'`` mode or
            best - threshold in ``'min'`` mode. Default: ``'rel'``.
        cooldown (int, optional): Number of epochs to wait before resuming
            normal operation after lr has been reduced. Default: ``0``.
        min_lr (Union(float, list), optional): A scalar or a list of scalars. A
            lower bound on the learning rate of all param groups
            or each group respectively. Default: ``0``.
        eps (float, optional): Minimal decay applied to lr. If the difference
            between new and old lr is smaller than eps, the update is
            ignored. Default: ``1e-8``.

    Raises:
        ValueError: `factor` is greater than or equal to 1.0.
        TypeError: `optimizer` is not an `Optimizer`.
        ValueError: When `min_lr` is a list or tuple, the length of it is not equal to the number of param groups.
        ValueError: `mode` is neither ``'min'`` nor ``'max'``.
        ValueError: `threshold_mode` is neither ``'rel'`` nor ``'abs'``.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> from mindspore.experimental import optim
        >>> from mindspore import nn
        >>> net = nn.Dense(3, 2)
        >>> optimizer = optim.Adam(net.trainable_params(), 0.1)
        >>> scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, 'min', patience=0)
        >>> metrics = [1, 1.5, 1.8, 0.4, 0.5]
        >>> for i in range(5):
        ...     scheduler.step(metrics[i])
        ...     current_lr = scheduler.get_last_lr()
        ...     print(current_lr)
        [Tensor(shape=[], dtype=Float32, value= 0.1)]
        [Tensor(shape=[], dtype=Float32, value= 0.01)]
        [Tensor(shape=[], dtype=Float32, value= 0.001)]
        [Tensor(shape=[], dtype=Float32, value= 0.001)]
        [Tensor(shape=[], dtype=Float32, value= 0.0001)]
    """

    def __init__(self, optimizer, mode='min', factor=0.1, patience=10,
                 threshold=1e-4, threshold_mode='rel', cooldown=0,
                 min_lr=0, eps=1e-8):
        # Argument checks run in this order: factor, optimizer type, min_lr length.
        if factor >= 1.0:
            raise ValueError("The lr factor should be less than 1.0.")
        if not isinstance(optimizer, Optimizer):
            raise TypeError("Expected an `Optimizer`, but got type {}".format(type(optimizer).__name__))

        group_count = len(optimizer.param_groups)
        if not isinstance(min_lr, (list, tuple)):
            # A scalar lower bound is shared by every param group.
            self.min_lrs = [Tensor(min_lr, mstype.float32)] * group_count
        else:
            if len(min_lr) != group_count:
                raise ValueError("Expected {} min_lrs, got {}".format(group_count, len(min_lr)))
            self.min_lrs = [Tensor(bound, mstype.float32) for bound in min_lr]

        self.factor = factor
        self.optimizer = optimizer
        self.mode = mode
        self.patience = patience
        self.threshold = threshold
        self.threshold_mode = threshold_mode
        self.cooldown = cooldown
        self.eps = eps
        self.assign = P.Assign()
        self.cast = P.Cast()
        self.last_epoch = Parameter(Tensor(0, dtype=mstype.int32),
                                    name='last_epoch_' + self.__class__.__name__)

        if self.mode not in {'min', 'max'}:
            raise ValueError(f"`mode` should be 'min' or 'max', but got {self.mode}.")
        if self.threshold_mode not in {'rel', 'abs'}:
            raise ValueError(f"`threshold mode` should be 'rel' or 'abs', but got {self.threshold_mode}.")

        # `best` starts at the worst possible value for the chosen mode.
        self.mode_worse = float("inf") if self.mode == 'min' else float("-inf")
        self.best = Parameter(Tensor(self.mode_worse, dtype=mstype.float32), name='best')

        self.cooldown_counter = Parameter(Tensor(0, dtype=mstype.float32), name='cooldown_counter')
        self.wait = Parameter(Tensor(0, dtype=mstype.float32), name='wait')
        self.increase_tensor = Tensor(1, mstype.int32)
        # The lr objects held by the optimizer's param groups are stored directly.
        self._last_lr = [param_group['lr'] for param_group in self.optimizer.param_groups]

    def step(self, metrics):
        """
        Get the current learning rate and change the learning rate.

        Args:
            metrics(Union(int, float)): the evaluation metrics.
        """
        next_epoch = self.last_epoch + 1
        metric_value = self.cast(metrics, mstype.float32)
        self.assign(self.last_epoch, next_epoch)

        if self._is_improvement(metric_value, self.best):
            ops.assign(self.best, metric_value)
            ops.assign(self.wait, 0)
        else:
            ops.assign_add(self.wait, self.increase_tensor)

        # During cooldown the counter is decremented and `wait` is reset to 0.
        if self.in_cooldown:
            ops.assign_sub(self.cooldown_counter, self.increase_tensor)
            ops.assign(self.wait, 0)

        if self.wait > self.patience:
            self._reduce_lr(next_epoch)
            ops.assign(self.cooldown_counter, self.cooldown)
            ops.assign(self.wait, 0)

        return True

    def _reduce_lr(self, epoch):
        """
        Scale every stored lr by `factor`, bounded below by the matching entry of `min_lrs`.
        An lr is only written back when it drops by more than `eps`.
        """
        for lr_param, lower_bound in zip(self._last_lr, self.min_lrs):
            previous = lr_param.value()
            candidate = ops.maximum(previous * self.factor, lower_bound)
            if previous > candidate + self.eps:
                ops.assign(lr_param, candidate)
        return True

    @property
    def in_cooldown(self):
        """ Whether in cooldown period. """
        return self.cooldown_counter > 0

    def _is_improvement(self, current, best):
        """ Whether current metric value is better than best. """
        if self.mode == 'min':
            if self.threshold_mode == 'rel':
                return current < best * (1. - self.threshold)
            if self.threshold_mode == 'abs':
                return current < best - self.threshold
        elif self.mode == 'max' and self.threshold_mode == 'rel':
            return current > best * (self.threshold + 1.)

        # Every remaining combination uses the additive 'max' benchmark.
        return current > best + self.threshold

    def get_last_lr(self):
        """
        Return last computed learning rate by current scheduler.
        """
        return [lr_param.value() for lr_param in self._last_lr]
@jit_class
class CyclicLR(LRScheduler):
    r"""
    Sets the learning rate of each parameter group according to
    cyclical learning rate policy (CLR). The policy cycles the learning
    rate between two boundaries with a constant frequency, as detailed in
    the paper `Cyclical Learning Rates for Training Neural Networks <https://arxiv.org/abs/1506.01186>`_.
    The distance between the two boundaries can be scaled on a per-iteration
    or per-cycle basis.

    This class has three built-in policies, as put forth in the paper:

    - "triangular": A basic triangular cycle without amplitude scaling.
    - "triangular2": A basic triangular cycle that scales initial amplitude by half each cycle.
    - "exp_range": A cycle that scales initial amplitude by :math:`\text{gamma}^{\text{cycle iterations}}`
      at each cycle iteration.

    .. warning::
        This is an experimental lr scheduler module that is subject to change.
        This module must be used with optimizers in `Experimental Optimizer
        <https://www.mindspore.cn/docs/en/r2.2/api_python/mindspore.experimental.html#experimental-optimizer>`_ .

    Args:
        optimizer (:class:`mindspore.experimental.optim.Optimizer`): Wrapped optimizer.
        base_lr (Union(float, list)): Initial learning rate which is the
            lower boundary in the cycle for each parameter group.
        max_lr (Union(float, list)): Upper learning rate boundaries in the cycle
            for each parameter group. Functionally,
            it defines the cycle amplitude (max_lr - base_lr).
            The lr at any cycle is the sum of base_lr
            and some scaling of the amplitude.
        step_size_up (int, optional): Number of training iterations in the
            increasing half of a cycle. Default: ``2000``.
        step_size_down (int, optional): Number of training iterations in the
            decreasing half of a cycle. If step_size_down is None,
            it is set to step_size_up. Default: ``None``.
        mode (str, optional): One of {triangular, triangular2, exp_range}.
            Values correspond to policies detailed above.
            If scale_fn is not None, this argument is ignored.
            Default: ``'triangular'``.
        gamma (float, optional): Constant in 'exp_range' scaling function:
            gamma**(cycle iterations). Default: ``1.0``.
        scale_fn (function, optional): Custom scaling policy defined by a single
            argument lambda function, where
            0 <= scale_fn(x) <= 1 for all x >= 0.
            If specified, then 'mode' is ignored.
            Default: ``None``.
        scale_mode (str, optional): {'cycle', 'iterations'}.
            Defines whether scale_fn is evaluated on
            cycle number or cycle iterations (training
            iterations since start of cycle). Illegal inputs will use ``'iterations'`` by defaults.
            Default: ``'cycle'``.
        last_epoch (int, optional): The index of the last epoch. Default: ``-1``.

    Raises:
        ValueError: When `base_lr` is list or tuple, the length of it is not equal to the number of param groups.
        ValueError: When `max_lr` is list or tuple, the length of it is not equal to the number of param groups.
        ValueError: `mode` is not in [``'triangular'``, ``'triangular2'``, ``'exp_range'``] and `scale_fn` is None.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> from mindspore.experimental import optim
        >>> from mindspore import nn
        >>> net = nn.Dense(3, 2)
        >>> optimizer = optim.SGD(net.trainable_params(), lr=0.1, momentum=0.9)
        >>> scheduler = optim.lr_scheduler.CyclicLR(optimizer, base_lr=0.01, max_lr=0.1)
        >>> expect_list = [[0.010045], [0.01009], [0.010135], [0.01018], [0.010225]]
        >>>
        >>> for i in range(5):
        ...     scheduler.step()
        ...     current_lr = scheduler.get_last_lr()
        ...     print(current_lr)
        [Tensor(shape=[], dtype=Float32, value= 0.010045)]
        [Tensor(shape=[], dtype=Float32, value= 0.01009)]
        [Tensor(shape=[], dtype=Float32, value= 0.010135)]
        [Tensor(shape=[], dtype=Float32, value= 0.01018)]
        [Tensor(shape=[], dtype=Float32, value= 0.010225)]
    """

    def __init__(self,
                 optimizer,
                 base_lr,
                 max_lr,
                 step_size_up=2000,
                 step_size_down=None,
                 mode='triangular',
                 gamma=1.,
                 scale_fn=None,
                 scale_mode='cycle',
                 last_epoch=-1):
        lower_bounds = self._preprocess_input_param(optimizer, base_lr, 'base_lr')
        if last_epoch == -1:
            # On a fresh start each group's lr is replaced by a Parameter built from its base lr.
            for lower, param_group in zip(lower_bounds, optimizer.param_groups):
                param_group['lr'] = Parameter(lower)

        upper_bounds = self._preprocess_input_param(optimizer, max_lr, 'max_lr')
        self.max_lrs = [Tensor(upper) for upper in upper_bounds]

        up_steps = float(step_size_up)
        if step_size_down is None:
            down_steps = up_steps
        else:
            down_steps = float(step_size_down)
        self.total_step_size = up_steps + down_steps
        self.step_up_ratio = up_steps / self.total_step_size

        is_builtin_mode = mode in ['triangular', 'triangular2', 'exp_range']
        if not is_builtin_mode and scale_fn is None:
            raise ValueError('mode is invalid and scale_fn is None')

        self.mode = mode
        self.gamma = gamma
        self.scale_mode = scale_mode
        self._scale_fn_custom = scale_fn
        self._scale_fn_ref = None
        self._init_scale_fn()
        self.floor = P.Floor()
        super(CyclicLR, self).__init__(optimizer, last_epoch)
        # Assigned after the base-class initialiser has run.
        self.base_lrs = [Tensor(lower) for lower in lower_bounds]

    def _init_scale_fn(self):
        """ Define the scale function. """
        # A user-supplied scale function leaves `_scale_fn_ref` and `scale_mode` untouched.
        if self._scale_fn_custom is None:
            if self.mode == 'triangular':
                self.scale_mode = 'cycle'
                self._scale_fn_ref = self._triangular_scale_fn
            elif self.mode == 'triangular2':
                self.scale_mode = 'cycle'
                self._scale_fn_ref = self._triangular2_scale_fn
            elif self.mode == 'exp_range':
                self.scale_mode = 'iterations'
                self._scale_fn_ref = self._exp_range_scale_fn

    def _preprocess_input_param(self, optimizer, param, name):
        """Return correctly formatted lr/momentum for each param group."""
        group_count = len(optimizer.param_groups)
        if not isinstance(param, (list, tuple)):
            # A scalar is repeated once per param group.
            return [param] * group_count
        if len(param) != group_count:
            raise ValueError("Expected {} values for {}, got {}".format(
                group_count, name, len(param)))
        return param

    def scale_fn(self, x):
        """Apply the custom scale function if one was given, otherwise the built-in one."""
        if self._scale_fn_custom is not None:
            return self._scale_fn_custom(x)
        return self._scale_fn_ref(x)

    def _triangular_scale_fn(self, x):
        """Constant amplitude: always 1."""
        return 1.

    def _triangular2_scale_fn(self, x):
        """Amplitude 1 / 2 ** (x - 1)."""
        return 1 / (2. ** (x - 1))

    def _exp_range_scale_fn(self, x):
        """Amplitude gamma ** x."""
        return self.gamma ** (x)

    def get_lr(self):
        """Compute the learning rate of every param group from `last_epoch`."""
        progress = self.last_epoch / self.total_step_size
        cycle = self.floor(1 + progress)
        # Position inside the current cycle.
        position = 1. + progress - cycle
        if position <= self.step_up_ratio:
            scale_factor = position / self.step_up_ratio
        else:
            scale_factor = (position - 1) / (self.step_up_ratio - 1)

        computed = []
        for lower, upper in zip(self.base_lrs, self.max_lrs):
            amplitude = (upper - lower) * scale_factor
            # Any scale_mode other than 'cycle' is treated as 'iterations'.
            if self.scale_mode == 'cycle':
                computed.append(lower + amplitude * self.scale_fn(cycle))
            else:
                computed.append(lower + amplitude * self.scale_fn(self.last_epoch))
        return computed
@jit_class
class CosineAnnealingWarmRestarts(LRScheduler):
    r"""
    Set the learning rate of each parameter group using a cosine annealing warm restarts
    schedule. Where :math:`\eta_{max}` is set to the initial lr, :math:`\eta_{min}` is the minimum value
    for learning rate, :math:`\eta_{t}` is the current learning rate, :math:`T_{0}` is the number of iterations for the
    first restart, :math:`T_{i}` is the current number of iterations between two warm restarts in SGDR,
    :math:`T_{cur}` is the number of epochs since the last restart in SGDR.

    .. math::
        \eta_t = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})\left(1 +
        \cos\left(\frac{T_{cur}}{T_{i}}\pi\right)\right)

    When :math:`T_{cur}=T_{i}`, set :math:`\eta_t = \eta_{min}`.
    When :math:`T_{cur}=0` after restart, set :math:`\eta_t=\eta_{max}`.

    For more details, please refer to: `SGDR: Stochastic Gradient Descent with Warm Restarts
    <https://arxiv.org/abs/1608.03983>`_.

    .. warning::
        This is an experimental lr scheduler module that is subject to change.
        This module must be used with optimizers in `Experimental Optimizer
        <https://www.mindspore.cn/docs/en/r2.2/api_python/mindspore.experimental.html#experimental-optimizer>`_ .

    Args:
        optimizer (:class:`mindspore.experimental.optim.Optimizer`): Wrapped optimizer.
        T_0 (int): Number of iterations for the first restart.
        T_mult (int, optional): A factor increases :math:`T_{i}` after a restart. Default: ``1``.
        eta_min (Union(float, int), optional): Minimum learning rate. Default: ``0``.
        last_epoch (int, optional): The index of the last epoch. Default: ``-1``.

    Raises:
        ValueError: `T_0` is less than or equal to 0 or not an int.
        ValueError: `T_mult` is less than 1 or not an int.
        ValueError: `eta_min` is not int or float.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> from mindspore.experimental import optim
        >>> from mindspore import nn
        >>> net = nn.Dense(3, 2)
        >>> optimizer = optim.SGD(net.trainable_params(), lr=0.1, momentum=0.9)
        >>> scheduler = optim.lr_scheduler.CosineAnnealingWarmRestarts(optimizer, 2)
        >>> iters = 3
        >>> for epoch in range(2):
        ...     for i in range(iters):
        ...         scheduler.step(epoch + i / iters)
        ...         current_lr = scheduler.get_last_lr()
        ...         print(current_lr)
        [Tensor(shape=[], dtype=Float32, value= 0.1)]
        [Tensor(shape=[], dtype=Float32, value= 0.0933013)]
        [Tensor(shape=[], dtype=Float32, value= 0.075)]
        [Tensor(shape=[], dtype=Float32, value= 0.05)]
        [Tensor(shape=[], dtype=Float32, value= 0.025)]
        [Tensor(shape=[], dtype=Float32, value= 0.00669873)]
    """

    def __init__(self, optimizer, T_0, T_mult=1, eta_min=0, last_epoch=-1):
        # The type test comes first so a non-numeric argument raises the documented
        # ValueError instead of a TypeError from the comparison.
        if not isinstance(T_0, int) or T_0 <= 0:
            raise ValueError("T_0 should be an integer and greater than 0, but got {}".format(T_0))
        if not isinstance(T_mult, int) or T_mult < 1:
            raise ValueError("T_mult should be an integer and equal or greater than 1, but got {}".format(T_mult))
        self.T_0 = Parameter(Tensor(T_0, dtype=mstype.float32), name='T_0')
        self.T_i = Parameter(Tensor(T_0, dtype=mstype.float32), name='T_i')
        self.T_mult = T_mult
        Validator.check_value_type('eta_min', eta_min, [float, int])
        self.eta_min = Tensor(eta_min)
        self.T_cur = Parameter(Tensor(last_epoch, dtype=mstype.float32), name='T_cur')
        self.increase_tensor = Tensor(1, mstype.int32)
        self.zero_tensor = Tensor(0, mstype.int32)

        self.math_pi = math.pi
        self.cos = P.Cos()
        self.cast = P.Cast()
        self.log = P.Log()
        self.assign = P.Assign()
        self.floor = P.Floor()
        self._last_lr = [group["lr"] for group in optimizer.param_groups]
        super(CosineAnnealingWarmRestarts, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Evaluate the cosine formula for every base lr using the current `T_cur` and `T_i`."""
        pct = self.cast(self.math_pi * self.T_cur / self.T_i, mstype.float32)
        return [self.eta_min + (base_lr - self.eta_min) * (1 + self.cos(pct)) / 2
                for base_lr in self.base_lrs]

    def step(self, epoch=None):
        """
        Get the current learning rate and change the learning rate.

        Args:
            epoch (Union(int, float), optional): The index of the last epoch. Fractional values are
                accepted, as shown in the class example. When ``None``, the scheduler advances by
                one step from `last_epoch`. Default: ``None``.

        Raises:
            ValueError: `epoch` is given and is negative.
        """
        if epoch is None and self.last_epoch < 0:
            epoch = self.zero_tensor

        if epoch is None:
            epoch = self.last_epoch + 1
            ops.assign_add(self.T_cur, self.increase_tensor)
            if self.T_cur >= self.T_i:
                # Restart: carry the overshoot into the new period and lengthen the period.
                ops.assign(self.T_cur, self.T_cur - self.T_i)
                ops.assign(self.T_i, self.T_i * self.T_mult)
        else:
            if epoch < 0:
                raise ValueError("epoch should be a non-negative integer, but got {}".format(epoch))
            epoch = self.cast(epoch, mstype.float32)

            if epoch >= self.T_0:
                if self.T_mult == 1:
                    ops.assign(self.T_cur, epoch % self.T_0)
                else:
                    # `P.Log` is a unary natural logarithm, so the base-T_mult logarithm is
                    # obtained by change of base. The result is truncated towards zero.
                    # NOTE(review): float rounding can undershoot at exact restart boundaries.
                    exp = int(self.log(epoch / self.T_0 * (self.T_mult - 1) + 1) / math.log(self.T_mult))
                    value = epoch - self.T_0 * (self.T_mult ** exp - 1) / (self.T_mult - 1)
                    ops.assign(self.T_cur, value)
                    ops.assign(self.T_i, self.T_0 * self.T_mult ** exp)
            else:
                ops.assign(self.T_i, self.T_0.value())
                # `epoch` is already float32 at this point.
                ops.assign(self.T_cur, epoch)

        self.assign(self.last_epoch, self.floor(epoch))

        # Index-based access is kept from the original implementation.
        for i, data in enumerate(zip(self.optimizer.param_groups, self.get_lr())):
            _, lr = data
            F.assign(self.optimizer.param_groups[i]["lr"], lr)
@jit_class
class CosineAnnealingLR(LRScheduler):
    r"""
    Set the learning rate of each parameter group using a cosine annealing lr
    schedule. Where :math:`\eta_{max}` is set to the initial lr, :math:`\eta_{min}` is the minimum value
    for learning rate, :math:`\eta_{t}` is the current learning rate, :math:`T_{max}` is iteration number of cosine
    function, and :math:`T_{cur}` is the number of epochs since the last restart in SGDR.

    .. math::
        \begin{aligned}
            \eta_t & = \eta_{min} + \frac{1}{2}(\eta_{max} - \eta_{min})\left(1
            + \cos\left(\frac{T_{cur}}{T_{max}}\pi\right)\right),
            & T_{cur} \neq (2k+1)T_{max}; \\
            \eta_{t+1} & = \eta_{t} + \frac{1}{2}(\eta_{max} - \eta_{min})
            \left(1 - \cos\left(\frac{1}{T_{max}}\pi\right)\right),
            & T_{cur} = (2k+1)T_{max}.
        \end{aligned}

    For more details, please refer to: `SGDR: Stochastic Gradient Descent with Warm Restarts
    <https://arxiv.org/abs/1608.03983>`_

    .. warning::
        This is an experimental lr scheduler module that is subject to change.
        This module must be used with optimizers in `Experimental Optimizer
        <https://www.mindspore.cn/docs/en/r2.2/api_python/mindspore.experimental.html#experimental-optimizer>`_ .

    Args:
        optimizer (:class:`mindspore.experimental.optim.Optimizer`): Wrapped optimizer.
        T_max (int): Maximum number of iterations.
        eta_min (float, optional): Minimum learning rate. Default: ``0``.
        last_epoch (int, optional): The index of the last epoch. Default: ``-1``.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> from mindspore.experimental import optim
        >>> from mindspore import nn
        >>> net = nn.Dense(3, 2)
        >>> optimizer = optim.SGD(net.trainable_params(), lr=0.1, momentum=0.9)
        >>> scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=2)
        >>>
        >>> for i in range(6):
        ...     scheduler.step()
        ...     current_lr = scheduler.get_last_lr()
        ...     print(current_lr)
        [Tensor(shape=[], dtype=Float32, value= 0.05)]
        [Tensor(shape=[], dtype=Float32, value= 0)]
        [Tensor(shape=[], dtype=Float32, value= 0.05)]
        [Tensor(shape=[], dtype=Float32, value= 0.1)]
        [Tensor(shape=[], dtype=Float32, value= 0.05)]
        [Tensor(shape=[], dtype=Float32, value= 0)]
    """

    def __init__(self, optimizer, T_max, eta_min=0, last_epoch=-1):
        self.cos = P.Cos()
        self.cast = P.Cast()
        self.math_pi = math.pi
        self.T_max = T_max
        self.eta_min = eta_min
        super(CosineAnnealingLR, self).__init__(optimizer, last_epoch)

    def get_lr(self):
        """Compute the next learning rates recursively from the current values in `_last_lr`."""
        current = [lr_param.value() for lr_param in self._last_lr]

        # At epoch 0 the current values are returned unchanged.
        if self.last_epoch == 0:
            return current

        if (self.last_epoch - 1 - self.T_max) % (2 * self.T_max) == 0:
            # Previous epoch was an odd multiple of T_max: apply the additive update.
            pct_pi = self.cast(self.math_pi / self.T_max, mstype.float32)
            rise = 1 - self.cos(pct_pi)
            return [lr + (base_lr - self.eta_min) * rise / 2
                    for base_lr, lr in zip(self.base_lrs, current)]

        # General case: rescale the distance to eta_min by the ratio of consecutive cosine terms.
        cos_now = 1 + self.cos(self.math_pi * self.last_epoch / self.T_max)
        cos_prev = 1 + self.cos(self.math_pi * (self.last_epoch - 1) / self.T_max)
        ratio = cos_now / cos_prev
        return [ratio * (lr - self.eta_min) + self.eta_min for lr in current]

    def _get_closed_form_lr(self):
        """Compute the learning rates directly from `base_lrs` and `last_epoch`."""
        cos_term = 1 + self.cos(self.math_pi * self.last_epoch / self.T_max)
        return [self.eta_min + (base_lr - self.eta_min) * cos_term / 2
                for base_lr in self.base_lrs]