# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""base process"""
import copy
import mindspore.nn as nn
from mindspore.nn.optim import LARS
from mindspore import log as logger
from mindspore.common import Parameter
from .less_batch_normalization import CommonHeadLastFN
__all__ = ["OptimizerProcess", "ParameterProcess"]


class OptimizerProcess:
    """
    Process the optimizer for Boost. Currently, this class supports adding gradient
    centralization (GC) tags and creating a new optimizer.

    Args:
        opt (Cell): Optimizer used.

    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor, Parameter, nn
        >>> import mindspore.ops as ops
        >>> from mindspore.boost import OptimizerProcess
>>>
>>> class Net(nn.Cell):
... def __init__(self, in_features, out_features):
... super(Net, self).__init__()
... self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
... name='weight')
... self.matmul = ops.MatMul()
...
... def construct(self, x):
... output = self.matmul(x, self.weight)
... return output
...
>>> size, in_features, out_features = 16, 16, 10
>>> network = Net(in_features, out_features)
        >>> optimizer = nn.Momentum(network.trainable_params(), learning_rate=0.1, momentum=0.9)
>>> optimizer_process = OptimizerProcess(optimizer)
>>> optimizer_process.add_grad_centralization(network)
>>> optimizer = optimizer_process.generate_new_optimizer()
"""
def __init__(self, opt):
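        # Record how the original optimizer was constructed (its class, init
        # arguments and original parameters) so that generate_new_optimizer can
        # rebuild it later; a LARS wrapper is unwrapped here and re-applied then.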
if isinstance(opt, LARS):
self.is_lars = True
self.opt_class = type(opt.opt)
self.opt_init_args = opt.opt.init_args
self.lars_init_args = opt.init_args
else:
self.is_lars = False
self.opt_class = type(opt)
self.opt_init_args = opt.init_args
self.origin_params = opt.init_params["params"]

    def build_params_dict(self, network):
        """Build the parameter dict of the network."""
cells = network.cells_and_names()
params_dict = {}
for _, cell in cells:
for par in cell.get_parameters(expand=False):
params_dict[id(par)] = cell
return params_dict

    def build_gc_params_group(self, params_dict, parameters):
        """Build the parameter groups that need gradient centralization."""
group_params = []
for group_param in parameters:
if 'order_params' in group_param.keys():
group_params.append(group_param)
continue
params_gc_value = []
params_value = []
for param in group_param['params']:
if 'beta' not in param.name and 'gamma' not in param.name and 'bias' not in param.name:
param_cell = params_dict[id(param)]
if (isinstance(param_cell, nn.Conv2d) and param_cell.group > 1) or \
isinstance(param_cell, CommonHeadLastFN):
params_value.append(param)
else:
params_gc_value.append(param)
else:
params_value.append(param)
if params_gc_value:
new_group_param = copy.deepcopy(group_param)
new_group_param['params'] = params_gc_value
new_group_param['grad_centralization'] = True
group_params.append(new_group_param)
if params_value:
new_group_param = copy.deepcopy(group_param)
new_group_param['params'] = params_value
group_params.append(new_group_param)
return group_params

    def add_grad_centralization(self, network):
        """Add gradient centralization."""
params_dict = self.build_params_dict(network)
parameters = self.origin_params
if parameters is not None and not isinstance(parameters, list):
parameters = list(parameters)
if not parameters:
raise ValueError("Optimizer got an empty parameter list.")
if not isinstance(parameters[0], (dict, Parameter)):
raise TypeError("Only a list of Parameter or dict can be supported.")
if isinstance(parameters[0], Parameter):
logger.warning("Only group parameters support gradient centralization.")
return
self.origin_params = self.build_gc_params_group(params_dict, parameters)

    def generate_new_optimizer(self):
        """Generate the new optimizer."""
if not self.is_lars:
opt = self.opt_class(params=self.origin_params, **self.opt_init_args)
else:
opt = LARS(self.opt_class(params=self.origin_params, **self.opt_init_args), **self.lars_init_args)
return opt


class ParameterProcess:
    """
    Process parameters for Boost. Currently, this class supports creating group
    parameters and automatically setting the gradient segmentation point.

    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor, Parameter, nn
        >>> import mindspore.ops as ops
        >>> from mindspore.boost import ParameterProcess
>>>
>>> class Net(nn.Cell):
... def __init__(self, in_features, out_features):
... super(Net, self).__init__()
... self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
... name='weight')
... self.weight2 = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
... name='weight2')
... self.matmul = ops.MatMul()
... self.matmul2 = ops.MatMul()
...
... def construct(self, x):
... output = self.matmul(x, self.weight)
... output2 = self.matmul2(x, self.weight2)
... return output + output2
...
>>> size, in_features, out_features = 16, 16, 10
>>> network = Net(in_features, out_features)
        >>> new_parameter = network.trainable_params()[:1]
        >>> parameter_process = ParameterProcess()
        >>> group_params = parameter_process.generate_group_params(new_parameter, network.trainable_params())
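        >>> # A minimal illustration (assuming the same network as above): split the
        >>> # trainable parameters into two communication fusion groups at index 1.
        >>> fused_parameters = parameter_process.assign_parameter_group(network.trainable_params(), split_point=[1])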
"""
def __init__(self):
self._parameter_indices = 1

    def assign_parameter_group(self, parameters, split_point=None):
        """Assign parameter group."""
if not isinstance(parameters, (list, tuple)) or not parameters:
return parameters
parameter_len = len(parameters)
if split_point:
split_parameter_index = split_point
else:
split_parameter_index = [parameter_len // 2]
for i in range(parameter_len):
if i in split_parameter_index:
self._parameter_indices += 1
parameters[i].comm_fusion = self._parameter_indices
return parameters

    def generate_group_params(self, parameters, origin_params):
        """Generate group parameters."""
origin_params_copy = origin_params
if origin_params_copy is not None:
if not isinstance(origin_params_copy, list):
origin_params_copy = list(origin_params_copy)
if not origin_params_copy:
raise ValueError("Optimizer got an empty parameter list.")
if not isinstance(origin_params_copy[0], (dict, Parameter)):
raise TypeError("Only a list of Parameter or dict can be supported.")
if isinstance(origin_params_copy[0], Parameter):
group_params = [{"params": parameters}]
return group_params
group_params = []
params_name = [param.name for param in parameters]
new_params_count = copy.deepcopy(params_name)
new_params_clone = {}
max_key_number = 0
for group_param in origin_params_copy:
if 'order_params' in group_param.keys():
new_group_param = copy.deepcopy(group_param)
new_group_param['order_params'] = parameters
group_params.append(new_group_param)
continue
params_value = []
for param in group_param['params']:
if param.name in params_name:
index = params_name.index(param.name)
params_value.append(parameters[index])
new_params_count.remove(param.name)
new_group_param = copy.deepcopy(group_param)
new_group_param['params'] = params_value
group_params.append(new_group_param)
if len(group_param.keys()) > max_key_number:
max_key_number = len(group_param.keys())
new_params_clone = copy.deepcopy(group_param)
if new_params_count:
params_value = []
for param in new_params_count:
index = params_name.index(param)
params_value.append(parameters[index])
if new_params_clone:
new_params_clone['params'] = params_value
group_params.append(new_params_clone)
else:
group_params.append({"params": params_value})
return group_params