Source code for mindspore.boost.less_batch_normalization

# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""less Batch Normalization"""
from __future__ import absolute_import

import numpy as np
from mindspore.nn.cell import Cell
from mindspore.nn.layer import Dense
from mindspore.ops import operations as P
from mindspore.common import Tensor, Parameter
from mindspore.common import dtype as mstype
from mindspore.common.initializer import initializer


__all__ = ["CommonHeadLastFN", "LessBN"]


class CommonHeadLastFN(Cell):
    r"""
    The last full normalization (FN) layer.

    This layer implements the operation as:

    .. math::
        \text{inputs} = \text{norm}(\text{inputs}) \\
        \text{kernel} = \text{norm}(\text{kernel}) \\
        \text{outputs} = \text{multiplier} * (\text{inputs} * \text{kernel} + \text{bias})

    Args:
        in_channels (int): The number of channels in the input space.
        out_channels (int): The number of channels in the output space.
        weight_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable weight_init parameter. The dtype
            is the same as input `x`. The values of str refer to the function `initializer`. Default: 'normal'.
        bias_init (Union[Tensor, str, Initializer, numbers.Number]): The trainable bias_init parameter. The dtype is
            the same as input `x`. The values of str refer to the function `initializer`. Default: 'zeros'.
        has_bias (bool): Specifies whether the layer uses a bias vector. Default: ``True``.

    Supported Platforms:
        ``Ascend`` ``GPU`` ``CPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore
        >>> from mindspore import Tensor
        >>> from mindspore.boost import CommonHeadLastFN
        >>> x = Tensor(np.array([[180, 234, 154], [244, 48, 247]]), mindspore.float32)
        >>> net = CommonHeadLastFN(3, 4)
        >>> output = net(x)
    """
    def __init__(self,
                 in_channels,
                 out_channels,
                 weight_init='normal',
                 bias_init='zeros',
                 has_bias=True):
        super(CommonHeadLastFN, self).__init__()
        weight_shape = [out_channels, in_channels]
        self.weight = Parameter(initializer(weight_init, weight_shape), requires_grad=True, name='weight')
        self.x_norm = P.L2Normalize(axis=1)
        self.w_norm = P.L2Normalize(axis=1)
        self.fc = P.MatMul(transpose_a=False, transpose_b=True)
        self.multiplier = Parameter(Tensor(np.ones([1]), mstype.float32), requires_grad=True, name='multiplier')
        self.has_bias = has_bias
        if self.has_bias:
            bias_shape = [out_channels]
            self.bias_add = P.BiasAdd()
            self.bias = Parameter(initializer(bias_init, bias_shape), requires_grad=True, name='bias')

    def construct(self, x):
        x = self.x_norm(x)
        w = self.w_norm(self.weight)
        x = self.fc(x, w)
        if self.has_bias:
            x = self.bias_add(x, self.bias)
        x = self.multiplier * x
        return x
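

# A minimal hedged sketch (not part of the original module) of what
# CommonHeadLastFN computes: because both the input rows and the weight rows
# are L2-normalized along axis 1, each pre-bias logit is the cosine similarity
# between a sample and a class weight vector, scaled by the learnable
# `multiplier` (initialized to 1). The helper name `_demo_common_head_last_fn`
# is illustrative only.
def _demo_common_head_last_fn():
    """Sketch: with multiplier=1 and no bias, outputs lie in [-1, 1]."""
    net = CommonHeadLastFN(3, 4, has_bias=False)
    x = Tensor(np.array([[180, 234, 154], [244, 48, 247]]), mstype.float32)
    out = net(x).asnumpy()
    # Cosine similarities of unit vectors are bounded by 1 in absolute value.
    assert np.all(np.abs(out) <= 1.0 + 1e-5)
    return out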


class LessBN(Cell):
    """
    Reduce the number of BN automatically to improve the network performance
    and ensure the network accuracy.

    Args:
        network (Cell): Network to be modified.
        fn_flag (bool): Replace FC with FN. Default: ``False``.

    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor, Parameter, nn
        >>> import mindspore.ops as ops
        >>> from mindspore.nn import WithLossCell
        >>> from mindspore import dtype as mstype
        >>> from mindspore import boost
        >>>
        >>> class Net(nn.Cell):
        ...     def __init__(self, in_features, out_features):
        ...         super(Net, self).__init__()
        ...         self.weight = Parameter(Tensor(np.ones([in_features, out_features]).astype(np.float32)),
        ...                                 name='weight')
        ...         self.matmul = ops.MatMul()
        ...
        ...     def construct(self, x):
        ...         output = self.matmul(x, self.weight)
        ...         return output
        >>> size, in_features, out_features = 16, 16, 10
        >>> net = Net(in_features, out_features)
        >>> loss = nn.MSELoss()
        >>> optimizer = nn.Momentum(net.trainable_params(), learning_rate=0.1, momentum=0.9)
        >>> net_with_loss = WithLossCell(net, loss)
        >>> inputs = Tensor(np.ones([size, in_features]).astype(np.float32))
        >>> label = Tensor(np.zeros([size, out_features]).astype(np.float32))
        >>> train_network = boost.LessBN(net_with_loss)
        >>> output = train_network(inputs, label)
    """

    def __init__(self, network, fn_flag=False):
        super(LessBN, self).__init__()
        self.network = network
        self.network.set_boost("less_bn")
        self.network.update_cell_prefix()
        if fn_flag:
            self._convert_to_less_bn_net(self.network)
        self.network.add_flags(defer_inline=True)

    @staticmethod
    def _convert_dense(subcell):
        """
        Convert a Dense cell to an FN cell.
        """
        prefix = subcell.param_prefix
        new_subcell = CommonHeadLastFN(subcell.in_channels,
                                       subcell.out_channels,
                                       subcell.weight,
                                       subcell.bias,
                                       False)
        new_subcell.update_parameters_name(prefix + '.')
        return new_subcell

    def construct(self, *inputs):
        return self.network(*inputs)

    def _convert_to_less_bn_net(self, net):
        """
        Convert a network to a less_bn network.
        """
        cells = net.name_cells()
        dense_name = []
        dense_list = []
        for name in cells:
            subcell = cells[name]
            if subcell == net:
                continue
            if isinstance(subcell, Dense):
                dense_name.append(name)
                dense_list.append(subcell)
            else:
                self._convert_to_less_bn_net(subcell)

        if dense_list:
            new_subcell = LessBN._convert_dense(dense_list[-1])
            net.insert_child_to_cell(dense_name[-1], new_subcell)
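

# A minimal hedged sketch (not part of the original module) of the fn_flag
# path: wrapping a network whose head is a Dense layer with
# LessBN(fn_flag=True) replaces that last Dense with a CommonHeadLastFN via
# _convert_to_less_bn_net. The names `_demo_less_bn` and `SmallNet` are
# illustrative only.
def _demo_less_bn():
    """Sketch: LessBN(fn_flag=True) swaps the final Dense for an FN head."""

    class SmallNet(Cell):
        """Toy network ending in a Dense classifier head."""
        def __init__(self):
            super(SmallNet, self).__init__()
            self.fc = Dense(16, 10)

        def construct(self, x):
            return self.fc(x)

    wrapped = LessBN(SmallNet(), fn_flag=True)
    # After conversion, the 'fc' child is a CommonHeadLastFN, not a Dense.
    assert isinstance(wrapped.network.fc, CommonHeadLastFN)
    return wrapped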