# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Cudnn Gru network.
"""
import mindspore.nn as nn
from mindspore import _checkparam as validator
from mindspore import context
from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter
from mindspore.ops.operations import _rl_inner_ops as rl_ops

from mindspore_rl.network._rnns import GRU


class GruNet(nn.Cell):
"""
Stacked GRU (Gated Recurrent Unit) layers.
Apply GRU layer to the input.
For detailed information, please refer to
`mindspore.nn.GRU <https://www.mindspore.cn/docs/en/master/api_python/nn/mindspore.nn.GRU.html>`_.
Args:
input_size (int): Number of features of input.
hidden_size (int): Number of features of hidden layer.
weight_init (str or Initializer): Initialize method, e.g. ``'normal'`` , ``'uniform'``.
Default: ``'normal'`` .
num_layers (int): Number of layers of stacked GRU. Default: ``1`` .
has_bias (bool): Whether the cell has bias. Default: ``True`` .
batch_first (bool): Specifies whether the first dimension of input `x` is batch_size. Default: ``False`` .
dropout (float): If not ``0.0`` , append `Dropout` layer on the outputs of each
GRU layer except the last layer. Default ``0.0`` . The range of dropout is [0.0, 1.0).
bidirectional (bool): Specifies whether it is a bidirectional GRU,
num_directions is 2 if `bidirectional` is ``True`` otherwise 1. Default: ``False`` .
enable_fusion (bool): Whether need to use GRU fusion ops. Default: ``True`` .
Inputs:
- **x_in** (Tensor) - Tensor of data type mindspore.float32 and
shape :math:`(seq\_len, batch\_size, input\_size)` or :math:`(batch\_size, seq\_len, input\_size)`.
- **h_in** (Tensor) - Tensor of data type mindspore.float32 and
shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`. The data type of `h_in`
must be the same as `x_in`.
Outputs:
Tuple, a tuple contains (`x_out`, `h_out`).
- **x_out** (Tensor) - Tensor of shape :math:`(seq\_len, batch\_size, num\_directions * hidden\_size)` or
:math:`(batch\_size, seq\_len, num\_directions * hidden\_size)`.
- **h_out** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
Examples:
>>> net = GruNet(10, 16, 2, has_bias=True, bidirectional=False)
>>> x_in = Tensor(np.ones([3, 5, 10]).astype(np.float32))
>>> h_in = Tensor(np.ones([1, 5, 16]).astype(np.float32))
>>> x_out, h_out = net(x_in, h_in)
>>> print(x_out.shape)
(3, 5, 16)
"""
def __init__(self,
input_size,
hidden_size,
weight_init='normal',
num_layers=1,
has_bias=True,
batch_first=False,
dropout=0.0,
bidirectional=False,
enable_fusion=True):
super().__init__()
validator.check_positive_int(hidden_size, "hidden_size", self.cls_name)
validator.check_positive_int(input_size, "input_size", self.cls_name)
validator.check_positive_int(num_layers, "num_layers", self.cls_name)
validator.check_is_float(dropout, "dropout", self.cls_name)
validator.check_value_type("has_bias", has_bias, [bool], self.cls_name)
validator.check_value_type(
"batch_first", batch_first, [bool], self.cls_name)
validator.check_value_type(
"bidirectional", bidirectional, [bool], self.cls_name)
self.batch_first = batch_first
self.bidirectional = bidirectional
self.dropout = dropout
self.enable_cudnn = context.get_context('device_target') in ['GPU']
self.enable_fusion = enable_fusion
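        # The fused CudnnGRU kernel is only used when the device target is GPU and
        # fusion is requested; otherwise the pure MindSpore GRU implementation from
        # mindspore_rl.network._rnns is used below.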
if self.enable_cudnn and self.enable_fusion:
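            # CudnnGRU consumes a single flattened weight buffer. Its total size is the
            # sum, over layers and directions, of the input-hidden weights (3 gates x
            # layer input size), the hidden-hidden weights (3 gates x hidden_size) and,
            # when has_bias is True, an input and a recurrent bias per gate.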
weight_size = 0
gate_size = 3 * hidden_size
num_directions = 2 if bidirectional else 1
for layer in range(num_layers):
input_layer_size = input_size if layer == 0 else hidden_size * num_directions
increment_size = gate_size * input_layer_size
increment_size += gate_size * hidden_size
if has_bias:
increment_size += 2 * gate_size
weight_size += increment_size * num_directions
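            # All weights and biases are packed into one Parameter of shape
            # (weight_size, 1, 1); the fused op does not expose per-layer tensors.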
self.weight = Parameter(initializer(
weight_init, [weight_size, 1, 1]), name="cudnn_weight")
self.gru = rl_ops.CudnnGRU(input_size=input_size,
hidden_size=hidden_size,
num_layers=num_layers,
has_bias=has_bias,
bidirectional=bidirectional,
dropout=float(dropout))
else:
self.gru = GRU(input_size=input_size,
hidden_size=hidden_size,
num_layers=num_layers,
has_bias=has_bias,
batch_first=batch_first,
bidirectional=bidirectional,
dropout=float(dropout),
enable_fusion=enable_fusion)

    def construct(self, x_in, h_in):
"""
The forward calculation of gru net
Args:
x_in (Tensor): Tensor of data type mindspore.float32 and shape :math:`(seq\_len, batch\_size, input\_size)`
or :math:`(batch\_size, seq\_len, input\_size)`.
h_in (Tensor): Tensor of data type mindspore.float32 and shape :math:`(num\_directions * num\_layers,
batch\_size, hidden\_size)`. The data type of `h_in` must be the same as `x_in`.
Returns:
- **x_out** (Tensor) - Tensor of shape :math:`(seq\_len, batch\_size, num\_directions * hidden\_size)` or
:math:`(batch\_size, seq\_len, num\_directions * hidden\_size)`.
- **h_out** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
"""
x_out = None
h_out = None
if self.enable_cudnn and self.enable_fusion:
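            # The fused op returns two additional auxiliary outputs (used internally
            # for the backward pass), which are discarded here.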
x_out, h_out, _, _ = self.gru(x_in, h_in, self.weight)
else:
x_out, h_out = self.gru(x_in, h_in)
return x_out, h_out
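

# A minimal usage sketch, assuming numpy and a working MindSpore install; it mirrors
# the shapes used in the class docstring example above and is not required by the API.
if __name__ == "__main__":
    import numpy as np
    from mindspore import Tensor

    # Single unidirectional layer: h_in has shape (num_directions * num_layers, batch, hidden).
    _net = GruNet(10, 16, num_layers=1, has_bias=True, bidirectional=False)
    _x_in = Tensor(np.ones([3, 5, 10]).astype(np.float32))
    _h_in = Tensor(np.ones([1, 5, 16]).astype(np.float32))
    _x_out, _h_out = _net(_x_in, _h_in)
    # Expected shapes: (3, 5, 16) for x_out and (1, 5, 16) for h_out.
    print(_x_out.shape, _h_out.shape)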