Source code for mindspore_rl.network.gru_net

# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Cudnn Gru network.
"""
from mindspore_rl.network._rnns import GRU

import mindspore.nn as nn
from mindspore import context
from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter
from mindspore import _checkparam as validator
from mindspore.ops.operations import _rl_inner_ops as rl_ops


class GruNet(nn.Cell):
    r"""
    Stacked GRU (Gated Recurrent Unit) layers.

    Apply GRU layer to the input.

    When the device target is GPU and `enable_fusion` is ``True``, the fused
    ``CudnnGRU`` operator is used together with a single flat weight
    parameter; otherwise the computation is delegated to the ``GRU`` cell
    from ``mindspore_rl.network._rnns``.

    For detailed information, please refer to
    `mindspore.nn.GRU <https://www.mindspore.cn/docs/en/master/api_python/nn/mindspore.nn.GRU.html>`_.

    Args:
        input_size (int): Number of features of input.
        hidden_size (int): Number of features of hidden layer.
        weight_init (str or Initializer): Initialize method, e.g. ``'normal'`` , ``'uniform'``.
            It is only used to initialize the flat weight of the fused GPU operator.
            Default: ``'normal'`` .
        num_layers (int): Number of layers of stacked GRU. Default: ``1`` .
        has_bias (bool): Whether the cell has bias. Default: ``True`` .
        batch_first (bool): Specifies whether the first dimension of input `x` is batch_size.
            Default: ``False`` .
        dropout (float): If not ``0.0`` , append `Dropout` layer on the outputs of each
            GRU layer except the last layer. Default ``0.0`` . The range of dropout is [0.0, 1.0).
        bidirectional (bool): Specifies whether it is a bidirectional GRU,
            num_directions is 2 if `bidirectional` is ``True`` otherwise 1. Default: ``False`` .
        enable_fusion (bool): Whether need to use GRU fusion ops. Default: ``True`` .

    Inputs:
        - **x_in** (Tensor) - Tensor of data type mindspore.float32 and
          shape :math:`(seq\_len, batch\_size, input\_size)` or
          :math:`(batch\_size, seq\_len, input\_size)`.
        - **h_in** (Tensor) - Tensor of data type mindspore.float32 and
          shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
          The data type of `h_in` must be the same as `x_in`.

    Outputs:
        Tuple, a tuple contains (`x_out`, `h_out`).

        - **x_out** (Tensor) - Tensor of shape :math:`(seq\_len, batch\_size, num\_directions * hidden\_size)` or
          :math:`(batch\_size, seq\_len, num\_directions * hidden\_size)`.
        - **h_out** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.

    Raises:
        ValueError: If `dropout` is not in the range [0.0, 1.0).

    Examples:
        >>> import numpy as np
        >>> from mindspore import Tensor
        >>> from mindspore_rl.network.gru_net import GruNet
        >>> net = GruNet(10, 16, num_layers=1, has_bias=True, bidirectional=False)
        >>> x_in = Tensor(np.ones([3, 5, 10]).astype(np.float32))
        >>> h_in = Tensor(np.ones([1, 5, 16]).astype(np.float32))
        >>> x_out, h_out = net(x_in, h_in)
        >>> print(x_out.shape)
        (3, 5, 16)
    """

    def __init__(self,
                 input_size,
                 hidden_size,
                 weight_init='normal',
                 num_layers=1,
                 has_bias=True,
                 batch_first=False,
                 dropout=0.0,
                 bidirectional=False,
                 enable_fusion=True):
        super().__init__()
        validator.check_positive_int(hidden_size, "hidden_size", self.cls_name)
        validator.check_positive_int(input_size, "input_size", self.cls_name)
        validator.check_positive_int(num_layers, "num_layers", self.cls_name)
        validator.check_is_float(dropout, "dropout", self.cls_name)
        validator.check_value_type("has_bias", has_bias, [bool], self.cls_name)
        validator.check_value_type(
            "batch_first", batch_first, [bool], self.cls_name)
        validator.check_value_type(
            "bidirectional", bidirectional, [bool], self.cls_name)
        # The type check above does not enforce the documented range, so
        # reject out-of-range values here regardless of which backend is used.
        if not 0.0 <= dropout < 1.0:
            raise ValueError(
                f"For '{self.cls_name}', the 'dropout' must be in range [0.0, 1.0), "
                f"but got {dropout}.")

        self.batch_first = batch_first
        self.bidirectional = bidirectional
        self.dropout = dropout
        self.enable_cudnn = context.get_context('device_target') in ['GPU']
        self.enable_fusion = enable_fusion

        if self.enable_cudnn and self.enable_fusion:
            # The fused operator takes all weights and biases as one flat
            # parameter, so its total element count is accumulated per layer.
            gate_size = 3 * hidden_size
            num_directions = 2 if bidirectional else 1
            weight_size = 0
            for layer in range(num_layers):
                # Layers after the first consume the (possibly bidirectional)
                # output of the previous layer.
                input_layer_size = input_size if layer == 0 else hidden_size * num_directions
                # Input-to-hidden plus hidden-to-hidden weights.
                increment_size = gate_size * input_layer_size
                increment_size += gate_size * hidden_size
                if has_bias:
                    # Two bias vectors of gate_size elements each.
                    increment_size += 2 * gate_size
                weight_size += increment_size * num_directions
            self.weight = Parameter(
                initializer(weight_init, [weight_size, 1, 1]),
                name="cudnn_weight")
            self.gru = rl_ops.CudnnGRU(input_size=input_size,
                                       hidden_size=hidden_size,
                                       num_layers=num_layers,
                                       has_bias=has_bias,
                                       bidirectional=bidirectional,
                                       dropout=float(dropout))
        else:
            self.gru = GRU(input_size=input_size,
                           hidden_size=hidden_size,
                           num_layers=num_layers,
                           has_bias=has_bias,
                           batch_first=batch_first,
                           bidirectional=bidirectional,
                           dropout=float(dropout),
                           enable_fusion=enable_fusion)

    def construct(self, x_in, h_in):
        r"""
        The forward calculation of gru net

        Args:
            x_in (Tensor): Tensor of data type mindspore.float32 and
                shape :math:`(seq\_len, batch\_size, input\_size)` or
                :math:`(batch\_size, seq\_len, input\_size)`.
            h_in (Tensor): Tensor of data type mindspore.float32 and
                shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
                The data type of `h_in` must be the same as `x_in`.

        Returns:
            - **x_out** (Tensor) - Tensor of shape :math:`(seq\_len, batch\_size, num\_directions * hidden\_size)` or
              :math:`(batch\_size, seq\_len, num\_directions * hidden\_size)`.
            - **h_out** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
        """
        if self.enable_cudnn and self.enable_fusion:
            # CudnnGRU is constructed without a batch_first option, so the
            # batch-first layout is converted to sequence-first here and the
            # output is converted back, matching the non-fused branch.
            # NOTE(review): assumes CudnnGRU expects (seq_len, batch, feature)
            # input, like other cuDNN RNN operators -- confirm against the op.
            if self.batch_first:
                x_in = x_in.transpose(1, 0, 2)
            # The fused operator returns two extra outputs that are not
            # needed by callers of this cell.
            x_out, h_out, _, _ = self.gru(x_in, h_in, self.weight)
            if self.batch_first:
                x_out = x_out.transpose(1, 0, 2)
        else:
            x_out, h_out = self.gru(x_in, h_in)
        return x_out, h_out