# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Cudnn Gru network.
"""
import mindspore.nn as nn
from mindspore import _checkparam as validator
from mindspore import context
from mindspore.common.initializer import initializer
from mindspore.common.parameter import Parameter
from mindspore.ops.operations import _rl_inner_ops as rl_ops

from mindspore_rl.network._rnns import GRU


class GruNet(nn.Cell):
"""
Stacked GRU (Gated Recurrent Unit) layers.
Apply GRU layer to the input.
For detailed information, please refer to
`mindspore.nn.GRU <https://www.mindspore.cn/docs/en/master/api_python/nn/mindspore.nn.GRU.html>`_.
Args:
input_size (int): Number of features of input.
hidden_size (int): Number of features of hidden layer.
weight_init (str or Initializer): Initialize method, e.g. ``'normal'`` , ``'uniform'``.
Default: ``'normal'`` .
num_layers (int): Number of layers of stacked GRU. Default: ``1`` .
has_bias (bool): Whether the cell has bias. Default: ``True`` .
batch_first (bool): Specifies whether the first dimension of input `x` is batch_size. Default: ``False`` .
dropout (float): If not ``0.0`` , append `Dropout` layer on the outputs of each
GRU layer except the last layer. Default ``0.0`` . The range of dropout is [0.0, 1.0).
bidirectional (bool): Specifies whether it is a bidirectional GRU,
num_directions is 2 if `bidirectional` is ``True`` otherwise 1. Default: ``False`` .
enable_fusion (bool): Whether need to use GRU fusion ops. Default: ``True`` .
Inputs:
- **x_in** (Tensor) - Tensor of data type mindspore.float32 and
shape :math:`(seq\_len, batch\_size, input\_size)` or :math:`(batch\_size, seq\_len, input\_size)`.
- **h_in** (Tensor) - Tensor of data type mindspore.float32 and
shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`. The data type of `h_in`
must be the same as `x_in`.
Outputs:
Tuple, a tuple contains (`x_out`, `h_out`).
- **x_out** (Tensor) - Tensor of shape :math:`(seq\_len, batch\_size, num\_directions * hidden\_size)` or
:math:`(batch\_size, seq\_len, num\_directions * hidden\_size)`.
- **h_out** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
Examples:
>>> net = GruNet(10, 16, 2, has_bias=True, bidirectional=False)
>>> x_in = Tensor(np.ones([3, 5, 10]).astype(np.float32))
>>> h_in = Tensor(np.ones([1, 5, 16]).astype(np.float32))
>>> x_out, h_out = net(x_in, h_in)
>>> print(x_out.shape)
(3, 5, 16)
"""
def __init__(self,
input_size,
hidden_size,
weight_init='normal',
num_layers=1,
has_bias=True,
batch_first=False,
dropout=0.0,
bidirectional=False,
enable_fusion=True):
super().__init__()
validator.check_positive_int(hidden_size, "hidden_size", self.cls_name)
validator.check_positive_int(input_size, "input_size", self.cls_name)
validator.check_positive_int(num_layers, "num_layers", self.cls_name)
validator.check_is_float(dropout, "dropout", self.cls_name)
validator.check_value_type("has_bias", has_bias, [bool], self.cls_name)
validator.check_value_type(
"batch_first", batch_first, [bool], self.cls_name)
validator.check_value_type(
"bidirectional", bidirectional, [bool], self.cls_name)
self.batch_first = batch_first
self.bidirectional = bidirectional
self.dropout = dropout
self.enable_cudnn = context.get_context('device_target') in ['GPU']
self.enable_fusion = enable_fusion
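        # The fused CudnnGRU kernel is only used when the device target is GPU and
        # fusion is requested; otherwise the pure MindSpore GRU implementation from
        # mindspore_rl.network._rnns is used below.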
if self.enable_cudnn and self.enable_fusion:
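            # CudnnGRU consumes a single flattened weight buffer. Its total size is the
            # sum, over layers and directions, of the input-hidden weights (3 gates x
            # layer input size), the hidden-hidden weights (3 gates x hidden_size) and,
            # when has_bias is True, an input and a recurrent bias per gate.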
weight_size = 0
gate_size = 3 * hidden_size
num_directions = 2 if bidirectional else 1
for layer in range(num_layers):
input_layer_size = input_size if layer == 0 else hidden_size * num_directions
increment_size = gate_size * input_layer_size
increment_size += gate_size * hidden_size
if has_bias:
increment_size += 2 * gate_size
weight_size += increment_size * num_directions
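            # All weights and biases are packed into one Parameter of shape
            # (weight_size, 1, 1); the fused op does not expose per-layer tensors.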
self.weight = Parameter(initializer(
weight_init, [weight_size, 1, 1]), name="cudnn_weight")
self.gru = rl_ops.CudnnGRU(input_size=input_size,
hidden_size=hidden_size,
num_layers=num_layers,
has_bias=has_bias,
bidirectional=bidirectional,
dropout=float(dropout))
else:
self.gru = GRU(input_size=input_size,
hidden_size=hidden_size,
num_layers=num_layers,
has_bias=has_bias,
batch_first=batch_first,
bidirectional=bidirectional,
dropout=float(dropout),
enable_fusion=enable_fusion)

    def construct(self, x_in, h_in):
"""
The forward calculation of gru net
Args:
x_in (Tensor): Tensor of data type mindspore.float32 and shape :math:`(seq\_len, batch\_size, input\_size)`
or :math:`(batch\_size, seq\_len, input\_size)`.
h_in (Tensor): Tensor of data type mindspore.float32 and shape :math:`(num\_directions * num\_layers,
batch\_size, hidden\_size)`. The data type of `h_in` must be the same as `x_in`.
Returns:
- **x_out** (Tensor) - Tensor of shape :math:`(seq\_len, batch\_size, num\_directions * hidden\_size)` or
:math:`(batch\_size, seq\_len, num\_directions * hidden\_size)`.
- **h_out** (Tensor) - Tensor of shape :math:`(num\_directions * num\_layers, batch\_size, hidden\_size)`.
"""
x_out = None
h_out = None
if self.enable_cudnn and self.enable_fusion:
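            # The fused op returns two additional auxiliary outputs (used internally
            # for the backward pass), which are discarded here.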
x_out, h_out, _, _ = self.gru(x_in, h_in, self.weight)
else:
x_out, h_out = self.gru(x_in, h_in)
return x_out, h_out
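

# A minimal usage sketch, assuming numpy and a working MindSpore install; it mirrors
# the shapes used in the class docstring example above and is not required by the API.
if __name__ == "__main__":
    import numpy as np
    from mindspore import Tensor

    # Single unidirectional layer: h_in has shape (num_directions * num_layers, batch, hidden).
    _net = GruNet(10, 16, num_layers=1, has_bias=True, bidirectional=False)
    _x_in = Tensor(np.ones([3, 5, 10]).astype(np.float32))
    _h_in = Tensor(np.ones([1, 5, 16]).astype(np.float32))
    _x_out, _h_out = _net(_x_in, _h_in)
    # Expected shapes: (3, 5, 16) for x_out and (1, 5, 16) for h_out.
    print(_x_out.shape, _h_out.shape)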