# Source code for mindspore_rl.core.uniform_replay_buffer

# Copyright 2021-2023 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""
Implementation of Replay Buffer class.
"""

import numpy as np
import mindspore as ms
from mindspore import get_seed
from mindspore import context, Tensor
from mindspore.ops import operations as P
from mindspore.common.parameter import Parameter, ParameterTuple
import mindspore.nn as nn


def _create_tensor(capacity, shapes, types):
    """
    Allocate the zero-filled storage parameters that back a replay buffer.

    One parameter is built per entry of ``shapes``; its shape is the entry
    prefixed with ``capacity`` as the leading dimension.

    Args:
        capacity (int): capacity of the buffer, used as the first dimension
            of every storage tensor.
        shapes (List[tuple]): per-field shapes; each one is concatenated onto
            ``(capacity,)``, so it has to be a tuple.
        types (List[mindspore.dtype]): per-field data types, indexed in
            parallel with ``shapes``.

    Returns:
        List[Parameter], one non-trainable parameter per field, named
        ``buffer_0``, ``buffer_1``, ... in the order of ``shapes``.
    """
    return [
        Parameter(
            Tensor(np.zeros((capacity,) + field_shape), types[idx]),
            name='buffer_%d' % idx,
            requires_grad=False,
        )
        for idx, field_shape in enumerate(shapes)
    ]


class UniformReplayBuffer(nn.Cell):
    """
    The replay buffer class.
    The replay buffer will store the experience from environment. In replay
    buffer, each element is a list of tensors. Therefore, the constructor of the
    UniformReplayBuffer class takes the shape and type of each tensor as an argument.

    Args:
        batch_size (int): size for sampling from the buffer.
        capacity (int): the capacity of the buffer.
        shapes (list[tuple[int]]): the shape of each tensor in a buffer element.
        types (list[mindspore.dtype]): the data type of each tensor in a buffer element.

    Examples:
        >>> import mindspore as ms
        >>> from mindspore_rl.core.uniform_replay_buffer import UniformReplayBuffer
        >>> batch_size = 10
        >>> capacity = 10000
        >>> shapes = [(4,), (1,), (1,), (4,)]
        >>> types = [ms.float32, ms.int32, ms.float32, ms.float32]
        >>> replaybuffer = UniformReplayBuffer(batch_size, capacity, shapes, types)
        >>> print(replaybuffer)
        UniformReplayBuffer<>
    """

    def __init__(self, batch_size, capacity, shapes, types):
        super().__init__()
        # Storage: one parameter per field, each with `capacity` as leading dim.
        self.buffer = ParameterTuple(_create_tensor(capacity, shapes, types))
        self._capacity = capacity
        # Scalar int32 bookkeeping state handed to the buffer primitives.
        self.count = Parameter(
            Tensor(0, ms.int32), name="count", requires_grad=False)
        self.head = Parameter(
            Tensor(0, ms.int32), name="head", requires_grad=False)
        self.zero = Tensor(0, ms.int32)

        self.buffer_append = P.BufferAppend(self._capacity, shapes, types)
        self.buffer_get = P.BufferGetItem(self._capacity, shapes, types)
        # Fall back to seed 0 when no global seed has been configured.
        seed = get_seed()
        if seed is None:
            seed = 0
        self.buffer_sample = P.BufferSample(
            self._capacity, batch_size, shapes, types, seed)
        # When the device target is Ascend, tag the buffer primitives with a
        # CPU primitive target.
        if context.get_context('device_target') == 'Ascend':
            self.buffer_append.add_prim_attr('primitive_target', 'CPU')
            self.buffer_get.add_prim_attr('primitive_target', 'CPU')
            self.buffer_sample.add_prim_attr('primitive_target', 'CPU')

        self.reshape = P.Reshape()
        self.assign = P.Assign()

        self.greater_equal = P.GreaterEqual()
        self.capacity_tensor = Tensor([capacity], ms.int32)

    def insert(self, exp):
        """
        Insert an element to the buffer. If the buffer is full, FIFO strategy will be used to
        replace the element in the buffer.

        Args:
            exp (list[Tensor]): insert a list of tensor which matches with the initialized shape
                and type into the buffer.

        Returns:
             element (list[Tensor]), return the whole buffer after insertion

        """
        self.buffer_append(self.buffer, exp, self.count, self.head)
        return self.buffer

    def get_item(self, index):
        """
        Get an element from the replay buffer in specific position(index).

        Args:
            index (int): the location of the item.

        Returns:
            element (List[Tensor]), the element from the buffer.
        """
        return self.buffer_get(self.buffer, self.count, self.head, index)

    def sample(self):
        """
        Sampling the replay buffer, which means that it will randomly choose a set of element
        and output them.

        Returns:
            data (Tuple(Tensor)), A set of sampled elements from the buffer.
        """
        return self.buffer_sample(self.buffer, self.count, self.head)

    def reset(self):
        """
        Reset the replay buffer. It changes the value of self.count to zero.

        Returns:
            success, the result of the assign operation on self.count.
        """
        # NOTE(review): only `count` is cleared here; `head` keeps its current
        # value. Confirm this is what the buffer primitives expect.
        success = self.assign(self.count, self.zero)
        return success

    def size(self):
        """
        Return the size of the replay buffer.

        Returns:
            size, the current value of self.count as returned by ``asnumpy()``,
            i.e. the number of elements in the replay buffer.
        """
        return self.count.asnumpy()

    def full(self):
        """
        Check if the replay buffer is full or not.

        Returns:
            Full, the result of comparing count >= capacity, both reshaped to
            shape (1,). True if the replay buffer is full, False otherwise.
        """
        count = self.reshape(self.count, (1,))
        capacity = self.reshape(self.capacity_tensor, (1,))
        return self.greater_equal(count, capacity)