Source code for mindspore.nn.probability.distribution.gamma

# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Gamma Distribution"""
import numpy as np
from mindspore.ops import operations as P
from mindspore.ops import composite as C
import mindspore.nn as nn
from mindspore._checkparam import Validator
from mindspore.common import dtype as mstype
from .distribution import Distribution
from ._utils.utils import check_greater_zero, check_distribution_name
from ._utils.custom_ops import log_generic


class Gamma(Distribution):
    """
    Gamma distribution.

    Args:
        concentration (list, numpy.ndarray, Tensor): The concentration,
          also known as alpha of the Gamma distribution. Default: None.
        rate (list, numpy.ndarray, Tensor): The rate, also known as
          beta of the Gamma distribution. Default: None.
        seed (int): The seed used in sampling. The global seed is used if it is None. Default: None.
        dtype (mindspore.dtype): The type of the event samples. Default: mstype.float32.
        name (str): The name of the distribution. Default: 'Gamma'.

    Supported Platforms:
        ``Ascend``

    Note:
        `concentration` and `rate` must be greater than zero.
        `dist_spec_args` are `concentration` and `rate`.
        `dtype` must be a float type because Gamma distributions are continuous.

    Examples:
        >>> import mindspore
        >>> import mindspore.nn as nn
        >>> import mindspore.nn.probability.distribution as msd
        >>> from mindspore import Tensor
        >>> # To initialize a Gamma distribution of the concentration 3.0 and the rate 4.0.
        >>> g1 = msd.Gamma([3.0], [4.0], dtype=mindspore.float32)
        >>> # A Gamma distribution can be initialized without arguments.
        >>> # In this case, `concentration` and `rate` must be passed in through arguments.
        >>> g2 = msd.Gamma(dtype=mindspore.float32)
        >>> # Here are some tensors used below for testing
        >>> value = Tensor([1.0, 2.0, 3.0], dtype=mindspore.float32)
        >>> concentration_a = Tensor([2.0], dtype=mindspore.float32)
        >>> rate_a = Tensor([2.0, 2.0, 2.0], dtype=mindspore.float32)
        >>> concentration_b = Tensor([1.0], dtype=mindspore.float32)
        >>> rate_b = Tensor([1.0, 1.5, 2.0], dtype=mindspore.float32)
        >>>
        >>> # Private interfaces of probability functions corresponding to public interfaces, including
        >>> # `prob`, `log_prob`, `cdf`, `log_cdf`, `survival_function`, and `log_survival`,
        >>> # have the same arguments as follows.
        >>> # Args:
        >>> #     value (Tensor): the value to be evaluated.
        >>> #     concentration (Tensor): the concentration of the distribution. Default: self._concentration.
        >>> #     rate (Tensor): the rate of the distribution. Default: self._rate.
        >>> # Examples of `prob`.
        >>> # Similar calls can be made to other probability functions
        >>> # by replacing 'prob' by the name of the function
        >>> ans = g1.prob(value)
        >>> print(ans.shape)
        (3,)
        >>> # Evaluate with respect to the distribution b.
        >>> ans = g1.prob(value, concentration_b, rate_b)
        >>> print(ans.shape)
        (3,)
        >>> # `concentration` and `rate` must be passed in during function calls for g2.
        >>> ans = g2.prob(value, concentration_a, rate_a)
        >>> print(ans.shape)
        (3,)
        >>> # Functions `mean`, `sd`, `mode`, `var`, and `entropy` have the same arguments.
        >>> # Args:
        >>> #     concentration (Tensor): the concentration of the distribution. Default: self._concentration.
        >>> #     rate (Tensor): the rate of the distribution. Default: self._rate.
        >>> # Examples of `mean`, `sd`, `mode`, `var`, and `entropy` are similar.
        >>> ans = g1.mean()
        >>> print(ans.shape)
        (1,)
        >>> ans = g1.mean(concentration_b, rate_b)
        >>> print(ans.shape)
        (3,)
        >>> # `concentration` and `rate` must be passed in during function calls.
        >>> ans = g2.mean(concentration_a, rate_a)
        >>> print(ans.shape)
        (3,)
        >>> # Interfaces of 'kl_loss' and 'cross_entropy' are the same:
        >>> # Args:
        >>> #     dist (str): the type of the distributions. Only "Gamma" is supported.
        >>> #     concentration_b (Tensor): the concentration of distribution b.
        >>> #     rate_b (Tensor): the rate of distribution b.
        >>> #     concentration_a (Tensor): the concentration of distribution a. Default: self._concentration.
        >>> #     rate_a (Tensor): the rate of distribution a. Default: self._rate.
        >>> # Examples of `kl_loss`. `cross_entropy` is similar.
        >>> ans = g1.kl_loss('Gamma', concentration_b, rate_b)
        >>> print(ans.shape)
        (3,)
        >>> ans = g1.kl_loss('Gamma', concentration_b, rate_b, concentration_a, rate_a)
        >>> print(ans.shape)
        (3,)
        >>> # Additional `concentration` and `rate` must be passed in.
        >>> ans = g2.kl_loss('Gamma', concentration_b, rate_b, concentration_a, rate_a)
        >>> print(ans.shape)
        (3,)
        >>> # Examples of `sample`.
        >>> # Args:
        >>> #     shape (tuple): the shape of the sample. Default: ()
        >>> #     concentration (Tensor): the concentration of the distribution. Default: self._concentration.
        >>> #     rate (Tensor): the rate of the distribution. Default: self._rate.
        >>> ans = g1.sample()
        >>> print(ans.shape)
        (1,)
        >>> ans = g1.sample((2,3))
        >>> print(ans.shape)
        (2, 3, 1)
        >>> ans = g1.sample((2,3), concentration_b, rate_b)
        >>> print(ans.shape)
        (2, 3, 3)
        >>> ans = g2.sample((2,3), concentration_a, rate_a)
        >>> print(ans.shape)
        (2, 3, 3)
    """

    def __init__(self,
                 concentration=None,
                 rate=None,
                 seed=None,
                 dtype=mstype.float32,
                 name="Gamma"):
        """
        Constructor of Gamma.

        Args:
            concentration (list, numpy.ndarray, Tensor): The concentration (alpha). Default: None.
            rate (list, numpy.ndarray, Tensor): The rate (beta). Default: None.
            seed (int): The seed forwarded to the parent constructor. Default: None.
            dtype (mindspore.dtype): The type of the event samples; validated against
                `mstype.float_type`. Default: mstype.float32.
            name (str): The name of the distribution. Default: 'Gamma'.

        Raises:
            TypeError: If `concentration` or `rate` is a Python int or float.
        """
        # Must stay the first statement: locals() captures every local bound at this
        # point, so only `self` and the constructor arguments are recorded.
        param = dict(locals())
        param['param_dict'] = {'concentration': concentration, 'rate': rate}
        valid_dtype = mstype.float_type
        # NOTE(review): presumably raises when dtype is not in valid_dtype -- confirm in Validator.
        Validator.check_type_name("dtype", dtype, valid_dtype, type(self).__name__)

        # As some operators can't accept scalar input, check the type here
        if isinstance(concentration, (int, float)):
            raise TypeError("Input concentration can't be scalar")
        if isinstance(rate, (int, float)):
            raise TypeError("Input rate can't be scalar")

        super(Gamma, self).__init__(seed, dtype, name, param)

        # Either parameter may remain None; it then has to be supplied on each call.
        self._concentration = self._add_parameter(concentration, 'concentration')
        self._rate = self._add_parameter(rate, 'rate')
        # Positivity is only checked for the parameters that were actually provided.
        if self._concentration is not None:
            check_greater_zero(self._concentration, "concentration")
        if self._rate is not None:
            check_greater_zero(self._rate, "rate")

        # ops needed for the class
        self.log = log_generic
        self.square = P.Square()
        self.sqrt = P.Sqrt()
        # Used by `_sample` to drop the leading axis added for an empty sample shape.
        self.squeeze = P.Squeeze(0)
        self.cast = P.Cast()
        self.dtypeop = P.DType()
        self.fill = P.Fill()
        self.shape = P.Shape()
        self.select = P.Select()
        self.greater = P.Greater()
        self.lgamma = nn.LGamma()
        self.digamma = nn.DiGamma()
        self.igamma = nn.IGamma()

    def extend_repr(self):
        """
        Build the distribution-specific part of the string representation.

        Returns:
            str, the stored `concentration` and `rate` when `is_scalar_batch` is true,
            otherwise the broadcast batch shape.
        """
        if not self.is_scalar_batch:
            return f'batch_shape = {self._broadcast_shape}'
        return f'concentration = {self._concentration}, rate = {self._rate}'

    @property
    def concentration(self):
        """
        Return the concentration, also known as the alpha of the Gamma distribution,
        as stored by the constructor (None when no concentration was given).

        NOTE(review): any cast to the parameter dtype presumably happens inside
        `_add_parameter`; confirm there.
        """
        return self._concentration

    @property
    def rate(self):
        """
        Return the rate, also known as the beta of the Gamma distribution,
        as stored by the constructor (None when no rate was given).

        NOTE(review): any cast to the parameter dtype presumably happens inside
        `_add_parameter`; confirm there.
        """
        return self._rate

    def _get_dist_type(self):
        """
        Return the type name of this distribution, the string "Gamma".
        """
        return "Gamma"

    def _get_dist_args(self, concentration=None, rate=None):
        """
        Resolve the distribution arguments, falling back to the stored parameters.

        Args:
            concentration (Tensor): The concentration to use. Default: self._concentration.
            rate (Tensor): The rate to use. Default: self._rate.

        Returns:
            tuple, `(concentration, rate)`. An argument that is passed in is run through
            `checktensor`; an omitted one is replaced by the stored parameter.
        """
        if concentration is None:
            concentration = self._concentration
        else:
            self.checktensor(concentration, 'concentration')

        if rate is None:
            rate = self._rate
        else:
            self.checktensor(rate, 'rate')

        return concentration, rate

    def _mean(self, concentration=None, rate=None):
        r"""
        The mean of the distribution.

        .. math::
            E[X] = \alpha / \beta
        """
        alpha, beta = self._check_param_type(concentration, rate)
        return alpha / beta

    def _var(self, concentration=None, rate=None):
        r"""
        The variance of the distribution.

        .. math::
            Var(X) = \alpha / \beta^{2}
        """
        alpha, beta = self._check_param_type(concentration, rate)
        rate_squared = self.square(beta)
        return alpha / rate_squared

    def _sd(self, concentration=None, rate=None):
        r"""
        The standard deviation of the distribution.

        .. math::
            SD(X) = \sqrt{\alpha} / \beta
        """
        alpha, beta = self._check_param_type(concentration, rate)
        root_alpha = self.sqrt(alpha)
        return root_alpha / beta

    def _mode(self, concentration=None, rate=None):
        r"""
        The mode of the distribution.

        .. math::
            mode(X) = (\alpha - 1) / \beta

        Entries whose concentration is not strictly greater than 1 are returned as NaN.
        """
        alpha, beta = self._check_param_type(concentration, rate)
        # Mask of the entries for which the closed-form mode is reported.
        has_mode = self.greater(alpha, 1.)
        nan_fill = self.fill(self.dtypeop(alpha), self.shape(alpha), np.nan)
        candidate = (alpha - 1.) / beta
        return self.select(has_mode, candidate, nan_fill)

    def _entropy(self, concentration=None, rate=None):
        r"""
        Evaluate entropy.

        .. math::
            H(X) = \alpha - \log(\beta) + \log(\Gamma(\alpha)) + (1 - \alpha) * \psi(\alpha)

        where :math:`\psi` is the digamma function.
        """
        alpha, beta = self._check_param_type(concentration, rate)
        log_rate = self.log(beta)
        log_gamma_alpha = self.lgamma(alpha)
        digamma_term = (1. - alpha) * self.digamma(alpha)
        # Terms are summed in the same left-to-right order as the formula above.
        return alpha - log_rate + log_gamma_alpha + digamma_term

    def _cross_entropy(self, dist, concentration_b, rate_b, concentration_a=None, rate_a=None):
        r"""
        Evaluate cross entropy between Gamma distributions.

        Args:
            dist (str): Type of the distributions. Should be "Gamma" in this case.
            concentration_b (Tensor): concentration of distribution b.
            rate_b (Tensor): rate of distribution b.
            concentration_a (Tensor): concentration of distribution a. Default: self._concentration.
            rate_a (Tensor): rate of distribution a. Default: self._rate.

        .. math::
            H(a, b) = H(a) + KL(a||b)
        """
        check_distribution_name(dist, 'Gamma')
        entropy_a = self._entropy(concentration_a, rate_a)
        kl_a_b = self._kl_loss(dist, concentration_b, rate_b, concentration_a, rate_a)
        return entropy_a + kl_a_b

    def _log_prob(self, value, concentration=None, rate=None):
        r"""
        Evaluate log probability.

        Args:
            value (Tensor): The value to be evaluated.
            concentration (Tensor): The concentration of the distribution. Default: self._concentration.
            rate (Tensor): The rate of the distribution. Default: self._rate.

        .. math::
            L(x) = (\alpha - 1) * \log(x) - \beta * x - \log(\Gamma(\alpha)) + \alpha * \log(\beta)
        """
        checked_value = self._check_value(value, 'value')
        x = self.cast(checked_value, self.dtype)
        alpha, beta = self._check_param_type(concentration, rate)
        # log of the normalizing constant Gamma(alpha) / beta^alpha
        log_normalization = self.lgamma(alpha) - alpha * self.log(beta)
        unnormalized_log_prob = (alpha - 1.) * self.log(x) - beta * x
        return unnormalized_log_prob - log_normalization

    def _cdf(self, value, concentration=None, rate=None):
        r"""
        Evaluate the cumulative distribution function on the given value. The CDF is obtained
        from `igamma`, which is relied upon to return the regularized incomplete gamma function.

        Args:
            value (Tensor): The value to be evaluated.
            concentration (Tensor): The concentration of the distribution. Default: self._concentration.
            rate (Tensor): The rate of the distribution. Default: self._rate.

        .. math::
            cdf(x) = P(\alpha, \beta * x)

        where :math:`P` is the regularized incomplete gamma function computed by `igamma`.
        """
        checked_value = self._check_value(value, 'value')
        x = self.cast(checked_value, self.dtype)
        alpha, beta = self._check_param_type(concentration, rate)
        scaled_value = beta * x
        return self.igamma(alpha, scaled_value)

    def _kl_loss(self, dist, concentration_b, rate_b, concentration_a=None, rate_a=None):
        r"""
        Evaluate Gamma-Gamma KL divergence, i.e. KL(a||b).

        Args:
            dist (str): The type of the distributions. Should be "Gamma" in this case.
            concentration_b (Tensor): The concentration of distribution b.
            rate_b (Tensor): The rate of distribution b.
            concentration_a (Tensor): The concentration of distribution a. Default: self._concentration.
            rate_a (Tensor): The rate of distribution a. Default: self._rate.

        .. math::
            KL(a||b) = (\alpha_{a} - \alpha_{b}) * \psi(\alpha_{a}) + \log(\Gamma(\alpha_{b}))
                       - \log(\Gamma(\alpha_{a})) + \alpha_{b} * \log(\beta_{a}) - \alpha_{b} * \log(\beta_{b})
                       + \alpha_{a} * (\frac{\beta_{b}}{\beta_{a}} - 1)

        where :math:`\psi` is the digamma function.
        """
        check_distribution_name(dist, 'Gamma')
        alpha_b = self._check_value(concentration_b, 'concentration_b')
        beta_b = self._check_value(rate_b, 'rate_b')
        alpha_b = self.cast(alpha_b, self.parameter_type)
        beta_b = self.cast(beta_b, self.parameter_type)
        alpha_a, beta_a = self._check_param_type(concentration_a, rate_a)

        digamma_term = (alpha_a - alpha_b) * self.digamma(alpha_a)
        log_rate_a_term = alpha_b * self.log(beta_a)
        log_rate_b_term = alpha_b * self.log(beta_b)
        rate_ratio_term = alpha_a * (beta_b / beta_a - 1.)
        # Terms are accumulated in the same left-to-right order as the formula above.
        return digamma_term + self.lgamma(alpha_b) - self.lgamma(alpha_a) \
               + log_rate_a_term - log_rate_b_term + rate_ratio_term

    def _sample(self, shape=(), concentration=None, rate=None):
        """
        Sampling.

        Args:
            shape (tuple): The shape of the sample. Default: ().
            concentration (Tensor): The concentration of the samples. Default: self._concentration.
            rate (Tensor): The rate of the samples. Default: self._rate.

        Returns:
            Tensor, with the shape being shape + batch_shape.
        """
        shape = self.checktuple(shape, 'shape')
        alpha, beta = self._check_param_type(concentration, rate)
        # The batch shape is the broadcast shape of the two parameters.
        origin_shape = shape + self.shape(alpha + beta)
        is_scalar_sample = origin_shape == ()
        # An empty shape is sampled as (1,) and squeezed back afterwards.
        if is_scalar_sample:
            sample_shape = (1,)
        else:
            sample_shape = origin_shape
        draws = C.gamma(sample_shape, alpha, beta, self.seed)
        value = self.cast(draws, self.dtype)
        if is_scalar_sample:
            return self.squeeze(value)
        return value