# Copyright 2020-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""GradCAM."""
from mindspore.ops import operations as op
from mindspore import get_context, PYNATIVE_MODE
from mindspore_xai.common.utils import ForwardProbe, retrieve_layer, unify_inputs, unify_targets
from .backprop_utils import GradNet
from .intermediate_layer import IntermediateLayerAttribution


def _gradcam_aggregation(attributions):
    """
    Aggregate the gradients and activations to get the final attribution.

    Args:
        attributions (Tensor): The attribution with a channel dimension, of shape :math:`(N, C, H, W)`.

    Returns:
        Tensor: The aggregated attribution, with the channel dimension reduced to 1.
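
    A minimal shape illustration (input values are assumed for demonstration only):

    >>> import numpy as np
    >>> import mindspore as ms
    >>> attr = ms.Tensor(np.ones((1, 2, 3, 3)), ms.float32)
    >>> _gradcam_aggregation(attr).shape
    (1, 1, 3, 3)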
"""
sum_ = op.ReduceSum(keep_dims=False)
relu_ = op.ReLU()
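    # sum over the channel axis, then keep only the positive contributions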
attributions = relu_(sum_(attributions, 1))
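    # restore a channel axis of size 1: (N, H, W) -> (N, 1, H, W)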
attributions = op.Reshape()(attributions, (attributions.shape[0], 1, *attributions.shape[1:]))
return attributions


class GradCAM(IntermediateLayerAttribution):
    r"""
    Provides the GradCAM explanation method.

    `GradCAM` generates saliency maps at an intermediate layer. The attribution is obtained as:

    .. math::

        \alpha_k^c = \frac{1}{Z} \sum_i \sum_j \frac{\partial{y^c}}{\partial{A_{i,j}^k}}

        attribution = ReLU(\sum_k \alpha_k^c A^k)

    where :math:`A^k` is the :math:`k`-th feature map of the target layer, :math:`y^c` is the output score for class
    :math:`c`, and :math:`Z` is the number of spatial locations in a feature map.

    For more details, please refer to the original paper: `GradCAM <https://openaccess.thecvf.com/content_ICCV_2017/
    papers/Selvaraju_Grad-CAM_Visual_Explanations_ICCV_2017_paper.pdf>`_.

    Note:
        The passed `network` will be set to eval mode through `network.set_grad(False)` and
        `network.set_train(False)`. If you want to train the `network` afterwards, please reset it back to training
        mode through `network.set_grad(True)` and `network.set_train(True)`.

    Args:
        network (Cell): The black-box model to be explained.
        layer (str, optional): The name of the layer at which to generate the explanation; the last convolutional
            layer is usually a good choice. If it is ``''``, the explanation will be generated at the input layer.
            Default: ``''``.

    Inputs:
        - **inputs** (Tensor) - The input data to be explained, a 4D tensor of shape :math:`(N, C, H, W)`.
        - **targets** (Tensor, int, tuple, list) - The labels of interest. It should be a 1D or scalar tensor, an
          integer, or a tuple/list of integers. If it is a 1D tensor, tuple or list, its length should be :math:`N`.
        - **ret** (str, optional) - The return object type. ``'tensor'`` means a Tensor is returned, ``'image'``
          means a list of PIL.Image.Image is returned. Default: ``'tensor'``.
        - **show** (bool, optional) - Whether to show the saliency images. ``None`` means the saliency images are
          shown automatically when running on JupyterLab. Default: ``None``.

    Outputs:
        Tensor, a 4D tensor of shape :math:`(N, 1, H, W)`, the saliency maps. Or list[list[PIL.Image.Image]], the
        normalized saliency images if `ret` was set to ``'image'``.

    Raises:
        TypeError: Raised for any argument or input type problem.
        ValueError: Raised for any input value problem.

    Supported Platforms:
        ``Ascend`` ``GPU``

    Examples:
>>> import numpy as np
>>> import mindspore as ms
>>> from mindspore_xai.explainer import GradCAM
>>> from mindspore import set_context, PYNATIVE_MODE
>>>
>>> # only PYNATIVE_MODE is supported
>>> set_context(mode=PYNATIVE_MODE)
>>> # The detail of LeNet5 is shown in models.official.cv.lenet.src.lenet.py
>>> net = LeNet5(10, num_channel=3)
>>> # specify a layer name to generate explanation, usually the layer can be set as the last conv layer.
>>> layer_name = 'conv2'
>>> # init GradCAM with a trained network and specify the layer to obtain attribution
>>> gradcam = GradCAM(net, layer=layer_name)
>>> inputs = ms.Tensor(np.random.rand(1, 3, 32, 32), ms.float32)
>>> label = 5
>>> saliency = gradcam(inputs, label)
>>> print(saliency.shape)
(1, 1, 32, 32)
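        >>> # optionally, return the normalized saliency as PIL images instead of a tensor
        >>> images = gradcam(inputs, label, ret='image')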
"""
def __init__(self, network, layer=""):
super(GradCAM, self).__init__(network, layer)
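        # locate the target layer's cell inside the backward model for probing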
self._saliency_cell = retrieve_layer(self._backward_model, target_layer=layer)
self._avgpool = op.ReduceMean(keep_dims=True)
self._intermediate_grad = None
self._aggregation_fn = _gradcam_aggregation
self._resize_mode = 'bilinear'
self._grad_net = GradNet(self._backward_model)
self._hook_cell()

    def _hook_cell(self):
        if get_context("mode") != PYNATIVE_MODE:
            raise TypeError("Hook is not supported in graph mode currently, you can use "
                            "'set_context(mode=PYNATIVE_MODE)' to set pynative mode.")
if self._saliency_cell:
self._saliency_cell.register_backward_hook(self._cell_hook_fn)
self._saliency_cell.enable_hook = True
self._intermediate_grad = None

    def _cell_hook_fn(self, _, grad_input, grad_output):
        """
        Hook function to deal with the backward gradient.

        The arguments are set as required by `Cell.register_backward_hook`.
        """
del grad_output
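        # keep the gradient captured at the target layer; `grad_output` is unused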
self._intermediate_grad = grad_input

    def __call__(self, inputs, targets, ret='tensor', show=None):
        """Call function for `GradCAM`."""
self._verify_data(inputs, targets)
self._verify_other_args(ret, show)
with ForwardProbe(self._saliency_cell) as probe:
inputs = unify_inputs(inputs)
targets = unify_targets(inputs[0].shape[0], targets)
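            # build the gradient sensitivity weights for the requested target classes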
weights = self._get_bp_weights(inputs, targets)
gradients = self._grad_net(*inputs, weights)
# get intermediate activation
activation = (probe.value,)
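            # at the input layer, the inputs themselves act as the activation and
            # the input gradients act as the intermediate gradients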
if self._layer == "":
activation = inputs
self._intermediate_grad = unify_inputs(gradients)
if self._intermediate_grad is not None:
# average pooling on gradients
intermediate_grad = unify_inputs(
self._avgpool(self._intermediate_grad[0], (2, 3)))
else:
raise ValueError("Gradient for intermediate layer is not "
"obtained")
            mul = op.Mul()
            if len(intermediate_grad) != 1 or len(activation) != 1:
                raise ValueError("Length of `intermediate_grad` and `activation` must be 1.")
            # manually broadcast the pooled gradients (N, C, 1, 1) to the activation's spatial size
            intermediate_grad = op.Tile()(intermediate_grad[0], (1, 1, *activation[0].shape[2:]))
attribution = self._aggregation_fn(mul(intermediate_grad, activation[0]))
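            # upsample the coarse layer-level saliency map back to the input spatial size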
if self._resize:
attribution = self._resize_fn(attribution, *inputs,
mode=self._resize_mode)
self._intermediate_grad = None
return self._postproc_saliency(attribution, ret, show)