# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Gradient-method attacks.
"""
from abc import abstractmethod
import numpy as np
from mindspore.nn import Cell
from mindarmour.utils.util import WithLossCell, GradWrapWithLoss, to_tensor_tuple
from mindarmour.utils.logger import LogUtil
from mindarmour.utils._check_param import check_model, check_inputs_labels, \
normalize_value, check_value_positive, check_param_multi_types, \
check_norm_level, check_param_type
from .attack import Attack

LOGGER = LogUtil.get_instance()
TAG = 'SingleGrad'


class GradientMethod(Attack):
"""
Abstract base class for all single-step gradient-based attacks.
Args:
network (Cell): Target model.
        eps (float): Magnitude of the single-step adversarial perturbation, as
            a proportion of the data range. Default: 0.07.
        alpha (float): Magnitude of the single-step random perturbation, as a
            proportion of the data range. Default: None.
        bounds (tuple): Upper and lower bounds of the data, indicating the data
            range, in the form (clip_min, clip_max). Default: None.
        loss_fn (Loss): Loss function for optimization. If None, the input
            network is assumed to already be equipped with a loss function.
            Default: None.

Examples:
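        >>> import numpy as np
        >>> from mindspore import nn
        >>> from mindspore.nn import SoftmaxCrossEntropyWithLogits
        >>> # minimal stand-in model, an assumption for illustration only: any
        >>> # Cell mapping the 3-dim inputs below to 5 logits would do
        >>> network = nn.Dense(3, 5)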
        >>> inputs = np.array([[0.1, 0.2, 0.6], [0.3, 0, 0.4]]).astype(np.float32)
        >>> labels = np.array([[0, 1, 0, 0, 0], [0, 0, 1, 0, 0]]).astype(np.float32)
>>> attack = FastGradientMethod(network, loss_fn=SoftmaxCrossEntropyWithLogits(sparse=False))
>>> adv_x = attack.generate(inputs, labels)
"""

    def __init__(self, network, eps=0.07, alpha=None, bounds=None,
loss_fn=None):
super(GradientMethod, self).__init__()
self._network = check_model('network', network, Cell)
self._eps = check_value_positive('eps', eps)
self._dtype = None
if bounds is not None:
self._bounds = check_param_multi_types('bounds', bounds,
[list, tuple])
for b in self._bounds:
_ = check_param_multi_types('bound', b, [int, float])
else:
self._bounds = bounds
if alpha is not None:
self._alpha = check_value_positive('alpha', alpha)
else:
self._alpha = alpha
if loss_fn is None:
self._grad_all = self._network
else:
with_loss_cell = WithLossCell(self._network, loss_fn)
self._grad_all = GradWrapWithLoss(with_loss_cell)
self._grad_all.set_train()

    def generate(self, inputs, labels):
"""
Generate adversarial examples based on input samples and original/target labels.
Args:
            inputs (Union[numpy.ndarray, tuple]): Benign input samples used as
                references to create adversarial examples.
            labels (Union[numpy.ndarray, tuple]): Original/target labels. If
                each input has more than one label, the labels are wrapped in
                a tuple.

Returns:
numpy.ndarray, generated adversarial examples.
"""
inputs_image, inputs, labels = check_inputs_labels(inputs, labels)
self._dtype = inputs_image.dtype
gradient = self._gradient(inputs, labels)
        # optionally prepend a random step (R+FGSM, Tramer et al., 2018):
        # perturbation = alpha*sign(noise) + (eps - alpha)*gradient
        if self._alpha is not None:
            random_part = self._alpha*np.sign(np.random.normal(
                size=inputs_image.shape)).astype(self._dtype)
perturbation = (self._eps - self._alpha)*gradient + random_part
else:
perturbation = self._eps*gradient
if self._bounds is not None:
clip_min, clip_max = self._bounds
perturbation = perturbation*(clip_max - clip_min)
adv_x = inputs_image + perturbation
adv_x = np.clip(adv_x, clip_min, clip_max)
else:
adv_x = inputs_image + perturbation
return adv_x

    @abstractmethod
def _gradient(self, inputs, labels):
"""
Calculate gradients based on input samples and original/target labels.
Args:
inputs (Union[numpy.ndarray, tuple]): Benign input samples used as references to
create adversarial examples.
            labels (Union[numpy.ndarray, tuple]): Original/target labels. If
                each input has more than one label, the labels are wrapped in
                a tuple.

Raises:
NotImplementedError: It is an abstract method.
"""
msg = 'The function _gradient() is an abstract method in class ' \
'`GradientMethod`, and should be implemented in child class.'
LOGGER.error(TAG, msg)
raise NotImplementedError(msg)


class FastGradientMethod(GradientMethod):
"""
    This attack is a one-step attack based on gradient calculation. The norm
    of the perturbation can be L1, L2 or Linf.

References: `I. J. Goodfellow, J. Shlens, and C. Szegedy, "Explaining
and harnessing adversarial examples," in ICLR, 2015.
<https://arxiv.org/abs/1412.6572>`_
Args:
network (Cell): Target model.
        eps (float): Magnitude of the single-step adversarial perturbation, as
            a proportion of the data range. Default: 0.07.
        alpha (float): Magnitude of the single-step random perturbation, as a
            proportion of the data range. Default: None.
        bounds (tuple): Upper and lower bounds of the data, indicating the data
            range, in the form (clip_min, clip_max). Default: (0.0, 1.0).
        norm_level (Union[int, float]): Order of the norm. Possible values:
            np.inf, 1 or 2. Default: 2.
        is_targeted (bool): If True, targeted attack. If False, untargeted
            attack. Default: False.
        loss_fn (Loss): Loss function for optimization. If None, the input
            network is assumed to already be equipped with a loss function.
            Default: None.

Examples:
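        >>> import numpy as np
        >>> from mindspore import nn
        >>> from mindspore.nn import SoftmaxCrossEntropyWithLogits
        >>> # minimal stand-in model, an assumption for illustration only: any
        >>> # Cell mapping the 3-dim inputs below to 5 logits would do
        >>> network = nn.Dense(3, 5)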
        >>> inputs = np.array([[0.1, 0.2, 0.6], [0.3, 0, 0.4]]).astype(np.float32)
        >>> labels = np.array([[0, 1, 0, 0, 0], [0, 0, 1, 0, 0]]).astype(np.float32)
>>> attack = FastGradientMethod(network, loss_fn=SoftmaxCrossEntropyWithLogits(sparse=False))
>>> adv_x = attack.generate(inputs, labels)
"""

    def __init__(self, network, eps=0.07, alpha=None, bounds=(0.0, 1.0),
norm_level=2, is_targeted=False, loss_fn=None):
super(FastGradientMethod, self).__init__(network,
eps=eps,
alpha=alpha,
bounds=bounds,
loss_fn=loss_fn)
self._norm_level = check_norm_level(norm_level)
self._is_targeted = check_param_type('is_targeted', is_targeted, bool)

    def _gradient(self, inputs, labels):
"""
Calculate gradients based on input samples and original/target labels.
Args:
inputs (Union[numpy.ndarray, tuple]): Input sample.
            labels (Union[numpy.ndarray, tuple]): Original/target labels. If
                each input has more than one label, the labels are wrapped in
                a tuple.

Returns:
numpy.ndarray, gradient of inputs.
"""
inputs_tensor = to_tensor_tuple(inputs)
labels_tensor = to_tensor_tuple(labels)
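        # a single forward and backward pass: gradients of the loss with
        # respect to every input tensor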
out_grad = self._grad_all(*inputs_tensor, *labels_tensor)
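        # multi-input networks yield one gradient per input; keep the
        # gradient w.r.t. the first (image) input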
if isinstance(out_grad, tuple):
out_grad = out_grad[0]
gradient = out_grad.asnumpy()
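        # a targeted attack minimizes the loss toward the target label,
        # so the gradient direction is reversed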
if self._is_targeted:
gradient = -gradient
return normalize_value(gradient, self._norm_level)


class RandomFastGradientMethod(FastGradientMethod):
"""
    Fast Gradient Method with random perturbation.

References: `Florian Tramer, Alexey Kurakin, Nicolas Papernot, "Ensemble
adversarial training: Attacks and defenses" in ICLR, 2018
<https://arxiv.org/abs/1705.07204>`_
Args:
network (Cell): Target model.
        eps (float): Magnitude of the single-step adversarial perturbation, as
            a proportion of the data range. Default: 0.07.
        alpha (float): Magnitude of the single-step random perturbation, as a
            proportion of the data range. Default: 0.035.
        bounds (tuple): Upper and lower bounds of the data, indicating the data
            range, in the form (clip_min, clip_max). Default: (0.0, 1.0).
        norm_level (Union[int, float]): Order of the norm. Possible values:
            np.inf, 1 or 2. Default: 2.
        is_targeted (bool): If True, targeted attack. If False, untargeted
            attack. Default: False.
        loss_fn (Loss): Loss function for optimization. If None, the input
            network is assumed to already be equipped with a loss function.
            Default: None.

    Raises:
        ValueError: If eps is smaller than alpha.

Examples:
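        >>> import numpy as np
        >>> from mindspore import nn
        >>> from mindspore.nn import SoftmaxCrossEntropyWithLogits
        >>> # minimal stand-in model, an assumption for illustration only: any
        >>> # Cell mapping the 3-dim inputs below to 5 logits would do
        >>> network = nn.Dense(3, 5)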
        >>> inputs = np.array([[0.1, 0.2, 0.6], [0.3, 0, 0.4]]).astype(np.float32)
        >>> labels = np.array([[0, 1, 0, 0, 0], [0, 0, 1, 0, 0]]).astype(np.float32)
>>> attack = RandomFastGradientMethod(network, loss_fn=SoftmaxCrossEntropyWithLogits(sparse=False))
>>> adv_x = attack.generate(inputs, labels)
"""

    def __init__(self, network, eps=0.07, alpha=0.035, bounds=(0.0, 1.0),
norm_level=2, is_targeted=False, loss_fn=None):
        if eps < alpha:
            raise ValueError('eps must not be smaller than alpha!')
super(RandomFastGradientMethod, self).__init__(network,
eps=eps,
alpha=alpha,
bounds=bounds,
norm_level=norm_level,
is_targeted=is_targeted,
loss_fn=loss_fn)


class FastGradientSignMethod(GradientMethod):
"""
    Uses the sign of the gradient with respect to the input instead of its
    value. This attack is commonly known as the Fast Gradient Sign Method.

References: `Ian J. Goodfellow, J. Shlens, and C. Szegedy, "Explaining
and harnessing adversarial examples," in ICLR, 2015
<https://arxiv.org/abs/1412.6572>`_
Args:
network (Cell): Target model.
        eps (float): Magnitude of the single-step adversarial perturbation, as
            a proportion of the data range. Default: 0.07.
        alpha (float): Magnitude of the single-step random perturbation, as a
            proportion of the data range. Default: None.
        bounds (tuple): Upper and lower bounds of the data, indicating the data
            range, in the form (clip_min, clip_max). Default: (0.0, 1.0).
        is_targeted (bool): If True, targeted attack. If False, untargeted
            attack. Default: False.
        loss_fn (Loss): Loss function for optimization. If None, the input
            network is assumed to already be equipped with a loss function.
            Default: None.

Examples:
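        >>> import numpy as np
        >>> from mindspore import nn
        >>> from mindspore.nn import SoftmaxCrossEntropyWithLogits
        >>> # minimal stand-in model, an assumption for illustration only: any
        >>> # Cell mapping the 3-dim inputs below to 5 logits would do
        >>> network = nn.Dense(3, 5)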
        >>> inputs = np.array([[0.1, 0.2, 0.6], [0.3, 0, 0.4]]).astype(np.float32)
        >>> labels = np.array([[0, 1, 0, 0, 0], [0, 0, 1, 0, 0]]).astype(np.float32)
>>> attack = FastGradientSignMethod(network, loss_fn=SoftmaxCrossEntropyWithLogits(sparse=False))
>>> adv_x = attack.generate(inputs, labels)
"""

    def __init__(self, network, eps=0.07, alpha=None, bounds=(0.0, 1.0),
is_targeted=False, loss_fn=None):
super(FastGradientSignMethod, self).__init__(network,
eps=eps,
alpha=alpha,
bounds=bounds,
loss_fn=loss_fn)
self._is_targeted = check_param_type('is_targeted', is_targeted, bool)

    def _gradient(self, inputs, labels):
"""
        Calculate gradients based on input samples and original/target labels.

        Args:
            inputs (Union[numpy.ndarray, tuple]): Input samples.
            labels (Union[numpy.ndarray, tuple]): Original/target labels. If
                each input has more than one label, the labels are wrapped in
                a tuple.

Returns:
numpy.ndarray, gradient of inputs.
"""
inputs_tensor = to_tensor_tuple(inputs)
labels_tensor = to_tensor_tuple(labels)
out_grad = self._grad_all(*inputs_tensor, *labels_tensor)
if isinstance(out_grad, tuple):
out_grad = out_grad[0]
gradient = out_grad.asnumpy()
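        # a targeted attack minimizes the loss toward the target label,
        # so the gradient direction is reversed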
if self._is_targeted:
gradient = -gradient
gradient = np.sign(gradient)
return gradient


class RandomFastGradientSignMethod(FastGradientSignMethod):
"""
    Fast Gradient Sign Method with random perturbation.

References: `F. Tramer, et al., "Ensemble adversarial training: Attacks
and defenses," in ICLR, 2018 <https://arxiv.org/abs/1705.07204>`_
Args:
network (Cell): Target model.
        eps (float): Magnitude of the single-step adversarial perturbation, as
            a proportion of the data range. Default: 0.07.
        alpha (float): Magnitude of the single-step random perturbation, as a
            proportion of the data range. Default: 0.035.
        bounds (tuple): Upper and lower bounds of the data, indicating the data
            range, in the form (clip_min, clip_max). Default: (0.0, 1.0).
        is_targeted (bool): If True, targeted attack. If False, untargeted
            attack. Default: False.
        loss_fn (Loss): Loss function for optimization. If None, the input
            network is assumed to already be equipped with a loss function.
            Default: None.

    Raises:
        ValueError: If eps is smaller than alpha.

Examples:
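        >>> import numpy as np
        >>> from mindspore import nn
        >>> from mindspore.nn import SoftmaxCrossEntropyWithLogits
        >>> # minimal stand-in model, an assumption for illustration only: any
        >>> # Cell mapping the 3-dim inputs below to 5 logits would do
        >>> network = nn.Dense(3, 5)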
        >>> inputs = np.array([[0.1, 0.2, 0.6], [0.3, 0, 0.4]]).astype(np.float32)
        >>> labels = np.array([[0, 1, 0, 0, 0], [0, 0, 1, 0, 0]]).astype(np.float32)
>>> attack = RandomFastGradientSignMethod(network, loss_fn=SoftmaxCrossEntropyWithLogits(sparse=False))
>>> adv_x = attack.generate(inputs, labels)
"""

    def __init__(self, network, eps=0.07, alpha=0.035, bounds=(0.0, 1.0),
is_targeted=False, loss_fn=None):
        if eps < alpha:
            raise ValueError('eps must not be smaller than alpha!')
super(RandomFastGradientSignMethod, self).__init__(network,
eps=eps,
alpha=alpha,
bounds=bounds,
is_targeted=is_targeted,
loss_fn=loss_fn)


class LeastLikelyClassMethod(FastGradientSignMethod):
"""
    Least-Likely Class Method: a targeted variant of the Fast Gradient Sign
    Method, typically aimed at the model's least-likely class.

References: `F. Tramer, et al., "Ensemble adversarial training: Attacks
and defenses," in ICLR, 2018 <https://arxiv.org/abs/1705.07204>`_
Args:
network (Cell): Target model.
        eps (float): Magnitude of the single-step adversarial perturbation, as
            a proportion of the data range. Default: 0.07.
        alpha (float): Magnitude of the single-step random perturbation, as a
            proportion of the data range. Default: None.
        bounds (tuple): Upper and lower bounds of the data, indicating the data
            range, in the form (clip_min, clip_max). Default: (0.0, 1.0).
        loss_fn (Loss): Loss function for optimization. If None, the input
            network is assumed to already be equipped with a loss function.
            Default: None.

Examples:
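        >>> import numpy as np
        >>> from mindspore import nn
        >>> from mindspore.nn import SoftmaxCrossEntropyWithLogits
        >>> # minimal stand-in model, an assumption for illustration only: any
        >>> # Cell mapping the 3-dim inputs below to 5 logits would do; the
        >>> # labels act as the (least-likely) target classes of this targeted attack
        >>> network = nn.Dense(3, 5)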
        >>> inputs = np.array([[0.1, 0.2, 0.6], [0.3, 0, 0.4]]).astype(np.float32)
        >>> labels = np.array([[0, 1, 0, 0, 0], [0, 0, 1, 0, 0]]).astype(np.float32)
>>> attack = LeastLikelyClassMethod(network, loss_fn=SoftmaxCrossEntropyWithLogits(sparse=False))
>>> adv_x = attack.generate(inputs, labels)
"""

    def __init__(self, network, eps=0.07, alpha=None, bounds=(0.0, 1.0),
loss_fn=None):
super(LeastLikelyClassMethod, self).__init__(network,
eps=eps,
alpha=alpha,
bounds=bounds,
is_targeted=True,
loss_fn=loss_fn)


class RandomLeastLikelyClassMethod(FastGradientSignMethod):
"""
    Least-Likely Class Method with random perturbation.

References: `F. Tramer, et al., "Ensemble adversarial training: Attacks
and defenses," in ICLR, 2018 <https://arxiv.org/abs/1705.07204>`_
Args:
network (Cell): Target model.
        eps (float): Magnitude of the single-step adversarial perturbation, as
            a proportion of the data range. Default: 0.07.
        alpha (float): Magnitude of the single-step random perturbation, as a
            proportion of the data range. Default: 0.035.
        bounds (tuple): Upper and lower bounds of the data, indicating the data
            range, in the form (clip_min, clip_max). Default: (0.0, 1.0).
        loss_fn (Loss): Loss function for optimization. If None, the input
            network is assumed to already be equipped with a loss function.
            Default: None.

    Raises:
        ValueError: If eps is smaller than alpha.

Examples:
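        >>> import numpy as np
        >>> from mindspore import nn
        >>> from mindspore.nn import SoftmaxCrossEntropyWithLogits
        >>> # minimal stand-in model, an assumption for illustration only: any
        >>> # Cell mapping the 3-dim inputs below to 5 logits would do; the
        >>> # labels act as the (least-likely) target classes of this targeted attack
        >>> network = nn.Dense(3, 5)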
        >>> inputs = np.array([[0.1, 0.2, 0.6], [0.3, 0, 0.4]]).astype(np.float32)
        >>> labels = np.array([[0, 1, 0, 0, 0], [0, 0, 1, 0, 0]]).astype(np.float32)
>>> attack = RandomLeastLikelyClassMethod(network, loss_fn=SoftmaxCrossEntropyWithLogits(sparse=False))
>>> adv_x = attack.generate(inputs, labels)
"""

    def __init__(self, network, eps=0.07, alpha=0.035, bounds=(0.0, 1.0),
loss_fn=None):
        if eps < alpha:
            raise ValueError('eps must not be smaller than alpha!')
super(RandomLeastLikelyClassMethod, self).__init__(network,
eps=eps,
alpha=alpha,
bounds=bounds,
is_targeted=True,
loss_fn=loss_fn)