Source code for mindarmour.attacks.gradient_method

# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Gradient-method Attack.
"""
from abc import abstractmethod

import numpy as np

from mindspore import Tensor
from mindspore.nn import Cell
from mindspore.nn import SoftmaxCrossEntropyWithLogits

from mindarmour.attacks.attack import Attack
from mindarmour.utils.util import WithLossCell
from mindarmour.utils.util import GradWrapWithLoss
from mindarmour.utils.logger import LogUtil
from mindarmour.utils._check_param import check_pair_numpy_param, check_model, \
    normalize_value, check_value_positive, check_param_multi_types, \
    check_norm_level, check_param_type

LOGGER = LogUtil.get_instance()
TAG = 'SingleGrad'


class GradientMethod(Attack):
    """
    Abstract base class for all single-step gradient-based attacks.

    Args:
        network (Cell): Target model.
        eps (float): Proportion of single-step adversarial perturbation generated
            by the attack to data range. Default: 0.07.
        alpha (float): Proportion of single-step random perturbation to data range.
            Default: None.
        bounds (tuple): Upper and lower bounds of data, indicating the data range.
            In form of (clip_min, clip_max). Default: None.
        loss_fn (Loss): Loss function for optimization. Default: None.

    Examples:
        >>> inputs = np.array([[0.1, 0.2, 0.6], [0.3, 0, 0.4]])
        >>> labels = np.array([[0, 1, 0, 0, 0], [0, 0, 1, 0, 0]])
        >>> attack = FastGradientMethod(network)
        >>> adv_x = attack.generate(inputs, labels)
    """

    def __init__(self, network, eps=0.07, alpha=None, bounds=None,
                 loss_fn=None):
        super(GradientMethod, self).__init__()
        self._network = check_model('network', network, Cell)
        self._eps = check_value_positive('eps', eps)
        self._dtype = None
        if bounds is not None:
            self._bounds = check_param_multi_types('bounds', bounds,
                                                   [list, tuple])
            for b in self._bounds:
                _ = check_param_multi_types('bound', b, [int, float])
        else:
            self._bounds = bounds
        if alpha is not None:
            self._alpha = check_value_positive('alpha', alpha)
        else:
            self._alpha = alpha
        if loss_fn is None:
            loss_fn = SoftmaxCrossEntropyWithLogits(is_grad=False,
                                                    sparse=False)
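        # Wrap the network and the loss together so that a single backward
        # call returns the gradient of the loss with respect to the input.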
        with_loss_cell = WithLossCell(self._network, loss_fn)
        self._grad_all = GradWrapWithLoss(with_loss_cell)
        self._grad_all.set_train()

    def generate(self, inputs, labels):
        """
        Generate adversarial examples based on input samples and original/target labels.

        Args:
            inputs (numpy.ndarray): Benign input samples used as references to create
                    adversarial examples.
            labels (numpy.ndarray): Original/target labels.

        Returns:
            numpy.ndarray, generated adversarial examples.
        """
        inputs, labels = check_pair_numpy_param('inputs', inputs,
                                                'labels', labels)
        self._dtype = inputs.dtype
        gradient = self._gradient(inputs, labels)
        # use random method or not
        if self._alpha is not None:
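            # Random-start variant: spend `alpha` of the budget on a random
            # sign perturbation and the remaining `eps - alpha` on the
            # gradient-based step.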
            random_part = self._alpha*np.sign(np.random.normal(
                size=inputs.shape)).astype(self._dtype)
            perturbation = (self._eps - self._alpha)*gradient + random_part
        else:
            perturbation = self._eps*gradient

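        # Perturbations are expressed as a proportion of the data range, so
        # rescale by (clip_max - clip_min) before adding and clipping.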
        if self._bounds is not None:
            clip_min, clip_max = self._bounds
            perturbation = perturbation*(clip_max - clip_min)
            adv_x = inputs + perturbation
            adv_x = np.clip(adv_x, clip_min, clip_max)
        else:
            adv_x = inputs + perturbation
        return adv_x

    @abstractmethod
    def _gradient(self, inputs, labels):
        """
        Calculate gradients based on input samples and original/target labels.

        Args:
            inputs (numpy.ndarray): Benign input samples used as references to
                create adversarial examples.
            labels (numpy.ndarray): Original/target labels.

        Raises:
            NotImplementedError: It is an abstract method.
        """
        msg = 'The function _gradient() is an abstract method in class ' \
              '`GradientMethod`, and should be implemented in child class.'
        LOGGER.error(TAG, msg)
        raise NotImplementedError(msg)
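
# The single-step attacks below all instantiate the same update rule:
#     x_adv = clip(x + eps*(clip_max - clip_min)*g(x, y), clip_min, clip_max)
# (the data-range rescaling applies only when `bounds` is set), where g is
# either the gradient of the loss w.r.t. the input, normalized under the
# chosen norm (FastGradientMethod), or its element-wise sign
# (FastGradientSignMethod). The Random* variants add an extra alpha-scaled
# random-sign term, and the LeastLikelyClass* variants run the targeted
# attack against the least-likely class supplied in `labels`.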


class FastGradientMethod(GradientMethod):
    """
    This attack is a one-step attack based on gradient calculation, and the
    norm of the perturbation can be L1, L2 or Linf.

    References: `I. J. Goodfellow, J. Shlens, and C. Szegedy, "Explaining and
    harnessing adversarial examples," in ICLR, 2015.
    <https://arxiv.org/abs/1412.6572>`_

    Args:
        network (Cell): Target model.
        eps (float): Proportion of single-step adversarial perturbation generated
            by the attack to data range. Default: 0.07.
        alpha (float): Proportion of single-step random perturbation to data range.
            Default: None.
        bounds (tuple): Upper and lower bounds of data, indicating the data range.
            In form of (clip_min, clip_max). Default: (0.0, 1.0).
        norm_level (Union[int, numpy.inf]): Order of the norm. Possible values:
            np.inf, 1 or 2. Default: 2.
        is_targeted (bool): If True, targeted attack. If False, untargeted
            attack. Default: False.
        loss_fn (Loss): Loss function for optimization. Default: None.

    Examples:
        >>> inputs = np.array([[0.1, 0.2, 0.6], [0.3, 0, 0.4]])
        >>> labels = np.array([[0, 1, 0, 0, 0], [0, 0, 1, 0, 0]])
        >>> attack = FastGradientMethod(network)
        >>> adv_x = attack.generate(inputs, labels)
    """

    def __init__(self, network, eps=0.07, alpha=None, bounds=(0.0, 1.0),
                 norm_level=2, is_targeted=False, loss_fn=None):
        super(FastGradientMethod, self).__init__(network,
                                                 eps=eps,
                                                 alpha=alpha,
                                                 bounds=bounds,
                                                 loss_fn=loss_fn)
        self._norm_level = check_norm_level(norm_level)
        self._is_targeted = check_param_type('is_targeted', is_targeted, bool)

    def _gradient(self, inputs, labels):
        """
        Calculate gradients based on input samples and original/target labels.

        Args:
            inputs (numpy.ndarray): Input samples.
            labels (numpy.ndarray): Original/target labels.

        Returns:
            numpy.ndarray, gradient of inputs.
        """
        out_grad = self._grad_all(Tensor(inputs), Tensor(labels))
        if isinstance(out_grad, tuple):
            out_grad = out_grad[0]
        gradient = out_grad.asnumpy()
        if self._is_targeted:
            gradient = -gradient
        return normalize_value(gradient, self._norm_level)


class RandomFastGradientMethod(FastGradientMethod):
    """
    Fast Gradient Method using random perturbation.

    References: `Florian Tramer, Alexey Kurakin, Nicolas Papernot, "Ensemble
    adversarial training: Attacks and defenses," in ICLR, 2018
    <https://arxiv.org/abs/1705.07204>`_

    Args:
        network (Cell): Target model.
        eps (float): Proportion of single-step adversarial perturbation generated
            by the attack to data range. Default: 0.07.
        alpha (float): Proportion of single-step random perturbation to data range.
            Default: 0.035.
        bounds (tuple): Upper and lower bounds of data, indicating the data range.
            In form of (clip_min, clip_max). Default: (0.0, 1.0).
        norm_level (Union[int, numpy.inf]): Order of the norm. Possible values:
            np.inf, 1 or 2. Default: 2.
        is_targeted (bool): If True, targeted attack. If False, untargeted
            attack. Default: False.
        loss_fn (Loss): Loss function for optimization. Default: None.

    Raises:
        ValueError: If eps is smaller than alpha.

    Examples:
        >>> inputs = np.array([[0.1, 0.2, 0.6], [0.3, 0, 0.4]])
        >>> labels = np.array([[0, 1, 0, 0, 0], [0, 0, 1, 0, 0]])
        >>> attack = RandomFastGradientMethod(network)
        >>> adv_x = attack.generate(inputs, labels)
    """

    def __init__(self, network, eps=0.07, alpha=0.035, bounds=(0.0, 1.0),
                 norm_level=2, is_targeted=False, loss_fn=None):
        if eps < alpha:
            raise ValueError('eps must be larger than alpha!')
        super(RandomFastGradientMethod, self).__init__(network,
                                                       eps=eps,
                                                       alpha=alpha,
                                                       bounds=bounds,
                                                       norm_level=norm_level,
                                                       is_targeted=is_targeted,
                                                       loss_fn=loss_fn)


class FastGradientSignMethod(GradientMethod):
    """
    Use the sign of the gradient with respect to the input instead of its
    value. This attack is commonly referred to as the Fast Gradient Sign
    Method.

    References: `Ian J. Goodfellow, J. Shlens, and C. Szegedy, "Explaining and
    harnessing adversarial examples," in ICLR, 2015
    <https://arxiv.org/abs/1412.6572>`_

    Args:
        network (Cell): Target model.
        eps (float): Proportion of single-step adversarial perturbation generated
            by the attack to data range. Default: 0.07.
        alpha (float): Proportion of single-step random perturbation to data range.
            Default: None.
        bounds (tuple): Upper and lower bounds of data, indicating the data range.
            In form of (clip_min, clip_max). Default: (0.0, 1.0).
        is_targeted (bool): If True, targeted attack. If False, untargeted
            attack. Default: False.
        loss_fn (Loss): Loss function for optimization. Default: None.

    Examples:
        >>> inputs = np.array([[0.1, 0.2, 0.6], [0.3, 0, 0.4]])
        >>> labels = np.array([[0, 1, 0, 0, 0], [0, 0, 1, 0, 0]])
        >>> attack = FastGradientSignMethod(network)
        >>> adv_x = attack.generate(inputs, labels)
    """

    def __init__(self, network, eps=0.07, alpha=None, bounds=(0.0, 1.0),
                 is_targeted=False, loss_fn=None):
        super(FastGradientSignMethod, self).__init__(network,
                                                     eps=eps,
                                                     alpha=alpha,
                                                     bounds=bounds,
                                                     loss_fn=loss_fn)
        self._is_targeted = check_param_type('is_targeted', is_targeted, bool)

    def _gradient(self, inputs, labels):
        """
        Calculate gradients based on input samples and original/target labels.

        Args:
            inputs (numpy.ndarray): Input samples.
            labels (numpy.ndarray): Original/target labels.

        Returns:
            numpy.ndarray, gradient of inputs.
        """
        out_grad = self._grad_all(Tensor(inputs), Tensor(labels))
        if isinstance(out_grad, tuple):
            out_grad = out_grad[0]
        gradient = out_grad.asnumpy()
        if self._is_targeted:
            gradient = -gradient
        gradient = np.sign(gradient)
        return gradient


class RandomFastGradientSignMethod(FastGradientSignMethod):
    """
    Fast Gradient Sign Method using random perturbation.

    References: `F. Tramer, et al., "Ensemble adversarial training: Attacks
    and defenses," in ICLR, 2018 <https://arxiv.org/abs/1705.07204>`_

    Args:
        network (Cell): Target model.
        eps (float): Proportion of single-step adversarial perturbation generated
            by the attack to data range. Default: 0.07.
        alpha (float): Proportion of single-step random perturbation to data range.
            Default: 0.035.
        bounds (tuple): Upper and lower bounds of data, indicating the data range.
            In form of (clip_min, clip_max). Default: (0.0, 1.0).
        is_targeted (bool): If True, targeted attack. If False, untargeted
            attack. Default: False.
        loss_fn (Loss): Loss function for optimization. Default: None.

    Raises:
        ValueError: If eps is smaller than alpha.

    Examples:
        >>> inputs = np.array([[0.1, 0.2, 0.6], [0.3, 0, 0.4]])
        >>> labels = np.array([[0, 1, 0, 0, 0], [0, 0, 1, 0, 0]])
        >>> attack = RandomFastGradientSignMethod(network)
        >>> adv_x = attack.generate(inputs, labels)
    """

    def __init__(self, network, eps=0.07, alpha=0.035, bounds=(0.0, 1.0),
                 is_targeted=False, loss_fn=None):
        if eps < alpha:
            raise ValueError('eps must be larger than alpha!')
        super(RandomFastGradientSignMethod, self).__init__(network,
                                                           eps=eps,
                                                           alpha=alpha,
                                                           bounds=bounds,
                                                           is_targeted=is_targeted,
                                                           loss_fn=loss_fn)


class LeastLikelyClassMethod(FastGradientSignMethod):
    """
    Least-Likely Class Method.

    References: `F. Tramer, et al., "Ensemble adversarial training: Attacks
    and defenses," in ICLR, 2018 <https://arxiv.org/abs/1705.07204>`_

    Args:
        network (Cell): Target model.
        eps (float): Proportion of single-step adversarial perturbation generated
            by the attack to data range. Default: 0.07.
        alpha (float): Proportion of single-step random perturbation to data range.
            Default: None.
        bounds (tuple): Upper and lower bounds of data, indicating the data range.
            In form of (clip_min, clip_max). Default: (0.0, 1.0).
        loss_fn (Loss): Loss function for optimization. Default: None.

    Examples:
        >>> inputs = np.array([[0.1, 0.2, 0.6], [0.3, 0, 0.4]])
        >>> labels = np.array([[0, 1, 0, 0, 0], [0, 0, 1, 0, 0]])
        >>> attack = LeastLikelyClassMethod(network)
        >>> adv_x = attack.generate(inputs, labels)
    """

    def __init__(self, network, eps=0.07, alpha=None, bounds=(0.0, 1.0),
                 loss_fn=None):
        super(LeastLikelyClassMethod, self).__init__(network,
                                                     eps=eps,
                                                     alpha=alpha,
                                                     bounds=bounds,
                                                     is_targeted=True,
                                                     loss_fn=loss_fn)


class RandomLeastLikelyClassMethod(FastGradientSignMethod):
    """
    Least-Likely Class Method using random perturbation.

    References: `F. Tramer, et al., "Ensemble adversarial training: Attacks
    and defenses," in ICLR, 2018 <https://arxiv.org/abs/1705.07204>`_

    Args:
        network (Cell): Target model.
        eps (float): Proportion of single-step adversarial perturbation generated
            by the attack to data range. Default: 0.07.
        alpha (float): Proportion of single-step random perturbation to data range.
            Default: 0.035.
        bounds (tuple): Upper and lower bounds of data, indicating the data range.
            In form of (clip_min, clip_max). Default: (0.0, 1.0).
        loss_fn (Loss): Loss function for optimization. Default: None.

    Raises:
        ValueError: If eps is smaller than alpha.

    Examples:
        >>> inputs = np.array([[0.1, 0.2, 0.6], [0.3, 0, 0.4]])
        >>> labels = np.array([[0, 1, 0, 0, 0], [0, 0, 1, 0, 0]])
        >>> attack = RandomLeastLikelyClassMethod(network)
        >>> adv_x = attack.generate(inputs, labels)
    """

    def __init__(self, network, eps=0.07, alpha=0.035, bounds=(0.0, 1.0),
                 loss_fn=None):
        if eps < alpha:
            raise ValueError('eps must be larger than alpha!')
        super(RandomLeastLikelyClassMethod, self).__init__(network,
                                                           eps=eps,
                                                           alpha=alpha,
                                                           bounds=bounds,
                                                           is_targeted=True,
                                                           loss_fn=loss_fn)
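

# A minimal, self-contained usage sketch (illustration only, not part of the
# library API): it builds a toy one-layer classifier and runs
# FastGradientSignMethod on random data. The network architecture, input
# shape and one-hot label encoding below are assumptions made for the
# example, not taken from the MindArmour test suite.
if __name__ == '__main__':
    from mindspore import nn

    class _DemoNet(Cell):
        """Toy classifier: a single fully-connected layer over 10 features."""

        def __init__(self):
            super(_DemoNet, self).__init__()
            self._fc = nn.Dense(10, 3)

        def construct(self, inputs):
            return self._fc(inputs)

    demo_net = _DemoNet()
    # Four random samples in [0, 1] and matching one-hot labels for 3 classes.
    demo_inputs = np.random.rand(4, 10).astype(np.float32)
    demo_labels = np.eye(3)[np.random.randint(0, 3, size=4)].astype(np.float32)
    demo_attack = FastGradientSignMethod(demo_net, eps=0.07, bounds=(0.0, 1.0))
    demo_adv = demo_attack.generate(demo_inputs, demo_labels)
    print('adversarial examples shape:', demo_adv.shape)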