Source code for mindarmour.attacks.iterative_gradient_method

# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
""" Iterative gradient method attack. """
from abc import abstractmethod

import numpy as np

from mindspore.nn import SoftmaxCrossEntropyWithLogits
from mindspore import Tensor
from mindspore.nn import Cell

from mindarmour.attacks.attack import Attack
from mindarmour.attacks.gradient_method import FastGradientSignMethod
from mindarmour.utils.logger import LogUtil
from mindarmour.utils.util import WithLossCell
from mindarmour.utils.util import GradWrapWithLoss
from mindarmour.utils._check_param import check_pair_numpy_param, \
    normalize_value, check_model, check_value_positive, check_int_positive, \
    check_param_type, check_norm_level, check_param_multi_types

LOGGER = LogUtil.get_instance()
TAG = 'IterGrad'


def _reshape_l1_projection(values, eps=3):
    """
    `Implementation of L1 ball projection from:`_.

    .. _`Implementation of L1 ball projection from:`:
        https://stanford.edu/~jduchi/projects/DuchiShSiCh08.pdf

    Args:
        values (numpy.ndarray): Input data reshape into 2-dims.
        eps (float): L1 radius. Default: 3.

    Returns:
        numpy.ndarray, containing the projection.
    """
    abs_x = np.abs(values)
    abs_x = np.sum(abs_x, axis=1)
    indexes_b = (abs_x > eps)
    x_b = values[indexes_b]
    batch_size_b = x_b.shape[0]
    if batch_size_b == 0:
        return values

    # make the projection on l1 ball for elements outside the ball
    b_mu = -np.sort(-np.abs(x_b), axis=1)
    b_vv = np.arange(x_b.shape[1]).astype(np.float)
    b_st = (np.cumsum(b_mu, axis=1)-eps)/(b_vv+1)
    selected = (b_mu - b_st) > 0
    rho = np.sum((np.cumsum((1-selected), axis=1) == 0), axis=1)-1
    theta = np.take_along_axis(b_st, np.expand_dims(rho, axis=1), axis=1)
    proj_x_b = np.maximum(0, np.abs(x_b)-theta)*np.sign(x_b)

    # gather all the projected batch
    proj_x = np.copy(values)
    proj_x[indexes_b] = proj_x_b
    return proj_x


def _projection(values, eps, norm_level):
    """
    Implementation of values normalization within eps.

    Args:
        values (numpy.ndarray): Input data.
        eps (float): Project radius.
        norm_level (Union[int, char, numpy.inf]): Order of the norm. Possible
            values: np.inf, 1 or 2.

    Returns:
        numpy.ndarray, normalized values.

    Raises:
        NotImplementedError: If the norm_level is not in [1, 2, np.inf, '1',
            '2', 'inf'].
    """
    if norm_level in (1, '1'):
        sample_batch = values.shape[0]
        x_flat = values.reshape(sample_batch, -1)
        proj_flat = _reshape_l1_projection(x_flat, eps)
        return proj_flat.reshape(values.shape)
    if norm_level in (2, '2'):
        return eps*normalize_value(values, norm_level)
    if norm_level in (np.inf, 'inf'):
        return eps*np.sign(values)
    msg = 'Values of `norm_level` different from 1, 2 and `np.inf` are ' \
          'currently not supported.'
    LOGGER.error(TAG, msg)
    raise NotImplementedError(msg)


[docs]class IterativeGradientMethod(Attack):
    """
    Abstract base class for all iterative gradient based attacks.

    Args:
        network (Cell): Target model.
        eps (float): Proportion of adversarial perturbation generated by the
            attack to data range. Default: 0.3.
        eps_iter (float): Proportion of single-step adversarial perturbation
            generated by the attack to data range. Default: 0.1.
        bounds (tuple): Upper and lower bounds of data, indicating the data range.
            In form of (clip_min, clip_max). Default: (0.0, 1.0).
        nb_iter (int): Number of iteration. Default: 5.
        loss_fn (Loss): Loss function for optimization.
    """
    def __init__(self, network, eps=0.3, eps_iter=0.1, bounds=(0.0, 1.0), nb_iter=5,
                 loss_fn=None):
        super(IterativeGradientMethod, self).__init__()
        self._network = check_model('network', network, Cell)
        self._eps = check_value_positive('eps', eps)
        self._eps_iter = check_value_positive('eps_iter', eps_iter)
        self._nb_iter = check_int_positive('nb_iter', nb_iter)
        self._bounds = check_param_multi_types('bounds', bounds, [list, tuple])
        for b in self._bounds:
            _ = check_param_multi_types('bound', b, [int, float])
        if loss_fn is None:
            loss_fn = SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=False)
        self._loss_grad = GradWrapWithLoss(WithLossCell(self._network, loss_fn))
        self._loss_grad.set_train()

[docs]    @abstractmethod
    def generate(self, inputs, labels):
        """
        Generate adversarial examples based on input samples and original/target labels.

        Args:
            inputs (numpy.ndarray): Benign input samples used as references to create
                adversarial examples.
            labels (numpy.ndarray): Original/target labels.

        Raises:
            NotImplementedError: This function is not available in
                IterativeGradientMethod.

        Examples:
            >>> adv_x = attack.generate([[0.1, 0.9, 0.6],
            >>>                          [0.3, 0, 0.3]],
            >>>                         [[0, , 1, 0, 0, 0, 0, 0, 0, 0],
            >>>                          [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0]])
        """
        msg = 'The function generate() is an abstract method in class ' \
              '`IterativeGradientMethod`, and should be implemented ' \
              'in child class.'
        LOGGER.error(TAG, msg)
        raise NotImplementedError(msg)


[docs]class BasicIterativeMethod(IterativeGradientMethod):
    """
    The Basic Iterative Method attack, an iterative FGSM method to generate
    adversarial examples.

    References: `A. Kurakin, I. Goodfellow, and S. Bengio, "Adversarial examples
    in the physical world," in ICLR, 2017 <https://arxiv.org/abs/1607.02533>`_

    Args:
        network (Cell): Target model.
        eps (float): Proportion of adversarial perturbation generated by the
            attack to data range. Default: 0.3.
        eps_iter (float): Proportion of single-step adversarial perturbation
            generated by the attack to data range. Default: 0.1.
        bounds (tuple): Upper and lower bounds of data, indicating the data range.
            In form of (clip_min, clip_max). Default: (0.0, 1.0).
        is_targeted (bool): If True, targeted attack. If False, untargeted
            attack. Default: False.
        nb_iter (int): Number of iteration. Default: 5.
        loss_fn (Loss): Loss function for optimization.
        attack (class): The single step gradient method of each iteration. In
            this class, FGSM is used.

    Examples:
        >>> attack = BasicIterativeMethod(network)
    """

    def __init__(self, network, eps=0.3, eps_iter=0.1, bounds=(0.0, 1.0),
                 is_targeted=False, nb_iter=5, loss_fn=None):
        super(BasicIterativeMethod, self).__init__(network,
                                                   eps=eps,
                                                   eps_iter=eps_iter,
                                                   bounds=bounds,
                                                   nb_iter=nb_iter,
                                                   loss_fn=loss_fn)
        self._is_targeted = check_param_type('is_targeted', is_targeted, bool)
        self._attack = FastGradientSignMethod(self._network,
                                              eps=self._eps_iter,
                                              bounds=self._bounds,
                                              is_targeted=self._is_targeted,
                                              loss_fn=loss_fn)

[docs]    def generate(self, inputs, labels):

        """
        Simple iterative FGSM method to generate adversarial examples.

        Args:
            inputs (numpy.ndarray): Benign input samples used as references to
                create adversarial examples.
            labels (numpy.ndarray): Original/target labels.

        Returns:
            numpy.ndarray, generated adversarial examples.

        Examples:
            >>> adv_x = attack.generate([[0.3, 0.2, 0.6],
            >>>                          [0.3, 0.2, 0.4]],
            >>>                         [[0, 0, 1, 0, 0, 0, 0, 0, 0, 0],
            >>>                          [0, 0, 0, 0, 0, 0, 1, 0, 0, 0]])
        """
        inputs, labels = check_pair_numpy_param('inputs', inputs,
                                                'labels', labels)
        arr_x = inputs
        if self._bounds is not None:
            clip_min, clip_max = self._bounds
            clip_diff = clip_max - clip_min
            for _ in range(self._nb_iter):
                adv_x = self._attack.generate(inputs, labels)
                perturs = np.clip(adv_x - arr_x, (0 - self._eps)*clip_diff,
                                  self._eps*clip_diff)
                adv_x = arr_x + perturs
                inputs = adv_x
        else:
            for _ in range(self._nb_iter):
                adv_x = self._attack.generate(inputs, labels)
                adv_x = np.clip(adv_x, arr_x - self._eps, arr_x + self._eps)
                inputs = adv_x
        return adv_x


[docs]class MomentumIterativeMethod(IterativeGradientMethod):
    """
    The Momentum Iterative Method attack.

    References: `Y. Dong, et al., "Boosting adversarial attacks with
    momentum," arXiv:1710.06081, 2017 <https://arxiv.org/abs/1710.06081>`_

    Args:
        network (Cell): Target model.
        eps (float): Proportion of adversarial perturbation generated by the
            attack to data range. Default: 0.3.
        eps_iter (float): Proportion of single-step adversarial perturbation
            generated by the attack to data range. Default: 0.1.
        bounds (tuple): Upper and lower bounds of data, indicating the data range.
            In form of (clip_min, clip_max). Default: (0.0, 1.0).
        is_targeted (bool): If True, targeted attack. If False, untargeted
            attack. Default: False.
        nb_iter (int): Number of iteration. Default: 5.
        decay_factor (float): Decay factor in iterations. Default: 1.0.
        norm_level (Union[int, numpy.inf]): Order of the norm. Possible values:
            np.inf, 1 or 2. Default: 'inf'.
        loss_fn (Loss): Loss function for optimization.
    """

    def __init__(self, network, eps=0.3, eps_iter=0.1, bounds=(0.0, 1.0),
                 is_targeted=False, nb_iter=5, decay_factor=1.0,
                 norm_level='inf', loss_fn=None):
        super(MomentumIterativeMethod, self).__init__(network,
                                                      eps=eps,
                                                      eps_iter=eps_iter,
                                                      bounds=bounds,
                                                      nb_iter=nb_iter,
                                                      loss_fn=loss_fn)
        self._is_targeted = check_param_type('is_targeted', is_targeted, bool)
        self._decay_factor = check_value_positive('decay_factor', decay_factor)
        self._norm_level = check_norm_level(norm_level)

[docs]    def generate(self, inputs, labels):
        """
        Generate adversarial examples based on input data and origin/target labels.

        Args:
            inputs (numpy.ndarray): Benign input samples used as references to
                create adversarial examples.
            labels (numpy.ndarray): Original/target labels.

        Returns:
            numpy.ndarray, generated adversarial examples.

        Examples:
            >>> adv_x = attack.generate([[0.5, 0.2, 0.6],
            >>>                          [0.3, 0, 0.2]],
            >>>                         [[0, 0, 0, 0, 0, 0, 0, 0, 1, 0],
            >>>                          [0, 0, 0, 0, 0, 1, 0, 0, 0, 0]])
        """
        inputs, labels = check_pair_numpy_param('inputs', inputs,
                                                'labels', labels)
        arr_x = inputs
        momentum = 0
        if self._bounds is not None:
            clip_min, clip_max = self._bounds
            clip_diff = clip_max - clip_min
            for _ in range(self._nb_iter):
                gradient = self._gradient(inputs, labels)
                momentum = self._decay_factor*momentum + gradient
                adv_x = inputs + self._eps_iter*np.sign(momentum)
                perturs = np.clip(adv_x - arr_x, (0 - self._eps)*clip_diff,
                                  self._eps*clip_diff)
                adv_x = arr_x + perturs
                adv_x = np.clip(adv_x, clip_min, clip_max)
                inputs = adv_x
        else:
            for _ in range(self._nb_iter):
                gradient = self._gradient(inputs, labels)
                momentum = self._decay_factor*momentum + gradient
                adv_x = inputs + self._eps_iter*np.sign(momentum)
                adv_x = np.clip(adv_x, arr_x - self._eps, arr_x + self._eps)
                inputs = adv_x

        return adv_x

    def _gradient(self, inputs, labels):
        """
        Calculate the gradient of input samples.

        Args:
            inputs (numpy.ndarray): Input samples.
            labels (numpy.ndarray): Original/target labels.

        Returns:
            numpy.ndarray, gradient of labels w.r.t inputs.

        Examples:
            >>> grad = self._gradient([[0.5, 0.3, 0.4]],
            >>>                       [[0, 0, 0, 1, 0, 0, 0, 0, 0, 0])
        """
        sens = Tensor(np.array([1.0], inputs.dtype))
        # get grad of loss over x
        out_grad = self._loss_grad(Tensor(inputs), Tensor(labels), sens)
        if isinstance(out_grad, tuple):
            out_grad = out_grad[0]
        gradient = out_grad.asnumpy()

        if self._is_targeted:
            gradient = -gradient
        return normalize_value(gradient, self._norm_level)


[docs]class ProjectedGradientDescent(BasicIterativeMethod):
    """
    The Projected Gradient Descent attack is a variant of the Basic Iterative
    Method in which, after each iteration, the perturbation is projected on an
    lp-ball of specified radius (in addition to clipping the values of the
    adversarial sample so that it lies in the permitted data range). This is
    the attack proposed by Madry et al. for adversarial training.

    References: `A. Madry, et al., "Towards deep learning models resistant to
    adversarial attacks," in ICLR, 2018 <https://arxiv.org/abs/1706.06083>`_

    Args:
        network (Cell): Target model.
        eps (float): Proportion of adversarial perturbation generated by the
            attack to data range. Default: 0.3.
        eps_iter (float): Proportion of single-step adversarial perturbation
            generated by the attack to data range. Default: 0.1.
        bounds (tuple): Upper and lower bounds of data, indicating the data range.
            In form of (clip_min, clip_max). Default: (0.0, 1.0).
        is_targeted (bool): If True, targeted attack. If False, untargeted
            attack. Default: False.
        nb_iter (int): Number of iteration. Default: 5.
        norm_level (Union[int, numpy.inf]): Order of the norm. Possible values:
            np.inf, 1 or 2. Default: 'inf'.
        loss_fn (Loss): Loss function for optimization.
    """

    def __init__(self, network, eps=0.3, eps_iter=0.1, bounds=(0.0, 1.0),
                 is_targeted=False, nb_iter=5, norm_level='inf', loss_fn=None):
        super(ProjectedGradientDescent, self).__init__(network,
                                                       eps=eps,
                                                       eps_iter=eps_iter,
                                                       bounds=bounds,
                                                       is_targeted=is_targeted,
                                                       nb_iter=nb_iter,
                                                       loss_fn=loss_fn)
        self._norm_level = check_norm_level(norm_level)

[docs]    def generate(self, inputs, labels):
        """
        Iteratively generate adversarial examples based on BIM method. The
        perturbation is normalized by projected method with parameter norm_level .

        Args:
            inputs (numpy.ndarray): Benign input samples used as references to
                create adversarial examples.
            labels (numpy.ndarray): Original/target labels.

        Returns:
            numpy.ndarray, generated adversarial examples.

        Examples:
            >>> adv_x = attack.generate([[0.6, 0.2, 0.6],
            >>>                          [0.3, 0.3, 0.4]],
            >>>                         [[0, 0, 0, 0, 0, 0, 0, 0, 0, 1],
            >>>                          [1, 0, 0, 0, 0, 0, 0, 0, 0, 0]])
        """
        inputs, labels = check_pair_numpy_param('inputs', inputs,
                                                'labels', labels)
        arr_x = inputs
        if self._bounds is not None:
            clip_min, clip_max = self._bounds
            clip_diff = clip_max - clip_min
            for _ in range(self._nb_iter):
                adv_x = self._attack.generate(inputs, labels)
                perturs = _projection(adv_x - arr_x,
                                      self._eps,
                                      norm_level=self._norm_level)
                perturs = np.clip(perturs, (0 - self._eps)*clip_diff,
                                  self._eps*clip_diff)
                adv_x = arr_x + perturs
                inputs = adv_x
        else:
            for _ in range(self._nb_iter):
                adv_x = self._attack.generate(inputs, labels)
                perturs = _projection(adv_x - arr_x,
                                      self._eps,
                                      norm_level=self._norm_level)
                adv_x = arr_x + perturs
                adv_x = np.clip(adv_x, arr_x - self._eps, arr_x + self._eps)
                inputs = adv_x
        return adv_x