Source code for mindarmour.adv_robustness.attacks.lbfgs

# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
LBFGS-Attack.
"""
import numpy as np
import scipy.optimize as so

from mindspore import Tensor
from mindspore.nn import Cell
from mindspore.nn import SoftmaxCrossEntropyWithLogits

from mindarmour.utils.logger import LogUtil
from mindarmour.utils.util import WithLossCell, GradWrapWithLoss
from mindarmour.utils._check_param import check_pair_numpy_param, check_model, \
    check_int_positive, check_value_positive, check_param_type, \
    check_param_multi_types
from .attack import Attack

LOGGER = LogUtil.get_instance()
TAG = 'LBFGS'


[docs]class LBFGS(Attack):
    """
    Uses L-BFGS-B to minimize the distance between the input and the adversarial example.

    References: `Pedro Tabacof, Eduardo Valle. "Exploring the Space of
    Adversarial Images" <https://arxiv.org/abs/1510.05328>`_

    Args:
        network (Cell): The network of attacked model.
        eps (float): Attack step size. Default: 1e-5.
        bounds (tuple): Upper and lower bounds of data. Default: (0.0, 1.0)
        is_targeted (bool): If True, targeted attack. If False, untargeted
            attack. Default: True.
        nb_iter (int): Number of iteration of lbfgs-optimizer, which should be
            greater than zero. Default: 150.
        search_iters (int): Number of changes in step size, which should be
            greater than zero. Default: 30.
        loss_fn (Functions): Loss function of substitute model. Default: None.
        sparse (bool): If True, input labels are sparse-coded. If False,
            input labels are onehot-coded. Default: False.

    Examples:
        >>> attack = LBFGS(network)
    """
    def __init__(self, network, eps=1e-5, bounds=(0.0, 1.0), is_targeted=True,
                 nb_iter=150, search_iters=30, loss_fn=None, sparse=False):
        super(LBFGS, self).__init__()
        self._network = check_model('network', network, Cell)
        self._eps = check_value_positive('eps', eps)
        self._is_targeted = check_param_type('is_targeted', is_targeted, bool)
        self._nb_iter = check_int_positive('nb_iter', nb_iter)
        self._search_iters = check_int_positive('search_iters', search_iters)
        if loss_fn is None:
            loss_fn = SoftmaxCrossEntropyWithLogits(sparse=False)
        with_loss_cell = WithLossCell(self._network, loss_fn)
        self._grad_all = GradWrapWithLoss(with_loss_cell)
        self._dtype = None
        self._bounds = check_param_multi_types('bounds', bounds, [list, tuple])
        self._sparse = check_param_type('sparse', sparse, bool)
        for b in self._bounds:
            _ = check_param_multi_types('bound', b, [int, float])
        box_max, box_min = bounds
        if box_max < box_min:
            self._box_min = box_max
            self._box_max = box_min
        else:
            self._box_min = box_min
            self._box_max = box_max

[docs]    def generate(self, inputs, labels):
        """
        Generate adversarial examples based on input data and target labels.

        Args:
            inputs (numpy.ndarray): Benign input samples used as references to create
                adversarial examples.
            labels (numpy.ndarray): Original/target labels.

        Returns:
            numpy.ndarray, generated adversarial examples.

        Examples:
            >>> adv = attack.generate([[0.1, 0.2, 0.6], [0.3, 0, 0.4]], [2, 2])
        """
        LOGGER.debug(TAG, 'start to generate adv image.')
        arr_x, arr_y = check_pair_numpy_param('inputs', inputs, 'labels', labels)
        self._dtype = arr_x.dtype
        adv_list = list()
        for original_x, label_y in zip(arr_x, arr_y):
            adv_list.append(self._optimize(
                original_x, label_y, epsilon=self._eps))
        return np.array(adv_list)

    def _forward_one(self, cur_input):
        """Forward one sample in model."""
        cur_input = np.expand_dims(cur_input, axis=0)
        out_logits = self._network(Tensor(cur_input)).asnumpy()
        return out_logits

    def _gradient(self, cur_input, labels, shape):
        """ Return model gradient to minimize loss in l-bfgs-b."""
        label_dtype = labels.dtype
        labels = np.expand_dims(labels, axis=0).astype(label_dtype)
        # input shape should like original shape
        reshape_input = np.expand_dims(cur_input.reshape(shape),
                                       axis=0)
        out_grad = self._grad_all(Tensor(reshape_input), Tensor(labels))
        if isinstance(out_grad, tuple):
            out_grad = out_grad[0]
        return out_grad.asnumpy()

    def _loss(self, cur_input, start_input, cur_eps, shape, labels):
        """
        The l-bfgs-b loss is the sum of l2 distances to the original input plus
        the cross-entropy loss.
        """
        cur_input = cur_input.astype(self._dtype)
        l2_distance = np.linalg.norm(
            cur_input.reshape((cur_input.shape[0], -1)) - start_input.reshape(
                (start_input.shape[0], -1)))
        logits = self._forward_one(cur_input.reshape(shape)).flatten()
        logits = logits - np.max(logits)
        if self._sparse:
            target_class = labels
        else:
            target_class = np.argmax(labels)
        if self._is_targeted:
            crossentropy = np.log(np.sum(np.exp(logits))) - logits[target_class]
            gradient = self._gradient(cur_input, labels, shape).flatten()
        else:
            crossentropy = logits[target_class] - np.log(np.sum(np.exp(logits)))
            gradient = -self._gradient(cur_input, labels, shape).flatten()

        return (l2_distance + cur_eps*crossentropy).astype(self._dtype), \
               gradient.astype(np.float64)

    def _lbfgsb(self, start_input, cur_eps, shape, labels, bounds):
        """
        A wrapper.
        Method reference to `scipy.optimize.fmin_l_bfgs_b`_

        .. _`scipy.optimize.fmin_l_bfgs_b`: https://docs.scipy.org/doc/scipy/
        reference/generated/scipy.optimize.fmin_l_bfgs_b.html
        """
        approx_grad_eps = (self._box_max - self._box_min) / 100
        max_matrix_variable = 15
        cur_input, _, detail_info = so.fmin_l_bfgs_b(
            self._loss,
            start_input,
            args=(start_input, cur_eps, shape, labels),
            approx_grad=False,
            bounds=bounds,
            m=max_matrix_variable,
            maxiter=self._nb_iter,
            epsilon=approx_grad_eps)

        LOGGER.debug(TAG, str(detail_info))
        # LBFGS-B does not always exactly respect the boundaries
        if np.amax(cur_input) > self._box_max or np.amin(
                cur_input) < self._box_min:  # pragma: no coverage
            LOGGER.debug(TAG,
                         'Input out of bounds (min, max = %s, %s).'
                         ' Performing manual clip.',
                         np.amin(cur_input),
                         np.amax(cur_input))
            cur_input = np.clip(cur_input, self._box_min, self._box_max)
        cur_input = cur_input.astype(self._dtype)
        cur_input = cur_input.reshape(shape)
        adv_prediction = self._forward_one(cur_input)

        LOGGER.debug(TAG, 'input one sample label is :{}'.format(labels))
        if not self._sparse:
            labels = np.argmax(labels)
        if self._is_targeted:
            return cur_input, np.argmax(adv_prediction) == labels
        return cur_input, np.argmax(adv_prediction) != labels

    def _optimize(self, start_input, labels, epsilon):
        """
        Given loss fuction and gradient, use l_bfgs_b algorithm to update input
        sample. The epsilon will be doubled until an adversarial example is found.

        Args:
            start_input (numpy.ndarray): Benign input samples used as references
                to create adversarial examples.
            labels (numpy.ndarray): Target labels.
            epsilon: (float): Attack step size.
            max_iter (int): Number of iteration.
        """
        # store the shape for later and operate on the flattened input
        ori_shape = start_input.shape
        start_input = start_input.flatten().astype(self._dtype)
        bounds = [self._bounds]*len(start_input)

        # finding initial cur_eps
        iter_c = epsilon
        for _ in range(self._search_iters):
            iter_c = 2*iter_c
            generate_x, is_adversarial = self._lbfgsb(start_input,
                                                      iter_c,
                                                      ori_shape,
                                                      labels,
                                                      bounds)
            LOGGER.debug(TAG, 'Tested iter_c = %f', iter_c)
            if is_adversarial:
                LOGGER.debug(TAG, 'find adversarial successfully.')
                return generate_x
            LOGGER.debug(TAG, 'failed to not adversarial.')
        return generate_x