# Source code for mindarmour.attacks.black.hop_skip_jump_attack

# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Hop-skip-jump attack.
"""
import numpy as np

from mindarmour.attacks.attack import Attack
from mindarmour.utils.logger import LogUtil
from mindarmour.attacks.black.black_model import BlackModel
from mindarmour.utils._check_param import check_pair_numpy_param, check_model, \
    check_numpy_param, check_int_positive, check_value_positive, \
    check_value_non_negative, check_param_type

LOGGER = LogUtil.get_instance()
TAG = 'HopSkipJumpAttack'


def _clip_image(image, clip_min, clip_max):
    """
    Clip an image, or an image batch, with upper and lower threshold.
    """
    return np.clip(image, clip_min, clip_max)


class HopSkipJumpAttack(Attack):
    """
    HopSkipJumpAttack proposed by Chen, Jordan and Wainwright is a
    decision-based attack. The attack requires access to output labels of
    target model.

    References: `Chen J, Michael I. Jordan, Martin J. Wainwright.
    HopSkipJumpAttack: A Query-Efficient Decision-Based Attack. 2019.
    arXiv:1904.02144 <https://arxiv.org/abs/1904.02144>`_

    Args:
        model (BlackModel): Target model.
        init_num_evals (int): The initial number of evaluations for gradient
            estimation. Default: 100.
        max_num_evals (int): The maximum number of evaluations for gradient
            estimation. Default: 1000.
        stepsize_search (str): Indicating how to search for stepsize; Possible
            values are 'geometric_progression', 'grid_search'.
            Default: 'geometric_progression'.
        num_iterations (int): The number of iterations. Default: 20.
        gamma (float): Used to set binary search threshold theta. Default: 1.0.
            For l2 attack the binary search threshold `theta` is:
            math:`gamma / d^{3/2}`. For linf attack is math:`gamma / d^2`.
        constraint (str): The norm distance to optimize. Possible values are
            'l2', 'linf'. Default: l2.
        batch_size (int): Batch size. Default: 32.
        clip_min (float, optional): The minimum image component value.
            Default: 0.
        clip_max (float, optional): The maximum image component value.
            Default: 1.
        sparse (bool): If True, input labels are sparse-encoded. If False,
            input labels are one-hot-encoded. Default: True.

    Raises:
        ValueError: If stepsize_search not in ['geometric_progression',
            'grid_search']
        ValueError: If constraint not in ['l2', 'linf']

    Examples:
        >>> x_test = np.asarray(np.random.random((sample_num,
        >>>                                       sample_length)), np.float32)
        >>> y_test = np.random.randint(0, class_num, size=sample_num)
        >>> instance = HopSkipJumpAttack(user_model)
        >>> adv_x = instance.generate(x_test, y_test)
    """

    def __init__(self, model, init_num_evals=100, max_num_evals=1000,
                 stepsize_search='geometric_progression', num_iterations=20,
                 gamma=1.0, constraint='l2', batch_size=32, clip_min=0.0,
                 clip_max=1.0, sparse=True):
        super(HopSkipJumpAttack, self).__init__()
        self._model = check_model('model', model, BlackModel)
        self._init_num_evals = check_int_positive('initial_num_evals',
                                                  init_num_evals)
        self._max_num_evals = check_int_positive('max_num_evals',
                                                 max_num_evals)
        self._batch_size = check_int_positive('batch_size', batch_size)
        self._clip_min = check_value_non_negative('clip_min', clip_min)
        self._clip_max = check_value_non_negative('clip_max', clip_max)
        self._sparse = check_param_type('sparse', sparse, bool)
        # Noise/gradient buffers are kept in float32.
        self._np_dtype = np.dtype('float32')
        if stepsize_search in ['geometric_progression', 'grid_search']:
            self._stepsize_search = stepsize_search
        else:
            msg = "stepsize_search must be in ['geometric_progression'," \
                  " 'grid_search'], but got {}".format(stepsize_search)
            LOGGER.error(TAG, msg)
            raise ValueError(msg)
        self._num_iterations = check_int_positive('num_iterations',
                                                  num_iterations)
        self._gamma = check_value_positive('gamma', gamma)
        if constraint in ['l2', 'linf']:
            self._constraint = constraint
        else:
            msg = "constraint must be in ['l2', 'linf'], " \
                  "but got {}".format(constraint)
            LOGGER.error(TAG, msg)
            raise ValueError(msg)
        # queries: running count of model queries for the current sample.
        self.queries = 0
        # is_adv: whether the last attack attempt succeeded.
        self.is_adv = True
        # Per-batch / per-sample targets for targeted attacks; None means
        # untargeted.
        self.y_targets = None
        self.image_targets = None
        self.y_target = None
        self.image_target = None

    def _generate_one(self, sample):
        """
        Run HopSkipJumpAttack on a single input sample.

        Args:
            sample (numpy.ndarray): One input sample, without the batch
                dimension.

        Returns:
            - bool, True if an adversarial example was found.
            - numpy.ndarray, the generated adversarial example (with a
              leading batch dimension of 1), or None on failure.
            - int, number of model queries used for this sample.
        """
        shape = list(np.shape(sample))
        dim = int(np.prod(shape))
        # Set binary search threshold: gamma / d^{3/2} for l2,
        # gamma / d^2 for linf (see class docstring).
        if self._constraint == 'l2':
            theta = self._gamma / (np.sqrt(dim)*dim)
        else:
            theta = self._gamma / (dim*dim)
        wrap = self._hsja(sample, self.y_target, self.image_target, dim,
                          theta)
        # _hsja returns None when no initial adversarial point is found.
        if wrap is None:
            self.is_adv = False
        else:
            self.is_adv = True
        return self.is_adv, wrap, self.queries

    def set_target_images(self, target_images):
        """
        Setting target images for target attack.

        Args:
            target_images (numpy.ndarray): Target images.
        """
        self.image_targets = check_numpy_param('target_images',
                                               target_images)

    def generate(self, inputs, labels):
        """
        Generate adversarial images in a for loop.

        Args:
            inputs (numpy.ndarray): Origin images.
            labels (numpy.ndarray): Target labels.

        Returns:
            - numpy.ndarray, bool values for each attack result.
            - numpy.ndarray, generated adversarial examples.
            - numpy.ndarray, query times for each sample.

        Examples:
            >>> generate([[0.1,0.2,0.2],[0.2,0.3,0.4]],[2,6])
        """
        if labels is not None:
            inputs, labels = check_pair_numpy_param('inputs', inputs,
                                                    'labels', labels)
        if not self._sparse:
            # One-hot labels are converted to sparse class indices.
            labels = np.argmax(labels, axis=1)
        x_adv = []
        is_advs = []
        queries_times = []
        if labels is not None:
            self.y_targets = labels
        for i, x_single in enumerate(inputs):
            # Query counter restarts for every sample.
            self.queries = 0
            if self.image_targets is not None:
                self.image_target = self.image_targets[i]
            if self.y_targets is not None:
                self.y_target = self.y_targets[i]
            is_adv, adv_img, query_time = self._generate_one(x_single)
            x_adv.append(adv_img)
            is_advs.append(is_adv)
            queries_times.append(query_time)
        return np.asarray(is_advs), \
               np.asarray(x_adv), \
               np.asarray(queries_times)

    def _hsja(self, sample, target_label, target_image, dim, theta):
        """
        The main algorithm for HopSkipJumpAttack: initialize an adversarial
        point, project it to the decision boundary, then iteratively
        estimate the gradient direction and step along it.

        Args:
            sample (numpy.ndarray): Input image. Without the batchsize
                dimension.
            target_label (int): Integer for targeted attack, None for
                nontargeted attack. Without the batchsize dimension.
            target_image (numpy.ndarray): An array with the same size as
                input sample, or None. Without the batchsize dimension.

        Returns:
            numpy.ndarray, perturbed images, or None if no initial
            adversarial example could be found.
        """
        original_label = None
        # Original label is only needed for the untargeted attack.
        if target_label is None:
            original_label = self._model.predict(sample)
            original_label = np.argmax(original_label)
        # Initialize perturbed image.
        # untarget attack
        if target_image is None:
            perturbed = self._initialize(sample, original_label,
                                         target_label)
            if perturbed is None:
                msg = 'Can not find an initial adversarial example'
                LOGGER.info(TAG, msg)
                return perturbed
        else:
            # Target attack: start from the provided target image.
            perturbed = target_image
        # Project the initial perturbed image to the decision boundary.
        perturbed, dist_post_update = self._binary_search_batch(
            sample, np.expand_dims(perturbed, 0), original_label,
            target_label, theta)
        # Calculate the distance of perturbed image and original sample.
        dist = self._compute_distance(perturbed, sample)
        for j in np.arange(self._num_iterations):
            current_iteration = j + 1
            # Select delta (sampling radius for gradient estimation).
            delta = self._select_delta(dist_post_update, current_iteration,
                                       dim, theta)
            # Choose number of evaluations, growing with sqrt(iteration)
            # up to the configured cap.
            num_evals = int(min([self._init_num_evals*np.sqrt(j + 1),
                                 self._max_num_evals]))
            # Approximate gradient direction via Monte-Carlo sampling.
            gradf = self._approximate_gradient(perturbed, num_evals,
                                               original_label, target_label,
                                               delta, theta)
            if self._constraint == 'linf':
                update = np.sign(gradf)
            else:
                update = gradf
            # Search step size.
            if self._stepsize_search == 'geometric_progression':
                # Find step size by halving until adversarial.
                epsilon = self._geometric_progression_for_stepsize(
                    perturbed, update, dist, current_iteration,
                    original_label, target_label)
                # Update the sample.
                perturbed = _clip_image(perturbed + epsilon*update,
                                        self._clip_min, self._clip_max)
                # Binary search to return to the boundary.
                perturbed, dist_post_update = self._binary_search_batch(
                    sample, perturbed[None], original_label, target_label,
                    theta)
            elif self._stepsize_search == 'grid_search':
                # Try 20 log-spaced step sizes in [1e-4, 1]*dist at once.
                epsilons = np.logspace(-4, 0, num=20, endpoint=True)*dist
                epsilons_shape = [20] + len(np.shape(sample))*[1]
                perturbeds = perturbed + epsilons.reshape(
                    epsilons_shape)*update
                perturbeds = _clip_image(perturbeds, self._clip_min,
                                         self._clip_max)
                idx_perturbed = self._decision_function(perturbeds,
                                                        original_label,
                                                        target_label)
                # If no candidate is adversarial, keep the previous
                # perturbed point and dist_post_update unchanged.
                if np.sum(idx_perturbed) > 0:
                    # Select the perturbation that yields the minimum
                    # distance after binary search.
                    perturbed, dist_post_update = self._binary_search_batch(
                        sample, perturbeds[idx_perturbed], original_label,
                        target_label, theta)
            # Compute new distance.
            dist = self._compute_distance(perturbed, sample)
            LOGGER.debug(TAG, 'iteration: %d, %s distance %4f', j + 1,
                         self._constraint, dist)
        perturbed = np.expand_dims(perturbed, 0)
        return perturbed

    def _decision_function(self, images, original_label, target_label):
        """
        Decision function returns 1 if the input sample is on the desired
        side of the boundary, and 0 otherwise. Also accumulates the model
        query counter by the number of images evaluated.
        """
        images = _clip_image(images, self._clip_min, self._clip_max)
        prob = []
        self.queries += len(images)
        for i in range(0, len(images), self._batch_size):
            batch = images[i:i + self._batch_size]
            length = len(batch)
            prob_i = self._model.predict(batch)[:length]
            prob.append(prob_i)
        prob = np.concatenate(prob)
        if target_label is None:
            # Untargeted: any label other than the original counts.
            res = np.argmax(prob, axis=1) != original_label
        else:
            # Targeted: only the target label counts.
            res = np.argmax(prob, axis=1) == target_label
        return res

    def _compute_distance(self, original_img, perturbation_img):
        """
        Compute the distance between original image and perturbation images,
        using the norm selected by self._constraint ('l2' or 'linf').
        """
        if self._constraint == 'l2':
            distance = np.linalg.norm(original_img - perturbation_img)
        else:
            distance = np.max(abs(original_img - perturbation_img))
        return distance

    def _approximate_gradient(self, sample, num_evals, original_label,
                              target_label, delta, theta):
        """
        Gradient direction estimation via Monte-Carlo sampling of random
        perturbations around the current point.
        """
        # Generate random noise based on constraint: Gaussian for l2,
        # uniform for linf.
        noise_shape = [num_evals] + list(np.shape(sample))
        if self._constraint == 'l2':
            random_noise = np.random.randn(*noise_shape)
        else:
            random_noise = np.random.uniform(low=-1, high=1,
                                             size=noise_shape)
        axis = tuple(range(1, 1 + len(np.shape(sample))))
        # Normalize each noise vector to unit l2 norm.
        random_noise = random_noise / np.sqrt(
            np.sum(random_noise**2, axis=axis, keepdims=True))
        # Perturbed images; re-derive the effective noise after clipping so
        # the estimate matches the points actually evaluated.
        perturbed = sample + delta*random_noise
        perturbed = _clip_image(perturbed, self._clip_min, self._clip_max)
        random_noise = (perturbed - sample) / theta
        # Whether the perturbed images are on the desired side of the
        # boundary.
        decisions = self._decision_function(perturbed, original_label,
                                            target_label)
        decision_shape = [len(decisions)] + [1]*len(np.shape(sample))
        # Transform decision values from {1, 0} to {+1, -1}.
        re_decision = 2*np.array(decisions).astype(self._np_dtype).reshape(
            decision_shape) - 1.0
        if np.mean(re_decision) == 1.0:
            # All perturbations adversarial: mean noise is the direction.
            grad_direction = np.mean(random_noise, axis=0)
        elif np.mean(re_decision) == -1.0:
            # None adversarial: step the opposite way.
            grad_direction = - np.mean(random_noise, axis=0)
        else:
            # Baseline-subtracted weighted average (variance reduction).
            re_decision = re_decision - np.mean(re_decision)
            grad_direction = np.mean(re_decision*random_noise, axis=0)
        # The gradient direction, normalized (epsilon avoids divide-by-zero).
        grad_direction = grad_direction / (np.linalg.norm(grad_direction)
                                           + 1e-10)
        return grad_direction

    def _project(self, original_image, perturbed_images, alphas):
        """
        Projection input samples onto given l2 or linf balls: linear
        interpolation for l2, per-pixel clipping for linf.
        """
        alphas_shape = [len(alphas)] + [1]*len(np.shape(original_image))
        alphas = alphas.reshape(alphas_shape)
        if self._constraint == 'l2':
            projected = (1 - alphas)*original_image \
                        + alphas*perturbed_images
        else:
            projected = _clip_image(perturbed_images,
                                    original_image - alphas,
                                    original_image + alphas)
        return projected

    def _binary_search_batch(self, original_image, perturbed_images,
                             original_label, target_label, theta):
        """
        Binary search to approach the model decision boundary, run on a
        batch of candidate perturbed images; returns the candidate closest
        to the original image and its pre-search distance.
        """
        # Compute distance between perturbed image and original image.
        dists_post_update = np.array(
            [self._compute_distance(original_image, perturbed_image,)
             for perturbed_image in perturbed_images])
        # Get higher thresholds.
        if self._constraint == 'l2':
            highs = np.ones(len(perturbed_images))
            thresholds = theta
        else:
            highs = dists_post_update
            thresholds = np.minimum(dists_post_update*theta, theta)
        # Get lower thresholds.
        lows = np.zeros(len(perturbed_images))
        # Update thresholds until the search interval is below threshold.
        while np.max((highs - lows) / thresholds) > 1:
            mids = (highs + lows) / 2.0
            mid_images = self._project(original_image, perturbed_images,
                                       mids)
            decisions = self._decision_function(mid_images, original_label,
                                                target_label)
            # Adversarial midpoints tighten the upper bound, others the
            # lower bound.
            lows = np.where(decisions == [0], mids, lows)
            highs = np.where(decisions == [1], mids, highs)
        out_images = self._project(original_image, perturbed_images, highs)
        # Select the best choice based on the distance of the output image.
        dists = np.array(
            [self._compute_distance(original_image, out_image)
             for out_image in out_images])
        idx = np.argmin(dists)
        dist = dists_post_update[idx]
        out_image = out_images[idx]
        return out_image, dist

    def _initialize(self, sample, original_label, target_label):
        """
        Implementation of BlendedUniformNoiseAttack: find adversarial
        uniform noise, then binary-search the blend factor toward the
        sample. Returns None after 1000 failed noise draws.
        """
        num_evals = 0
        while True:
            random_noise = np.random.uniform(self._clip_min, self._clip_max,
                                             size=np.shape(sample))
            success = self._decision_function(random_noise[None],
                                              original_label, target_label)
            if success:
                break
            num_evals += 1
            if num_evals > 1e3:
                return None
        # Binary search on the blend factor between sample and noise.
        low = 0.0
        high = 1.0
        while high - low > 0.001:
            mid = (high + low) / 2.0
            blended = (1 - mid)*sample + mid*random_noise
            success = self._decision_function(blended[None], original_label,
                                              target_label)
            if success:
                high = mid
            else:
                low = mid
        initialization = (1 - high)*sample + high*random_noise
        return initialization

    def _geometric_progression_for_stepsize(self, perturbed, update, dist,
                                            current_iteration,
                                            original_label, target_label):
        """
        Search for stepsize in the way of Geometric progression. Keep
        decreasing stepsize by half until reaching the desired side of the
        decision boundary.

        NOTE(review): `perturbed + epsilon*update` is passed to
        _decision_function without a batch dimension, unlike other call
        sites which use `[None]` — confirm the expected input rank.
        """
        epsilon = dist / np.sqrt(current_iteration)
        while True:
            updated = perturbed + epsilon*update
            success = self._decision_function(updated, original_label,
                                              target_label)
            if success:
                break
            epsilon = epsilon / 2.0
        return epsilon

    def _select_delta(self, dist_post_update, current_iteration, dim, theta):
        """
        Choose the delta based on the distance between the input sample
        and the perturbed sample. The first iteration uses a fixed fraction
        of the valid value range; later iterations scale with the current
        boundary distance.
        """
        if current_iteration == 1:
            delta = 0.1*(self._clip_max - self._clip_min)
        else:
            if self._constraint == 'l2':
                delta = np.sqrt(dim)*theta*dist_post_update
            else:
                delta = dim*theta*dist_post_update
        return delta