# Copyright 2019 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Evaluating Defense against Black-box Attacks.
"""
import numpy as np
from mindarmour.utils.logger import LogUtil
from mindarmour.utils._check_param import check_pair_numpy_param, \
check_equal_length, check_int_positive, check_numpy_param
# Module-level logger handle obtained from LogUtil.get_instance().
LOGGER = LogUtil.get_instance()
# Identifier string for this module; presumably the tag passed to LOGGER
# calls -- it is not referenced in the visible code, so confirm before relying
# on it.
TAG = 'BlackDefenseEvaluate'
class BlackDefenseEvaluate:
    """
    Evaluation metrics of anti-black-box defense method.

    A sample is treated as adversarial when its query count is greater than
    0 and as benign when its query count is 0.

    Args:
        raw_preds (numpy.ndarray): Predict results of some certain samples on
            raw model.
        def_preds (numpy.ndarray): Predict results of some certain samples on
            defensed model.
        raw_query_counts (numpy.ndarray): Number of queries to generate
            adversarial examples on raw model, which is one dimensional whose
            size is raw_preds.shape[0]. For benign samples, query count must
            be set to 0.
        def_query_counts (numpy.ndarray): Number of queries to generate
            adversarial examples on defensed model, which is one dimensional
            whose size is raw_preds.shape[0].
            For benign samples, query count must be set to 0.
        raw_query_time (numpy.ndarray): The total time duration to generate
            an adversarial example on raw model, which is one dimensional
            whose size is raw_preds.shape[0].
        def_query_time (numpy.ndarray): The total time duration to generate an
            adversarial example on defensed model, which is one dimensional
            whose size is raw_preds.shape[0].
        def_detection_counts (numpy.ndarray): Total number of detected queries
            during each adversarial example generation, which is one
            dimensional whose size is raw_preds.shape[0]. For a benign sample,
            the def_detection_counts is set to 1 if the query is identified as
            suspicious, and 0 otherwise.
        true_labels (numpy.ndarray): True labels in one-dim whose size is
            raw_preds.shape[0].
        max_queries (int): Attack budget, the maximum number of queries.

    Examples:
        >>> raw_preds = np.array([[0.1, 0.1, 0.2, 0.6],
        ...                       [0.1, 0.7, 0.0, 0.2],
        ...                       [0.8, 0.1, 0.0, 0.1]])
        >>> def_preds = np.array([[0.1, 0.1, 0.1, 0.7],
        ...                       [0.1, 0.6, 0.2, 0.1],
        ...                       [0.1, 0.2, 0.1, 0.6]])
        >>> raw_query_counts = np.array([0, 20, 10])
        >>> def_query_counts = np.array([0, 50, 60])
        >>> raw_query_time = np.array([0.1, 2, 1])
        >>> def_query_time = np.array([0.2, 6, 5])
        >>> def_detection_counts = np.array([1, 5, 10])
        >>> true_labels = np.array([3, 1, 0])
        >>> max_queries = 100
        >>> def_eval = BlackDefenseEvaluate(raw_preds,
        ...                                 def_preds,
        ...                                 raw_query_counts,
        ...                                 def_query_counts,
        ...                                 raw_query_time,
        ...                                 def_query_time,
        ...                                 def_detection_counts,
        ...                                 true_labels,
        ...                                 max_queries)
        >>> def_eval.qcv()
    """

    def __init__(self, raw_preds, def_preds, raw_query_counts,
                 def_query_counts, raw_query_time, def_query_time,
                 def_detection_counts, true_labels, max_queries):
        self._raw_preds, self._def_preds = check_pair_numpy_param(
            'raw_preds', raw_preds, 'def_preds', def_preds)
        self._num_samples = self._raw_preds.shape[0]

        # Every per-sample array is checked against raw_preds so that all of
        # them describe the same number of samples.
        self._raw_query_counts, _ = check_equal_length(
            'raw_query_counts', raw_query_counts,
            'number of sample', self._raw_preds)
        self._def_query_counts, _ = check_equal_length(
            'def_query_counts', def_query_counts,
            'number of sample', self._raw_preds)
        self._raw_query_time, _ = check_equal_length(
            'raw_query_time', raw_query_time,
            'number of sample', self._raw_preds)
        self._def_query_time, _ = check_equal_length(
            'def_query_time', def_query_time,
            'number of sample', self._raw_preds)

        # Adversarial samples are those that needed at least one query on the
        # raw model; everything else is counted as benign.
        self._num_adv_samples = self._raw_query_counts[
            self._raw_query_counts > 0].shape[0]
        self._num_adv_samples = check_int_positive(
            'the number of adversarial samples', self._num_adv_samples)
        self._num_ben_samples = self._num_samples - self._num_adv_samples

        self._max_queries = check_int_positive('max_queries', max_queries)
        self._def_detection_counts = check_numpy_param(
            'def_detection_counts', def_detection_counts)
        self._true_labels = check_numpy_param('true_labels', true_labels)

    def qcv(self):
        """
        Calculate query count variance (QCV).

        Returns:
            float, the higher, the stronger the defense is. If
            num_adv_samples=0, return -1.
        """
        if self._num_adv_samples == 0:
            return -1
        avg_def_query_count = \
            np.sum(self._def_query_counts) / self._num_adv_samples
        avg_raw_query_count = \
            np.sum(self._raw_query_counts) / self._num_adv_samples

        # The defended model exhausted the whole budget on average while the
        # raw model did not: report the maximum score.
        if avg_def_query_count == self._max_queries \
                and avg_raw_query_count < self._max_queries:
            return 1
        # Otherwise the increase in average query count, capped at the
        # budget, is normalised by the budget.
        return min(avg_def_query_count - avg_raw_query_count,
                   self._max_queries) / self._max_queries

    def asv(self):
        """
        Calculate attack success rate variance (ASV).

        Returns:
            float, the lower, the stronger the defense is. If
            num_adv_samples=0, return -1.
        """
        if self._num_adv_samples == 0:
            return -1

        raw_adv_mask = self._raw_query_counts > 0
        def_adv_mask = self._def_query_counts > 0

        # Labels must be selected with the same mask as the predictions they
        # are compared with; otherwise differing masks make NumPy broadcast
        # or mis-align the comparison and the success count is wrong.
        raw_succ_num = np.sum(
            np.argmax(self._raw_preds[raw_adv_mask], axis=1)
            != self._true_labels[raw_adv_mask])
        def_succ_num = np.sum(
            np.argmax(self._def_preds[def_adv_mask], axis=1)
            != self._true_labels[def_adv_mask])
        return (raw_succ_num - def_succ_num) / self._num_adv_samples

    def fpr(self):
        """
        Calculate false positive rate (FPR) of the query-based detector.

        Returns:
            float, the lower, the higher usability the defense is. If
            num_ben_samples=0, return -1.
        """
        if self._num_ben_samples == 0:
            return -1
        # Benign samples are the ones with zero queries on the defended
        # model; any detection on them is a false positive.
        ben_detect_counts = \
            self._def_detection_counts[self._def_query_counts == 0]
        num_fp = ben_detect_counts[ben_detect_counts > 0].shape[0]
        return num_fp / self._num_ben_samples

    def qrv(self):
        """
        Calculate the benign query response time variance (QRV).

        Returns:
            float, the lower, the higher usability the defense is. If
            num_ben_samples=0, return -1.
        """
        if self._num_ben_samples == 0:
            return -1
        ben_raw_query_time = self._raw_query_time[self._raw_query_counts == 0]
        ben_def_query_time = self._def_query_time[self._def_query_counts == 0]
        avg_raw_query_time = np.sum(ben_raw_query_time) / self._num_ben_samples
        avg_def_query_time = np.sum(ben_def_query_time) / self._num_ben_samples
        # The small epsilon keeps the ratio finite when the raw model's
        # average benign response time is zero.
        return (avg_def_query_time
                - avg_raw_query_time) / (avg_raw_query_time + 1e-12)