# Copyright 2024 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""algorithm related configs"""
from dataclasses import dataclass, field, is_dataclass, asdict
from enum import Enum
from typing import List, Union
from mindspore import dtype as msdtype
from mindspore_gs.common.config import GSBaseConfig
from mindspore_gs.common.utils import value_check, list_value_check
from mindspore_gs.common.register import RegisterMachine
from mindspore_gs.common.gs_enum import BackendTarget
algo_cfg_register = RegisterMachine()
class PTQApproach(Enum):
"""
PTQ approach enums
"""
SMOOTH_QUANT = 'smooth_quant'
RTN = 'rtn'
GPTQ = 'gptq'
OMNI_QUANT = 'omni_quant'
PTQ = 'ptq'
[docs]class PTQMode(Enum):
"""
Mode for ptq quantizer.
- ``QUANTIZE``: indicate ptq quantizer in quantize mode.
- ``DEPLOY``: indicate ptq quantizer in deploy mode.
"""
QUANTIZE = 'quantize'
DEPLOY = 'deploy'
class LayerQuantizeAlgo(Enum):
"""
Quantization algorithm for each layer.
- ``A16W8``: apply.
"""
A16W8 = 'a16w8'
A8W8 = 'a8w8'
@algo_cfg_register.register(PTQApproach.OMNI_QUANT)
@dataclass
class OmniQuantConfig:
"""config for omni quant algorithm"""
pre_clip_ratio: Union[list, float] = 1.0
post_clip_ratio: Union[list, float] = 1.0
smooth_alpha: Union[list, float] = 0.5
is_revert_by_loss: bool = False
def __post_init__(self):
value_check('pre_clip_ratio', self.pre_clip_ratio, Union[list, float])
value_check('post_clip_ratio', self.post_clip_ratio, Union[list, float])
value_check('smooth_alpha', self.smooth_alpha, Union[list, float])
value_check('is_revert_by_loss', self.is_revert_by_loss, bool)
if (not isinstance(self.pre_clip_ratio, type(self.post_clip_ratio))) or \
(not isinstance(self.pre_clip_ratio, type(self.smooth_alpha))) or \
(not isinstance(self.post_clip_ratio, type(self.smooth_alpha))):
raise ValueError(f"pre_clip_ratio, post_clip_ratio and smooth_alpha should have same type,"
f"but got pre_clip_ratio: {type(self.pre_clip_ratio)},"
f"post_clip_ratio: {type(self.post_clip_ratio)},"
f"smooth_alpha: {type(self.smooth_alpha)}.")
@algo_cfg_register.register(PTQApproach.PTQ)
@dataclass
class PTQQuantConfig:
"""config for omni quant algorithm"""
@algo_cfg_register.register(PTQApproach.SMOOTH_QUANT)
@dataclass
class SmoothQuantConfig:
"""config for smooth quant algorithm"""
alpha: float = 0.5
def __post_init__(self):
value_check('alpha', self.alpha, float)
@algo_cfg_register.register(PTQApproach.RTN)
@dataclass
class RTNConfig:
"""
Config for round to nearest algorithms.
"""
[docs]@dataclass
class PTQConfig:
"""
Config for post trainning quantization.
Args:
mode (:class:`mindspore_gs.ptq.PTQMode`): Flag for ptq mode, ``QUANTIZATION`` for quantization mode,
``DEPLOY`` for deploy mode.
backend (:class:`mindspore_gs.common.BackendTarget`): Flag for backend target, ``NONE`` for no specific backend,
``ASCEND`` for ascend backend.
opname_blacklist (List[str]): Blacklist of opname. Layers in network with name fuzzy matched with this blacklist
will not being quanted.
algo_args (Union[dict, dataclass]): Used to configure hyperparameters of algorithms such as RTN, SmoothQuant,
and OmniQuant.
act_quant_dtype (mindspore.dtype): Used to configure the quantization type of activation. mindspore.dtype.int8
indicates that the activation is quantized by 8 bits, and None indicates that it is not quantized.
weight_quant_dtype (mindspore.dtype): Used to configure the quantization type of weight. mindspore.dtype.int8
indicates that the weight is quantized by 8 bits, and None indicates that it is not quantized.
kvcache_quant_dtype (mindspore.dtype): Used to configure the quantization type of kvcache. mindspore.dtype.int8
indicates that the kvcache is quantized by 8 bits, and None indicates that it is not quantized.
outliers_suppression (:class:`mindspore_gs.ptq.OutliersSuppressionType`): Used to configure outliers suppression
method before quantization. OutliersSuppressionType.SMOOTH indicates using smooth method from SmoothQuant
to suppress outliers, and OutliersSuppressionType.NONE as default indicates doing nothing for outliers.
Raises:
ValueError: If `mode` is not PTQMode.QUANTIZE or PTQMode.DEPLOY.
ValueError: If `backend` is not BackendTarget.NONE or BackendTarget.ASCEND.
TypeError: If `opname_blacklist` is not a list of str.
ValueError: If `weight_quant_dtype` is not mindspore.dtype.int8 or None.
ValueError: If `kvcache_quant_dtype` is not mindspore.dtype.int8 or None.
ValueError: If `act_quant_dtype` is not mindspore.dtype.int8 or None.
TypeError: If `outliers_suppression` is not a OutliersSuppressionType.
Examples:
>>> from mindspore_gs.ptq import PTQConfig, PTQMode
>>> from mindspore_gs.common import BackendTarget
>>> PTQConfig(mode=PTQMode.DEPLOY, backend=BackendTarget.ASCEND, opname_blacklist=['layer0'])
PTQConfig(mode=<PTQMode.DEPLOY: 'deploy'>, backend=<BackendTarget.ASCEND: 'ascend'>, opname_blacklist=['layer0'], algo_args={})
"""
mode: PTQMode = PTQMode.QUANTIZE
backend: BackendTarget = BackendTarget.ASCEND
opname_blacklist: List[str] = field(default_factory=list)
algo_args: Union[dict, dataclass] = field(default_factory=dict)
weight_quant_dtype: msdtype = msdtype.int8
kvcache_quant_dtype: msdtype = None
act_quant_dtype: msdtype = None
outliers_suppression: OutliersSuppressionType = OutliersSuppressionType.NONE
def __post_init__(self):
if self.mode not in PTQMode.__members__.values():
raise ValueError(f'mode shall be in {PTQMode.__members__.values()}')
if self.backend not in BackendTarget.__members__.values():
raise ValueError(f'backend shall be in {BackendTarget.__members__.values()}')
if self.weight_quant_dtype != msdtype.int8 and self.weight_quant_dtype is not None:
raise ValueError(f'self.weight_quant_dtype: {self.weight_quant_dtype} is not mindspore.dtype.int8 or None.')
if self.kvcache_quant_dtype != msdtype.int8 and self.kvcache_quant_dtype is not None:
raise ValueError(f'self.kvcache_quant_dtype: {self.kvcache_quant_dtype} is not mindspore.dtype.int8 or None.')
if self.act_quant_dtype != msdtype.int8 and self.act_quant_dtype is not None:
raise ValueError(f'self.act_quant_dtype: {self.act_quant_dtype} is not mindspore.dtype.int8 or None.')
value_check('outliers_suppression', self.outliers_suppression, OutliersSuppressionType)
if not isinstance(self.algo_args, dict) and not is_dataclass(self.algo_args):
raise ValueError(f"algo_args's type should be dict or dataclass, but now is {type(self.algo_args)}")
list_value_check('opname_blacklist', self.opname_blacklist, str)
if self.algo_args and is_dataclass(self.algo_args):
self.algo_args = asdict(self.algo_args)
class YamlLoader:
"""Loader for some special item in yaml."""
def __call__(self, src: str):
raise NotImplementedError
class MSDTypeLoader(YamlLoader):
"""Loader for `mindspore.dtype` in yaml."""
def __init__(self):
self.dtype_dict = {
"Bool": msdtype.bool_,
"Int": msdtype.int_,
"Int8": msdtype.int8,
"Int16": msdtype.int16,
"Int32": msdtype.int32,
"Int64": msdtype.int64,
"UInt8": msdtype.uint8,
"UInt16": msdtype.uint16,
"UInt32": msdtype.uint32,
"UInt64": msdtype.uint64,
"Float": msdtype.float_,
"Float16": msdtype.float16,
"Float32": msdtype.float32,
"Float64": msdtype.float64,
"BFloat16": msdtype.bfloat16,
"Complex64": msdtype.complex64,
"Complex128": msdtype.complex128,
}
def __call__(self, src: str):
if src == "None":
return None
ms_dtype = self.dtype_dict.get(src, None)
if not ms_dtype:
raise ValueError(f"Unrecognized dtype: {src}")
return ms_dtype
@dataclass
class InnerPTQConfig(GSBaseConfig, PTQConfig):
"""
config for post-trainning-quantizer
"""
approach: PTQApproach = field(default=PTQApproach.RTN)
act_per_channel: bool = False
weight_per_channel: bool = True
kvcache_per_head: bool = True
act_symmetric: bool = False
weight_symmetric: bool = True
kvcache_symmetric: bool = True
act_narrow_range: bool = False
weight_narrow_range: bool = False
kvcache_narrow_range: bool = False
enable_deploy_fusion: bool = True
kvcache_calibrate_max_new_tokens: int = 10
smooth_to_pre_layer: bool = True
act_dynamic_quant: bool = False
kvcache_dynamic_quant: bool = False
fallback_blacklist: dict = field(default_factory=dict)
def __post_init__(self):
value_check('act_per_channel', self.act_per_channel, bool)
value_check('weight_per_channel', self.weight_per_channel, bool)
value_check('kvcache_per_head', self.kvcache_per_head, bool)
value_check('act_symmetric', self.act_symmetric, bool)
value_check('weight_symmetric', self.weight_symmetric, bool)
value_check('kvcache_symmetric', self.kvcache_symmetric, bool)
value_check('act_narrow_range', self.act_narrow_range, bool)
value_check('weight_narrow_range', self.weight_narrow_range, bool)
value_check('enable_deploy_fusion', self.enable_deploy_fusion, bool)
value_check('kvcache_calibrate_max_new_tokens', self.kvcache_calibrate_max_new_tokens, int)
value_check('smooth_to_pre_layer', self.smooth_to_pre_layer, bool)
value_check('fallback_blacklist', self.fallback_blacklist, dict)
value_check('act_dynamic_quant', self.act_dynamic_quant, bool)
if self.act_dynamic_quant is True and (self.weight_quant_dtype != msdtype.int8 or
self.act_quant_dtype != msdtype.int8):
raise ValueError(f'self.act_dynamic_quant is True, self.weight_quant_dtype: {self.weight_quant_dtype} \
and self.act_quant_dtype: {self.act_quant_dtype} must be mindspore.dtype.int8.')
value_check('kvcache_dynamic_quant', self.kvcache_dynamic_quant, bool)
if self.kvcache_dynamic_quant is True and self.kvcache_quant_dtype != msdtype.int8:
raise ValueError(f'self.kvcache_dynamic_quant is True, self.kvcache_quant_dtype: {self.kvcache_quant_dtype} \
must be mindspore.dtype.int8.')
if self.approach not in PTQApproach.__members__.values():
raise ValueError(f'Invalid approach: {self.approach}')
self._check_rtn()
if list(set(self.fallback_blacklist.keys()) & set(self.opname_blacklist)):
raise ValueError("There should be no repetition between opname_blacklist and fallback_a16w8_blacklist,"
f"now opname_blacklist={self.opname_blacklist},"
f"fallback_a16w8_blacklist={self.fallback_blacklist}")
if not self.algo_args:
args_config = algo_cfg_register[self.approach]
if args_config is not None and is_dataclass(args_config):
self.algo_args.update(asdict(args_config()))
def _check_rtn(self):
if self.approach is PTQApproach.RTN and self.act_quant_dtype == msdtype.int8 and self.act_dynamic_quant is False:
raise ValueError(f"{self.approach} is not support act_quant_dtype == mindspore.dtype.int8 when act_dynamic_quant is False.")
if self.approach is PTQApproach.RTN and self.weight_quant_dtype == msdtype.int8 and self.kvcache_quant_dtype == msdtype.int8 \
and self.kvcache_dynamic_quant is False:
raise ValueError(f"when self.kvcache_dynamic_quant is False, weight_quant_dtype and kvcache_quant_dtype are mindspore.dtype.int8, \
{self.approach} isn't supported.")
if self.approach is PTQApproach.RTN and self.weight_quant_dtype is None and self.kvcache_quant_dtype is None:
raise ValueError(f"weight_quant_dtype and kvcache_quant_dtype are None, {self.approach} can't take effect.")
def _parse_dict(self):
""" parse data class to readable dicts"""
parsed_dict = self.__dict__
parsed_dict['backend'] = self.backend.name
parsed_dict['mode'] = self.mode.name
parsed_dict['approach'] = self.approach.name
parsed_dict['opname_blacklist'] = self.opname_blacklist
parsed_dict['kvcache_quant_dtype'] = str(self.kvcache_quant_dtype)
parsed_dict['weight_quant_dtype'] = str(self.weight_quant_dtype)
parsed_dict['act_quant_dtype'] = str(self.act_quant_dtype)
parsed_dict['outliers_suppression'] = self.outliers_suppression.name
return parsed_dict
def _unparse_dict(self, data_dict):
""" convert readable dicts to data config"""
def update_dict(key, decode_fn):
nonlocal data_dict
if key not in data_dict:
raise ValueError(f'{key} shall in yaml, but not found')
if isinstance(decode_fn, YamlLoader):
data_dict[key] = decode_fn(data_dict[key])
else:
data_dict[key] = decode_fn[data_dict[key]]
unparse_list = [
('mode', PTQMode),
('backend', BackendTarget),
('approach', PTQApproach),
('outliers_suppression', OutliersSuppressionType),
('kvcache_quant_dtype', MSDTypeLoader()),
('weight_quant_dtype', MSDTypeLoader()),
('act_quant_dtype', MSDTypeLoader())
]
for item in unparse_list:
update_dict(*item)
self.__dict__.update(data_dict)
@staticmethod
def inner_config(cfg: PTQConfig, approach=None):
"""convert PTQConfig to InnerConfig"""
if not isinstance(cfg, PTQConfig):
raise TypeError(f'input config shall be PTQConfig, but got {type(cfg)}')
if not approach:
inner_cfg = InnerPTQConfig()
else:
inner_cfg = InnerPTQConfig(approach=approach)
for key, val in asdict(cfg).items():
if key == "algo_args":
inner_cfg.algo_args.update(val)
else:
setattr(inner_cfg, key, val)
return inner_cfg