# Copyright 2024 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""algorithm related configs"""

from dataclasses import dataclass, field, is_dataclass, asdict
from enum import Enum
from typing import List, Union

from mindspore import dtype as msdtype

from mindspore_gs.common.config import GSBaseConfig
from mindspore_gs.common.utils import value_check, list_value_check
from mindspore_gs.common.register import RegisterMachine
from mindspore_gs.common.gs_enum import BackendTarget

algo_cfg_register = RegisterMachine()


class PTQApproach(Enum):
    """
    PTQ approach enums
    """
    SMOOTH_QUANT = 'smooth_quant'
    RTN = 'rtn'
    GPTQ = 'gptq'
    OMNI_QUANT = 'omni_quant'
    PTQ = 'ptq'


class PTQMode(Enum):
    """
    Mode for ptq quantizer.

    - ``QUANTIZE``: indicate ptq quantizer in quantize mode.
    - ``DEPLOY``: indicate ptq quantizer in deploy mode.
    """
    QUANTIZE = 'quantize'
    DEPLOY = 'deploy'
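
# Illustrative usage sketch (not part of the module): the mode flag is consumed by
# PTQConfig (defined below) to switch the quantizer between quantize mode, which
# calibrates and quantizes a float network, and deploy mode for inference.
#     >>> from mindspore_gs.ptq import PTQConfig, PTQMode
#     >>> quant_cfg = PTQConfig(mode=PTQMode.QUANTIZE)
#     >>> deploy_cfg = PTQConfig(mode=PTQMode.DEPLOY)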


class OutliersSuppressionType(Enum):
    """
    Outliers suppression type for ptq quantizer.

    - ``SMOOTH``: apply a smooth scale between weights and activations.
    - ``NONE``: do not apply any outliers suppression.
    """
    SMOOTH = 'smooth'
    NONE = 'none'
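
# Illustrative usage sketch: the suppression method is selected through the
# corresponding PTQConfig field (defined below).
#     >>> from mindspore_gs.ptq import PTQConfig, OutliersSuppressionType
#     >>> cfg = PTQConfig(outliers_suppression=OutliersSuppressionType.SMOOTH)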


class LayerQuantizeAlgo(Enum):
    """
    Quantization algorithm for each layer.

    - ``A16W8``: quantize weights to 8 bits while activations keep 16 bits.
    - ``A8W8``: quantize both activations and weights to 8 bits.
    """
    A16W8 = 'a16w8'
    A8W8 = 'a8w8'


@algo_cfg_register.register(PTQApproach.OMNI_QUANT)
@dataclass
class OmniQuantConfig:
    """config for omni quant algorithm"""
    pre_clip_ratio: Union[list, float] = 1.0
    post_clip_ratio: Union[list, float] = 1.0
    smooth_alpha: Union[list, float] = 0.5
    is_revert_by_loss: bool = False

    def __post_init__(self):
        value_check('pre_clip_ratio', self.pre_clip_ratio, Union[list, float])
        value_check('post_clip_ratio', self.post_clip_ratio, Union[list, float])
        value_check('smooth_alpha', self.smooth_alpha, Union[list, float])
        value_check('is_revert_by_loss', self.is_revert_by_loss, bool)
        if (not isinstance(self.pre_clip_ratio, type(self.post_clip_ratio))) or \
                (not isinstance(self.pre_clip_ratio, type(self.smooth_alpha))) or \
                (not isinstance(self.post_clip_ratio, type(self.smooth_alpha))):
            raise ValueError(f"pre_clip_ratio, post_clip_ratio and smooth_alpha should have the same type, "
                             f"but got pre_clip_ratio: {type(self.pre_clip_ratio)}, "
                             f"post_clip_ratio: {type(self.post_clip_ratio)}, "
                             f"smooth_alpha: {type(self.smooth_alpha)}.")


@algo_cfg_register.register(PTQApproach.PTQ)
@dataclass
class PTQQuantConfig:
    """config for ptq algorithm"""


@algo_cfg_register.register(PTQApproach.SMOOTH_QUANT)
@dataclass
class SmoothQuantConfig:
    """config for smooth quant algorithm"""
    alpha: float = 0.5

    def __post_init__(self):
        value_check('alpha', self.alpha, float)


@algo_cfg_register.register(PTQApproach.RTN)
@dataclass
class RTNConfig:
    """
    Config for round to nearest algorithms.
    """
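
# Illustrative sketch: each hyper-parameter dataclass above is registered per approach
# and can be looked up through the register (assuming, as its use in
# InnerPTQConfig.__post_init__ suggests, that the decorator returns the registered
# class). OmniQuantConfig requires pre_clip_ratio, post_clip_ratio and smooth_alpha to
# share one type (all float or all list); mixing them raises ValueError.
#     >>> oq = OmniQuantConfig(pre_clip_ratio=[1.0], post_clip_ratio=[0.9], smooth_alpha=[0.5])
#     >>> args_cls = algo_cfg_register[PTQApproach.SMOOTH_QUANT]
#     >>> args_cls().alpha
#     0.5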


@dataclass
class PTQConfig:
    """
    Config for post training quantization.

    Args:
        mode (:class:`mindspore_gs.ptq.PTQMode`): Flag for ptq mode, ``QUANTIZE`` for quantize mode,
            ``DEPLOY`` for deploy mode.
        backend (:class:`mindspore_gs.common.BackendTarget`): Flag for backend target, ``NONE`` for no
            specific backend, ``ASCEND`` for ascend backend.
        opname_blacklist (List[str]): Blacklist of opname. Layers in the network whose name fuzzy-matches
            an entry of this blacklist will not be quantized.
        algo_args (Union[dict, dataclass]): Used to configure hyperparameters of algorithms such as RTN,
            SmoothQuant, and OmniQuant.
        act_quant_dtype (mindspore.dtype): Used to configure the quantization type of activation.
            mindspore.dtype.int8 indicates that the activation is quantized to 8 bits, and None indicates
            that it is not quantized.
        weight_quant_dtype (mindspore.dtype): Used to configure the quantization type of weight.
            mindspore.dtype.int8 indicates that the weight is quantized to 8 bits, and None indicates
            that it is not quantized.
        kvcache_quant_dtype (mindspore.dtype): Used to configure the quantization type of kvcache.
            mindspore.dtype.int8 indicates that the kvcache is quantized to 8 bits, and None indicates
            that it is not quantized.
        outliers_suppression (:class:`mindspore_gs.ptq.OutliersSuppressionType`): Used to configure the
            outliers suppression method applied before quantization. OutliersSuppressionType.SMOOTH
            indicates using the smooth method from SmoothQuant to suppress outliers, and
            OutliersSuppressionType.NONE (the default) indicates doing nothing about outliers.

    Raises:
        ValueError: If `mode` is not PTQMode.QUANTIZE or PTQMode.DEPLOY.
        ValueError: If `backend` is not BackendTarget.NONE or BackendTarget.ASCEND.
        TypeError: If `opname_blacklist` is not a list of str.
        ValueError: If `weight_quant_dtype` is not mindspore.dtype.int8 or None.
        ValueError: If `kvcache_quant_dtype` is not mindspore.dtype.int8 or None.
        ValueError: If `act_quant_dtype` is not mindspore.dtype.int8 or None.
        TypeError: If `outliers_suppression` is not an OutliersSuppressionType.

    Examples:
        >>> from mindspore_gs.ptq import PTQConfig, PTQMode
        >>> from mindspore_gs.common import BackendTarget
        >>> PTQConfig(mode=PTQMode.DEPLOY, backend=BackendTarget.ASCEND, opname_blacklist=['layer0'])
        PTQConfig(mode=<PTQMode.DEPLOY: 'deploy'>, backend=<BackendTarget.ASCEND: 'ascend'>, opname_blacklist=['layer0'], algo_args={})
    """
    mode: PTQMode = PTQMode.QUANTIZE
    backend: BackendTarget = BackendTarget.ASCEND
    opname_blacklist: List[str] = field(default_factory=list)
    algo_args: Union[dict, dataclass] = field(default_factory=dict)
    weight_quant_dtype: msdtype = msdtype.int8
    kvcache_quant_dtype: msdtype = None
    act_quant_dtype: msdtype = None
    outliers_suppression: OutliersSuppressionType = OutliersSuppressionType.NONE

    def __post_init__(self):
        if self.mode not in PTQMode.__members__.values():
            raise ValueError(f'mode shall be in {PTQMode.__members__.values()}')
        if self.backend not in BackendTarget.__members__.values():
            raise ValueError(f'backend shall be in {BackendTarget.__members__.values()}')
        if self.weight_quant_dtype != msdtype.int8 and self.weight_quant_dtype is not None:
            raise ValueError(f'weight_quant_dtype: {self.weight_quant_dtype} is not mindspore.dtype.int8 or None.')
        if self.kvcache_quant_dtype != msdtype.int8 and self.kvcache_quant_dtype is not None:
            raise ValueError(f'kvcache_quant_dtype: {self.kvcache_quant_dtype} is not mindspore.dtype.int8 or None.')
        if self.act_quant_dtype != msdtype.int8 and self.act_quant_dtype is not None:
            raise ValueError(f'act_quant_dtype: {self.act_quant_dtype} is not mindspore.dtype.int8 or None.')
        value_check('outliers_suppression', self.outliers_suppression, OutliersSuppressionType)
        if not isinstance(self.algo_args, dict) and not is_dataclass(self.algo_args):
            raise ValueError(f"algo_args's type should be dict or dataclass, but now is {type(self.algo_args)}")
        list_value_check('opname_blacklist', self.opname_blacklist, str)
        if self.algo_args and is_dataclass(self.algo_args):
            self.algo_args = asdict(self.algo_args)
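
# Illustrative sketch: algo_args accepts either a dict or one of the dataclasses
# defined above; __post_init__ converts a dataclass to a plain dict via asdict().
#     >>> cfg = PTQConfig(algo_args=SmoothQuantConfig(alpha=0.85))
#     >>> cfg.algo_args
#     {'alpha': 0.85}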


class YamlLoader:
    """Loader for some special items in yaml."""
    def __call__(self, src: str):
        raise NotImplementedError


class MSDTypeLoader(YamlLoader):
    """Loader for `mindspore.dtype` in yaml."""
    def __init__(self):
        self.dtype_dict = {
            "Bool": msdtype.bool_,
            "Int": msdtype.int_,
            "Int8": msdtype.int8,
            "Int16": msdtype.int16,
            "Int32": msdtype.int32,
            "Int64": msdtype.int64,
            "UInt8": msdtype.uint8,
            "UInt16": msdtype.uint16,
            "UInt32": msdtype.uint32,
            "UInt64": msdtype.uint64,
            "Float": msdtype.float_,
            "Float16": msdtype.float16,
            "Float32": msdtype.float32,
            "Float64": msdtype.float64,
            "BFloat16": msdtype.bfloat16,
            "Complex64": msdtype.complex64,
            "Complex128": msdtype.complex128,
        }

    def __call__(self, src: str):
        if src == "None":
            return None
        ms_dtype = self.dtype_dict.get(src, None)
        if not ms_dtype:
            raise ValueError(f"Unrecognized dtype: {src}")
        return ms_dtype


@dataclass
class InnerPTQConfig(GSBaseConfig, PTQConfig):
    """config for post-training quantizer"""
    approach: PTQApproach = field(default=PTQApproach.RTN)
    act_per_channel: bool = False
    weight_per_channel: bool = True
    kvcache_per_head: bool = True
    act_symmetric: bool = False
    weight_symmetric: bool = True
    kvcache_symmetric: bool = True
    act_narrow_range: bool = False
    weight_narrow_range: bool = False
    kvcache_narrow_range: bool = False
    enable_deploy_fusion: bool = True
    kvcache_calibrate_max_new_tokens: int = 10
    smooth_to_pre_layer: bool = True
    act_dynamic_quant: bool = False
    kvcache_dynamic_quant: bool = False
    fallback_blacklist: dict = field(default_factory=dict)

    def __post_init__(self):
        value_check('act_per_channel', self.act_per_channel, bool)
        value_check('weight_per_channel', self.weight_per_channel, bool)
        value_check('kvcache_per_head', self.kvcache_per_head, bool)
        value_check('act_symmetric', self.act_symmetric, bool)
        value_check('weight_symmetric', self.weight_symmetric, bool)
        value_check('kvcache_symmetric', self.kvcache_symmetric, bool)
        value_check('act_narrow_range', self.act_narrow_range, bool)
        value_check('weight_narrow_range', self.weight_narrow_range, bool)
        value_check('enable_deploy_fusion', self.enable_deploy_fusion, bool)
        value_check('kvcache_calibrate_max_new_tokens', self.kvcache_calibrate_max_new_tokens, int)
        value_check('smooth_to_pre_layer', self.smooth_to_pre_layer, bool)
        value_check('fallback_blacklist', self.fallback_blacklist, dict)
        value_check('act_dynamic_quant', self.act_dynamic_quant, bool)
        if self.act_dynamic_quant is True and (self.weight_quant_dtype != msdtype.int8
                                               or self.act_quant_dtype != msdtype.int8):
            raise ValueError(f'when act_dynamic_quant is True, weight_quant_dtype: {self.weight_quant_dtype} '
                             f'and act_quant_dtype: {self.act_quant_dtype} must be mindspore.dtype.int8.')
        value_check('kvcache_dynamic_quant', self.kvcache_dynamic_quant, bool)
        if self.kvcache_dynamic_quant is True and self.kvcache_quant_dtype != msdtype.int8:
            raise ValueError(f'when kvcache_dynamic_quant is True, kvcache_quant_dtype: '
                             f'{self.kvcache_quant_dtype} must be mindspore.dtype.int8.')
        if self.approach not in PTQApproach.__members__.values():
            raise ValueError(f'Invalid approach: {self.approach}')
        self._check_rtn()
        if list(set(self.fallback_blacklist.keys()) & set(self.opname_blacklist)):
            raise ValueError("There should be no repetition between opname_blacklist and fallback_blacklist, "
                             f"now opname_blacklist={self.opname_blacklist}, "
                             f"fallback_blacklist={self.fallback_blacklist}")
        if not self.algo_args:
            args_config = algo_cfg_register[self.approach]
            if args_config is not None and is_dataclass(args_config):
                self.algo_args.update(asdict(args_config()))

    def _check_rtn(self):
        if self.approach is PTQApproach.RTN and self.act_quant_dtype == msdtype.int8 \
                and self.act_dynamic_quant is False:
            raise ValueError(f"{self.approach} does not support act_quant_dtype == mindspore.dtype.int8 "
                             "when act_dynamic_quant is False.")
        if self.approach is PTQApproach.RTN and self.weight_quant_dtype == msdtype.int8 \
                and self.kvcache_quant_dtype == msdtype.int8 and self.kvcache_dynamic_quant is False:
            raise ValueError("when kvcache_dynamic_quant is False and weight_quant_dtype and "
                             f"kvcache_quant_dtype are mindspore.dtype.int8, {self.approach} isn't supported.")
        if self.approach is PTQApproach.RTN and self.weight_quant_dtype is None and self.kvcache_quant_dtype is None:
            raise ValueError(f"weight_quant_dtype and kvcache_quant_dtype are None, {self.approach} can't take effect.")

    def _parse_dict(self):
        """parse data class to readable dicts"""
        parsed_dict = self.__dict__
        parsed_dict['backend'] = self.backend.name
        parsed_dict['mode'] = self.mode.name
        parsed_dict['approach'] = self.approach.name
        parsed_dict['opname_blacklist'] = self.opname_blacklist
        parsed_dict['kvcache_quant_dtype'] = str(self.kvcache_quant_dtype)
        parsed_dict['weight_quant_dtype'] = str(self.weight_quant_dtype)
        parsed_dict['act_quant_dtype'] = str(self.act_quant_dtype)
        parsed_dict['outliers_suppression'] = self.outliers_suppression.name
        return parsed_dict

    def _unparse_dict(self, data_dict):
        """convert readable dicts to data config"""
        def update_dict(key, decode_fn):
            nonlocal data_dict
            if key not in data_dict:
                raise ValueError(f'{key} shall be in yaml, but not found')
            if isinstance(decode_fn, YamlLoader):
                data_dict[key] = decode_fn(data_dict[key])
            else:
                data_dict[key] = decode_fn[data_dict[key]]

        unparse_list = [
            ('mode', PTQMode),
            ('backend', BackendTarget),
            ('approach', PTQApproach),
            ('outliers_suppression', OutliersSuppressionType),
            ('kvcache_quant_dtype', MSDTypeLoader()),
            ('weight_quant_dtype', MSDTypeLoader()),
            ('act_quant_dtype', MSDTypeLoader())
        ]
        for item in unparse_list:
            update_dict(*item)
        self.__dict__.update(data_dict)

    @staticmethod
    def inner_config(cfg: PTQConfig, approach=None):
        """convert PTQConfig to InnerPTQConfig"""
        if not isinstance(cfg, PTQConfig):
            raise TypeError(f'input config shall be PTQConfig, but got {type(cfg)}')
        if not approach:
            inner_cfg = InnerPTQConfig()
        else:
            inner_cfg = InnerPTQConfig(approach=approach)
        for key, val in asdict(cfg).items():
            if key == "algo_args":
                inner_cfg.algo_args.update(val)
            else:
                setattr(inner_cfg, key, val)
        return inner_cfg
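
# Illustrative sketch: promoting a user-facing PTQConfig into the internal config for
# a concrete approach ('lm_head' is a made-up layer name).
#     >>> user_cfg = PTQConfig(opname_blacklist=['lm_head'])
#     >>> inner = InnerPTQConfig.inner_config(user_cfg, approach=PTQApproach.RTN)
#     >>> inner.approach
#     <PTQApproach.RTN: 'rtn'>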