# Copyright 2024 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""algorithm related configs"""
from dataclasses import dataclass, field, is_dataclass, asdict
from enum import Enum
from typing import List
from mindspore import QuantDtype
from mindspore_gs.common.config import GSBaseConfig
from mindspore_gs.common.utils import value_check, list_value_check
from mindspore_gs.common.register import RegisterMachine
from mindspore_gs.common.gs_enum import QuantCellType, BackendTarget
algo_cfg_register = RegisterMachine()
class PTQApproach(Enum):
"""
PTQ approach enums
"""
SMOOTH_QUANT = 'smooth_quant'
RTN = 'rtn'
GPTQ = 'gptq'
class PTQMode(Enum):
"""
    Mode for the PTQ quantizer.

    - ``QUANTIZE``: indicates the PTQ quantizer runs in quantize mode.
    - ``DEPLOY``: indicates the PTQ quantizer runs in deploy mode.
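
    A minimal illustration of the two mode values (they mirror the members defined below):

    Example:
        >>> from mindspore_gs.ptq import PTQMode
        >>> PTQMode.QUANTIZE.value
        'quantize'
        >>> PTQMode.DEPLOY.value
        'deploy'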
"""
QUANTIZE = 'quantize'
DEPLOY = 'deploy'
@algo_cfg_register.register(PTQApproach.SMOOTH_QUANT)
@dataclass
class SmoothQuantConfig:
"""config for smooth quant algorithm"""
alpha: float = 0.5
is_deploy: bool = False
def __post_init__(self):
value_check('alpha', self.alpha, float)
value_check('is_deploy', self.is_deploy, bool)
@algo_cfg_register.register(PTQApproach.RTN)
@dataclass
class RTNConfig:
"""
Config for round to nearest algorithms.
"""
@dataclass
class QuantizerConfig(GSBaseConfig):
""" quantize related config """
bit_num: int = 8
optypes_exclude_output_quant: List[str] = field(default_factory=lambda: [])
algo_args: dict = field(default_factory=lambda: {})
def __post_init__(self):
value_check('bit_num', self.bit_num, int)
        list_value_check('optypes_exclude_output_quant', self.optypes_exclude_output_quant, str)
value_check('algo_args', self.algo_args, dict)
@dataclass
class PTQConfig:
"""
    Config for post training quantization.

Args:
        mode (:class:`mindspore_gs.ptq.PTQMode`): Flag for ptq mode, ``QUANTIZE`` for quantization mode,
            ``DEPLOY`` for deploy mode.
backend (:class:`mindspore_gs.ptq.BackendTarget`): Flag for backend target, ``NONE`` for no specific backend,
``ASCEND`` for ascend backend.
        opname_blacklist (List[str]): Blacklist of op names. Layers in the network whose names fuzzily match an
            entry in this blacklist will not be quantized.
Raises:
ValueError: If `mode` is not in PTQMode's members.
ValueError: If `backend` is not in BackendTarget's members.
        TypeError: If `opname_blacklist` is not a list of str.
Example:
>>> from mindspore_gs.ptq import PTQConfig, PTQMode
>>> from mindspore_gs.common import BackendTarget
>>> PTQConfig(mode=PTQMode.DEPLOY, backend=BackendTarget.ASCEND, opname_blacklist=['layer0'])
        PTQConfig(mode=<PTQMode.DEPLOY: 'deploy'>, backend=<BackendTarget.ASCEND: 'ascend'>, opname_blacklist=['layer0'])
"""
mode: PTQMode = field(default=PTQMode.QUANTIZE,
metadata={'valid_values': PTQMode.__members__.values()}
)
backend: BackendTarget = field(default=BackendTarget.NONE,
metadata={'valid_values': BackendTarget.__members__.values()}
)
opname_blacklist: List[str] = field(default_factory=lambda: [])
def __post_init__(self):
if self.mode not in PTQMode.__members__.values():
raise ValueError(f'mode shall be in {PTQMode.__members__.values()}')
if self.backend not in BackendTarget.__members__.values():
raise ValueError(f'backend shall be in '
f'{BackendTarget.__members__.values()}')
list_value_check('opname_blacklist', self.opname_blacklist, str)
@dataclass
class InnerPTQConfig(QuantizerConfig, PTQConfig):
"""
config for post-trainning-quantizer
"""
approach: PTQApproach = field(default=PTQApproach.RTN,
metadata={'valid_values': PTQApproach.__members__.values()}
)
calibration_sampling_size: int = 0
act_quant_dtype: QuantDtype = QuantDtype.INT8
weight_quant_dtype: QuantDtype = QuantDtype.INT8
weight_only: bool = True
enable_kvcache_int8: bool = False
act_per_channel: bool = False
weight_per_channel: bool = True
act_symmetric: bool = False
weight_symmetric: bool = True
act_narrow_range: bool = False
weight_narrow_range: bool = False
opname_blacklist: List[str] = field(default_factory=lambda: [])
op_types: List[str] = field(default_factory=lambda: [QuantCellType.MF_LINEAR.value],
metadata={'choices': [
item.value for item in QuantCellType.__members__.values()
]})
def __post_init__(self):
value_check('calibration_sampling_size', self.calibration_sampling_size, int)
value_check('act_quant_dtype', self.act_quant_dtype, QuantDtype)
value_check('weight_quant_dtype', self.weight_quant_dtype, QuantDtype)
value_check('weight_only', self.weight_only, bool)
value_check('enable_kvcache_int8', self.enable_kvcache_int8, bool)
value_check('act_per_channel', self.act_per_channel, bool)
value_check('weight_per_channel', self.weight_per_channel, bool)
        value_check('act_symmetric', self.act_symmetric, bool)
        value_check('weight_symmetric', self.weight_symmetric, bool)
value_check('act_narrow_range', self.act_narrow_range, bool)
value_check('weight_narrow_range', self.weight_narrow_range, bool)
        list_value_check('opname_blacklist', self.opname_blacklist, str)
if self.approach not in PTQApproach.__members__.values():
raise ValueError(f'Invalid approach: {self.approach}')
support_op_types = {
item.value for item in QuantCellType.__members__.values()
}
for op_type in self.op_types:
if op_type not in support_op_types:
                raise ValueError(f'{op_type} is not supported, supported types are {support_op_types}')
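        # When the caller supplies no algorithm arguments, fall back to the defaults of the
        # config dataclass registered for the chosen approach (e.g. SmoothQuantConfig for
        # PTQApproach.SMOOTH_QUANT). Approaches registered with an empty config, such as
        # RTNConfig, leave `algo_args` empty.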
if not self.algo_args:
args_config = algo_cfg_register[self.approach]
if args_config is not None and is_dataclass(args_config):
self.algo_args.update(asdict(args_config()))
def value_check(self):
"""value check"""
self.__post_init__()
def _parse_dict(self):
""" parse data class to readable dicts"""
parsed_dict = self.__dict__
parsed_dict['act_quant_dtype'] = self.act_quant_dtype.name
parsed_dict['weight_quant_dtype'] = self.weight_quant_dtype.name
parsed_dict['backend'] = self.backend.name
parsed_dict['mode'] = self.mode.name
parsed_dict['approach'] = self.approach.name
parsed_dict['opname_blacklist'] = self.opname_blacklist
return parsed_dict
def _unparse_dict(self, data_dict):
""" convert readable dicts to data config"""
def update_dict(key, enum_name):
nonlocal data_dict
if key not in data_dict:
                raise ValueError(f'{key} shall be in the yaml, but was not found')
data_dict[key] = enum_name[data_dict[key]]
unparse_list = [
('act_quant_dtype', QuantDtype),
('weight_quant_dtype', QuantDtype),
('mode', PTQMode),
('backend', BackendTarget),
('approach', PTQApproach)
]
for item in unparse_list:
update_dict(*item)
self.__dict__.update(data_dict)
@staticmethod
def inner_config(cfg: PTQConfig):
"""convert PTQConfig to InnerConfig"""
if not isinstance(cfg, PTQConfig):
raise TypeError(f'input config shall be PTQConfig, but got {type(cfg)}')
inner_cfg = InnerPTQConfig()
for key, val in asdict(cfg).items():
setattr(inner_cfg, key, val)
return inner_cfg
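

# Illustrative sketch (comments only, not executed on import): a user-facing
# PTQConfig can be converted into the internal InnerPTQConfig via inner_config.
#
#   user_cfg = PTQConfig(mode=PTQMode.DEPLOY, backend=BackendTarget.ASCEND,
#                        opname_blacklist=['layer0'])
#   inner_cfg = InnerPTQConfig.inner_config(user_cfg)
#   # inner_cfg carries over mode/backend/opname_blacklist and keeps the internal
#   # defaults, e.g. approach=PTQApproach.RTN and bit_num=8.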