
# Copyright 2024 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""algorithm related configs"""

from dataclasses import dataclass, field, is_dataclass, asdict
from enum import Enum
from typing import List

from mindspore import QuantDtype

from mindspore_gs.common.config import GSBaseConfig
from mindspore_gs.common.utils import value_check, list_value_check
from mindspore_gs.common.register import RegisterMachine
from mindspore_gs.common.gs_enum import QuantCellType, BackendTarget

algo_cfg_register = RegisterMachine()


class PTQApproach(Enum):
    """
    PTQ approach enums
    """
    SMOOTH_QUANT = 'smooth_quant'
    RTN = 'rtn'
    GPTQ = 'gptq'


class PTQMode(Enum):
    """
    Mode for ptq quantizer.

    - ``QUANTIZE``: indicate ptq quantizer in quantize mode.
    - ``DEPLOY``: indicate ptq quantizer in deploy mode.
    """
    QUANTIZE = 'quantize'
    DEPLOY = 'deploy'
@algo_cfg_register.register(PTQApproach.SMOOTH_QUANT)
@dataclass
class SmoothQuantConfig:
    """config for smooth quant algorithm"""
    alpha: float = 0.5
    is_deploy: bool = False

    def __post_init__(self):
        value_check('alpha', self.alpha, float)
        value_check('is_deploy', self.is_deploy, bool)


@algo_cfg_register.register(PTQApproach.RTN)
@dataclass
class RTNConfig:
    """
    Config for round to nearest algorithms.
    """


@dataclass
class QuantizerConfig(GSBaseConfig):
    """quantize related config"""
    bit_num: int = 8
    optypes_exclude_output_quant: List[str] = field(default_factory=lambda: [])
    algo_args: dict = field(default_factory=lambda: {})

    def __post_init__(self):
        value_check('bit_num', self.bit_num, int)
        list_value_check('optypes_exclude_output_quant', self.optypes_exclude_output_quant, str)
        value_check('algo_args', self.algo_args, dict)
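# A minimal usage sketch (illustrative only, not part of the module): how an approach-specific
# config class is looked up through ``algo_cfg_register`` and converted to a plain dict. The
# shown output assumes the default field values defined above.
#
#     >>> from dataclasses import asdict
#     >>> sq_cls = algo_cfg_register[PTQApproach.SMOOTH_QUANT]
#     >>> asdict(sq_cls())
#     {'alpha': 0.5, 'is_deploy': False}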
@dataclass
class PTQConfig:
    """
    Config for post training quantization.

    Args:
        mode (:class:`mindspore_gs.ptq.PTQMode`): Flag for ptq mode, ``QUANTIZE`` for quantize mode,
            ``DEPLOY`` for deploy mode.
        backend (:class:`mindspore_gs.ptq.BackendTarget`): Flag for backend target, ``NONE`` for no specific
            backend, ``ASCEND`` for ascend backend.
        opname_blacklist (List[str]): Blacklist of opname. Layers in the network whose names fuzzy-match an
            entry in this blacklist will not be quantized.

    Raises:
        ValueError: If `mode` is not in PTQMode's members.
        ValueError: If `backend` is not in BackendTarget's members.
        TypeError: If `opname_blacklist` is not a list of str.

    Examples:
        >>> from mindspore_gs.ptq import PTQConfig, PTQMode
        >>> from mindspore_gs.common import BackendTarget
        >>> PTQConfig(mode=PTQMode.DEPLOY, backend=BackendTarget.ASCEND, opname_blacklist=['layer0'])
        PTQConfig(mode=<PTQMode.DEPLOY: 'deploy'>, backend=<BackendTarget.ASCEND: 'ascend'>, opname_blacklist=['layer0'], algo_args={})
    """
    mode: PTQMode = field(default=PTQMode.QUANTIZE,
                          metadata={'valid_values': PTQMode.__members__.values()})
    backend: BackendTarget = field(default=BackendTarget.NONE,
                                   metadata={'valid_values': BackendTarget.__members__.values()})
    opname_blacklist: List[str] = field(default_factory=lambda: [])

    def __post_init__(self):
        if self.mode not in PTQMode.__members__.values():
            raise ValueError(f'mode shall be in {PTQMode.__members__.values()}')
        if self.backend not in BackendTarget.__members__.values():
            raise ValueError(f'backend shall be in {BackendTarget.__members__.values()}')
        list_value_check('opname_blacklist', self.opname_blacklist, str)
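# A minimal validation sketch (illustrative only, not part of the module): ``__post_init__``
# rejects values outside the enum members. The layer name 'lm_head' is just a placeholder and
# the error wording follows the checks above.
#
#     >>> cfg = PTQConfig(backend=BackendTarget.ASCEND, opname_blacklist=['lm_head'])
#     >>> cfg.mode
#     <PTQMode.QUANTIZE: 'quantize'>
#     >>> PTQConfig(mode='train')  # not a PTQMode member
#     Traceback (most recent call last):
#         ...
#     ValueError: mode shall be in ...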
@dataclass
class InnerPTQConfig(QuantizerConfig, PTQConfig):
    """config for post-training quantizer"""
    approach: PTQApproach = field(default=PTQApproach.RTN,
                                  metadata={'valid_values': PTQApproach.__members__.values()})
    calibration_sampling_size: int = 0
    act_quant_dtype: QuantDtype = QuantDtype.INT8
    weight_quant_dtype: QuantDtype = QuantDtype.INT8
    weight_only: bool = True
    enable_kvcache_int8: bool = False
    act_per_channel: bool = False
    weight_per_channel: bool = True
    act_symmetric: bool = False
    weight_symmetric: bool = True
    act_narrow_range: bool = False
    weight_narrow_range: bool = False
    opname_blacklist: List[str] = field(default_factory=lambda: [])
    op_types: List[str] = field(default_factory=lambda: [QuantCellType.MF_LINEAR.value],
                                metadata={'choices': [item.value for item in QuantCellType.__members__.values()]})

    def __post_init__(self):
        value_check('calibration_sampling_size', self.calibration_sampling_size, int)
        value_check('act_quant_dtype', self.act_quant_dtype, QuantDtype)
        value_check('weight_quant_dtype', self.weight_quant_dtype, QuantDtype)
        value_check('weight_only', self.weight_only, bool)
        value_check('enable_kvcache_int8', self.enable_kvcache_int8, bool)
        value_check('act_per_channel', self.act_per_channel, bool)
        value_check('weight_per_channel', self.weight_per_channel, bool)
        value_check('act_symmetric', self.act_symmetric, bool)
        value_check('weight_symmetric', self.weight_symmetric, bool)
        value_check('act_narrow_range', self.act_narrow_range, bool)
        value_check('weight_narrow_range', self.weight_narrow_range, bool)
        list_value_check('opname_blacklist', self.opname_blacklist, str)
        if self.approach not in PTQApproach.__members__.values():
            raise ValueError(f'Invalid approach: {self.approach}')
        support_op_types = {item.value for item in QuantCellType.__members__.values()}
        for op_type in self.op_types:
            if op_type not in support_op_types:
                raise ValueError(f'{op_type} is not supported, all supported types are {support_op_types}')
        if not self.algo_args:
            args_config = algo_cfg_register[self.approach]
            if args_config is not None and is_dataclass(args_config):
                self.algo_args.update(asdict(args_config()))

    def value_check(self):
        """value check"""
        self.__post_init__()

    def _parse_dict(self):
        """parse data class to readable dicts"""
        parsed_dict = self.__dict__
        parsed_dict['act_quant_dtype'] = self.act_quant_dtype.name
        parsed_dict['weight_quant_dtype'] = self.weight_quant_dtype.name
        parsed_dict['backend'] = self.backend.name
        parsed_dict['mode'] = self.mode.name
        parsed_dict['approach'] = self.approach.name
        parsed_dict['opname_blacklist'] = self.opname_blacklist
        return parsed_dict

    def _unparse_dict(self, data_dict):
        """convert readable dicts to data config"""
        def update_dict(key, enum_name):
            nonlocal data_dict
            if key not in data_dict:
                raise ValueError(f'{key} shall be in yaml, but not found')
            data_dict[key] = enum_name[data_dict[key]]

        unparse_list = [('act_quant_dtype', QuantDtype),
                        ('weight_quant_dtype', QuantDtype),
                        ('mode', PTQMode),
                        ('backend', BackendTarget),
                        ('approach', PTQApproach)]
        for item in unparse_list:
            update_dict(*item)
        self.__dict__.update(data_dict)

    @staticmethod
    def inner_config(cfg: PTQConfig):
        """convert PTQConfig to InnerConfig"""
        if not isinstance(cfg, PTQConfig):
            raise TypeError(f'input config shall be PTQConfig, but got {type(cfg)}')
        inner_cfg = InnerPTQConfig()
        for key, val in asdict(cfg).items():
            setattr(inner_cfg, key, val)
        return inner_cfg
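# A minimal conversion sketch (illustrative only, not part of the module): ``inner_config``
# copies the user-facing PTQConfig fields onto an InnerPTQConfig, whose ``__post_init__`` fills
# ``algo_args`` from the registered approach config (the default RTNConfig has no fields, so it
# stays empty). The layer name 'layer0' is just a placeholder.
#
#     >>> cfg = PTQConfig(mode=PTQMode.DEPLOY, backend=BackendTarget.ASCEND, opname_blacklist=['layer0'])
#     >>> inner = InnerPTQConfig.inner_config(cfg)
#     >>> inner.mode, inner.backend, inner.opname_blacklist
#     (<PTQMode.DEPLOY: 'deploy'>, <BackendTarget.ASCEND: 'ascend'>, ['layer0'])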