# Copyright 2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""EfficientNet Architecture."""
import math
import copy
from functools import partial
from typing import List, Optional, Callable
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P
from mindvision.check_param import Validator, Rel
from mindvision.classification.models.utils import make_divisible
from mindvision.engine.class_factory import ClassFactory, ModuleType
from mindvision.classification.models.blocks import ConvNormActivation, SqueezeExcite, DropConnect, Swish
__all__ = [
'MBConvConfig',
'MBConv',
'EfficientNet',  # registered with ClassFactory so it can be built from a YAML configuration
]
class MBConvConfig:
"""
The parameters of an MBConv block, rescaled by the width, depth and expand ratios.
Args:
expand_ratio (float): The ratio of the expanded (hidden) channels to the input channels.
kernel_size (int): The kernel size of the depthwise conv.
stride (int): The stride of the depthwise conv.
in_chs (int): The number of input channels of the MBConv module.
out_chs (int): The number of output channels of the MBConv module.
num_layers (int): The number of repeated MBConv modules.
width_cnf (float): The ratio used to scale the channel width.
depth_cnf (float): The ratio used to scale num_layers.
Returns:
None
Examples:
>>> cnf = MBConvConfig(1, 3, 1, 32, 16, 1, 1.0, 1.0)
>>> print(cnf.input_channels)
32
"""
def __init__(
self,
expand_ratio: float,
kernel_size: int,
stride: int,
in_chs: int,
out_chs: int,
num_layers: int,
width_cnf: float,
depth_cnf: float,
) -> None:
self.expand_ratio = expand_ratio
self.kernel_size = kernel_size
self.stride = stride
self.input_channels = self.adjust_channels(in_chs, width_cnf)
self.out_channels = self.adjust_channels(out_chs, width_cnf)
self.num_layers = self.adjust_depth(num_layers, depth_cnf)
@staticmethod
def adjust_channels(channels: int, width_cnf: float, min_value: Optional[int] = None) -> int:
"""
Calculate the width of MBConv.
Args:
channels (int): The number of channels.
width_cnf (float): The width scaling ratio.
min_value (int, optional): The minimum number of channels. Default: None.
Returns:
int, the width of MBConv.
"""
return make_divisible(channels * width_cnf, 8, min_value)
@staticmethod
def adjust_depth(num_layers: int, depth_cnf: float) -> int:
"""
Calculate the depth of MBConv.
Args:
num_layers (int): The number of MBConv modules.
depth_cnf (float): The depth scaling ratio.
Returns:
int, the depth of MBConv.
"""
return int(math.ceil(num_layers * depth_cnf))
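# A small worked example of the scaling helpers above (hypothetical values, not a
# specific EfficientNet variant): adjust_depth(2, 1.2) returns ceil(2 * 1.2) = 3, and
# adjust_channels(32, 1.25) passes 40.0 through make_divisible with a divisor of 8,
# which leaves it at 40 since it is already a multiple of 8.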
class MBConv(nn.Cell):
"""
MBConv Module.
Args:
cnf (MBConvConfig): The configuration holding the block parameters (in_channels, out_channels, num_layers)
and the helpers that rescale them by expand_ratio and the width/depth multipliers.
keep_prob (float): The drop rate used by DropConnect on the residual branch.
norm (nn.Cell): The normalization layer. Default: None.
se_layer (nn.Cell): The squeeze-excite module. Default: SqueezeExcite.
Returns:
Tensor
Example:
>>> import numpy as np
>>> import mindspore
>>> from mindspore import Tensor
>>> from mindvision.classification.backbone import MBConvConfig
>>> cnf = MBConvConfig(1, 3, 1, 32, 16, 1, 1.0, 1.0)
>>> x = Tensor(np.ones((1, 32, 32, 32)), mindspore.float32)
>>> MBConv(cnf, 0.2, None)(x)
"""
def __init__(
self,
cnf: MBConvConfig,
keep_prob: float,
norm: Optional[nn.Cell] = None,
se_layer: Callable[..., nn.Cell] = SqueezeExcite,
) -> None:
super().__init__()
Validator.check_int_range(cnf.stride, 1, 2, Rel.INC_BOTH, "stride")
self.shortcut = cnf.stride == 1 and cnf.input_channels == cnf.out_channels
layers: List[nn.Cell] = []
activation = Swish
# expand conv: the number of output channels is cnf.expand_ratio times the number of input channels.
expanded_channels = cnf.adjust_channels(cnf.input_channels, cnf.expand_ratio)
if expanded_channels != cnf.input_channels:
layers.append(
ConvNormActivation(
cnf.input_channels,
expanded_channels,
kernel_size=1,
norm=norm,
activation=activation,
)
)
# depthwise conv: one filter group per channel (groups == expanded_channels).
layers.append(
ConvNormActivation(
expanded_channels,
expanded_channels,
kernel_size=cnf.kernel_size,
stride=cnf.stride,
groups=expanded_channels,
norm=norm,
activation=activation,
)
)
# squeeze and excitation
squeeze_channels = max(1, cnf.input_channels // 4)
layers.append(se_layer(expanded_channels, squeeze_channels, Swish, "sigmoid"))
# project
layers.append(
ConvNormActivation(
expanded_channels, cnf.out_channels, kernel_size=1, norm=norm, activation=None
)
)
self.block = nn.SequentialCell(layers)
self.dropout = DropConnect(keep_prob)
self.out_channels = cnf.out_channels
def construct(self, x) -> Tensor:
"""MBConv construct."""
result = self.block(x)
if self.shortcut:
result = self.dropout(result)
result += x
return result
@ClassFactory.register(ModuleType.BACKBONE)
class EfficientNet(nn.Cell):
"""
EfficientNet architecture.
Args:
width_mult (float): The width multiplier that scales the number of channels. Default: 1.0.
depth_mult (float): The depth multiplier that scales the number of layers. Default: 1.0.
inverted_residual_setting (List[MBConvConfig], optional): The per-stage MBConv settings. Default: None.
keep_prob (float): The maximum drop rate used by DropConnect in the MBConv blocks. Default: 0.2.
block (nn.Cell, optional): The basic block of the model. Default: None.
norm_layer (nn.Cell, optional): The normalization layer. Default: None.
Inputs:
- **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.
Outputs:
Tensor of shape :math:`(N, 1280)`.
Supported Platforms:
``GPU``
Examples:
>>> import numpy as np
>>> import mindspore as ms
>>> from mindvision.classification.models.backbones import EfficientNet
>>> net = EfficientNet(1, 1)
>>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
>>> output = net(x)
>>> print(output.shape)
(1, 1280)
About EfficientNet:
EfficientNet systematically studies model scaling and identifies that carefully balancing network depth, width,
and resolution can lead to better performance. Based on this observation, the authors propose a new scaling method
that uniformly scales all dimensions of depth/width/resolution using a simple yet highly effective compound
coefficient. They demonstrate the effectiveness of this method by scaling up MobileNets and ResNet.
Citation:
.. code-block::
@misc{tan2020efficientnet,
title={EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks},
author={Mingxing Tan and Quoc V. Le},
year={2020},
eprint={1905.11946},
archivePrefix={arXiv},
primaryClass={cs.LG}
}
"""
def __init__(
self,
width_mult: float = 1,
depth_mult: float = 1,
inverted_residual_setting: Optional[List[MBConvConfig]] = None,
keep_prob: float = 0.2,
block: Optional[nn.Cell] = None,
norm_layer: Optional[nn.Cell] = None,
) -> None:
super(EfficientNet, self).__init__()
if block is None:
block = MBConv
if norm_layer is None:
norm_layer = nn.BatchNorm2d
if width_mult >= 1.6:
norm_layer = partial(nn.BatchNorm2d, eps=0.001, momentum=0.99)
layers: List[nn.Cell] = []
bneck_conf = partial(MBConvConfig, width_cnf=width_mult, depth_cnf=depth_mult)
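# The default rows below correspond to the EfficientNet-B0 baseline; each row is
# (expand_ratio, kernel_size, stride, in_chs, out_chs, num_layers), and the
# width/depth multipliers are applied through bneck_conf above.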
if not inverted_residual_setting:
inverted_residual_setting = [
bneck_conf(1, 3, 1, 32, 16, 1),
bneck_conf(6, 3, 2, 16, 24, 2),
bneck_conf(6, 5, 2, 24, 40, 2),
bneck_conf(6, 3, 2, 40, 80, 3),
bneck_conf(6, 5, 1, 80, 112, 3),
bneck_conf(6, 5, 2, 112, 192, 4),
bneck_conf(6, 3, 1, 192, 320, 1),
]
# building first layer
firstconv_output_channels = inverted_residual_setting[0].input_channels
layers.append(
ConvNormActivation(
3, firstconv_output_channels, kernel_size=3, stride=2, norm=norm_layer, activation=Swish
)
)
# building MBConv blocks
total_stage_blocks = sum(cnf.num_layers for cnf in inverted_residual_setting)
stage_block_id = 0
# cnf holds the settings shared by one stage of blocks
for cnf in inverted_residual_setting:
stage: List[nn.Cell] = []
# cnf.num_layers is the number of repeated blocks in the stage
for _ in range(cnf.num_layers):
# copy to avoid modifications. shallow copy is enough
block_cnf = copy.copy(cnf)
# overwrite info if not the first conv in the stage
if stage:
block_cnf.input_channels = block_cnf.out_channels
block_cnf.stride = 1
# scale the drop-connect rate of each block linearly with its overall index
sd_prob = keep_prob * float(stage_block_id) / total_stage_blocks
stage.append(block(block_cnf, sd_prob, norm_layer))
stage_block_id += 1
layers.append(nn.SequentialCell(stage))
# building last several layers
lastconv_input_channels = inverted_residual_setting[-1].out_channels
lastconv_output_channels = 4 * lastconv_input_channels
layers.append(
ConvNormActivation(
lastconv_input_channels,
lastconv_output_channels,
kernel_size=1,
norm=norm_layer,
activation=Swish,
)
)
self.features = nn.SequentialCell(layers)
self.avgpool = P.AdaptiveAvgPool2D(1)
def construct(self, x) -> Tensor:
"""Efficientnet construct."""
x = self.features(x)
x = self.avgpool(x)
x = P.Flatten()(x)
return x
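# A minimal usage sketch (not part of the library API): attach a plain nn.Dense head to the
# backbone. The 1280-dim feature size comes from the final 1x1 conv above (4 * 320), and the
# (width_mult, depth_mult) pair (1.0, 1.0) corresponds to EfficientNet-B0.
if __name__ == "__main__":
    import numpy as np
    import mindspore as ms

    backbone = EfficientNet(width_mult=1.0, depth_mult=1.0)
    head = nn.Dense(1280, 1000)  # hypothetical 1000-class classifier head
    images = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
    features = backbone(images)  # shape: (1, 1280)
    logits = head(features)      # shape: (1, 1000)
    print(features.shape, logits.shape)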