# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
""" MobileNetV2 backbone."""
from typing import Optional, List
from mindspore import nn
from mindspore.ops.operations import Add
from mindvision.classification.models.blocks import ConvNormActivation
from mindvision.classification.models.utils import make_divisible
from mindvision.engine.class_factory import ClassFactory, ModuleType

__all__ = [
    "MobileNetV2",
    "InvertedResidual"
]

class InvertedResidual(nn.Cell):
    """
    MobileNetV2 inverted residual block definition.

    Args:
        in_channel (int): The input channel.
        out_channel (int): The output channel.
        stride (int): The stride size of the first convolutional layer. Default: 1.
        expand_ratio (int): The expand ratio of the input channel.
        norm (nn.Cell, optional): The norm layer that will be stacked on top of the convolution
            layer. Default: None.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> from mindvision.classification.models.backbones import InvertedResidual
        >>> InvertedResidual(3, 256, 1, 1)
    """
    def __init__(self,
                 in_channel: int,
                 out_channel: int,
                 stride: int,
                 expand_ratio: int,
                 norm: Optional[nn.Cell] = None
                 ) -> None:
        super(InvertedResidual, self).__init__()
        assert stride in [1, 2]

        if not norm:
            norm = nn.BatchNorm2d

        hidden_dim = round(in_channel * expand_ratio)
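        # Worked example: with in_channel=24 and expand_ratio=6 the block expands to
        # hidden_dim = 144 channels before the depthwise and linear projection layers.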
        self.use_res_connect = stride == 1 and in_channel == out_channel

        layers: List[nn.Cell] = []
        if expand_ratio != 1:
            # pw: 1x1 pointwise convolution that expands the channels to hidden_dim.
            layers.append(
                ConvNormActivation(in_channel, hidden_dim, kernel_size=1, norm=norm, activation=nn.ReLU6)
            )
        layers.extend([
            # dw: depthwise convolution (groups=hidden_dim) over the expanded features.
            ConvNormActivation(
                hidden_dim,
                hidden_dim,
                stride=stride,
                groups=hidden_dim,
                norm=norm,
                activation=nn.ReLU6
            ),
            # pw-linear: 1x1 pointwise projection back to out_channel, with no activation.
            nn.Conv2d(hidden_dim, out_channel, kernel_size=1,
                      stride=1, has_bias=False),
            norm(out_channel)
        ])
        self.conv = nn.SequentialCell(layers)
        self.add = Add()

    def construct(self, x):
        identity = x
        x = self.conv(x)
        if self.use_res_connect:
            return self.add(identity, x)
        return x


@ClassFactory.register(ModuleType.BACKBONE)
class MobileNetV2(nn.Cell):
    """
    MobileNetV2 architecture.

    Args:
        alpha (float): The channel multiplier that scales the number of channels in each layer.
            Default: 1.0.
        inverted_residual_setting (list, optional): Inverted residual settings. Default: None.
        round_nearest (int): Round the number of channels in each layer to be a multiple of this
            number. Set to 1 to turn off rounding. Default: 8.
        block (nn.Cell, optional): Module specifying the inverted residual building block for
            MobileNet. Default: None.
        norm (nn.Cell, optional): Norm layer that will be stacked on top of the convolution
            layer. Default: None.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.

    Outputs:
        Tensor of shape :math:`(N, 1280, 7, 7)`.

    Supported Platforms:
        ``GPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore as ms
        >>> from mindvision.classification.models.backbones import MobileNetV2
        >>> net = MobileNetV2()
        >>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
        >>> output = net(x)
        >>> print(output.shape)
        (1, 1280, 7, 7)

    About MobileNetV2:
        The MobileNetV2 architecture is based on an inverted residual structure, where the input and
        output of the residual block are thin bottleneck layers, the opposite of traditional residual
        models, which use expanded representations in the input. MobileNetV2 uses lightweight
        depthwise convolutions to filter features in the intermediate expansion layer.

    Citation:

    .. code-block::

        @article{2018MobileNetV2,
        title={MobileNetV2: Inverted Residuals and Linear Bottlenecks},
        author={ Sandler, M. and Howard, A. and Zhu, M. and Zhmoginov, A. and Chen, L. C. },
        journal={IEEE},
        year={2018},
        }
    """
    def __init__(self,
                 alpha: float = 1.0,
                 inverted_residual_setting: Optional[List[List[int]]] = None,
                 round_nearest: int = 8,
                 block: Optional[nn.Cell] = None,
                 norm: Optional[nn.Cell] = None,
                 ) -> None:
        super(MobileNetV2, self).__init__()

        if not block:
            block = InvertedResidual
        if not norm:
            norm = nn.BatchNorm2d

        input_channel = make_divisible(32 * alpha, round_nearest)
        last_channel = make_divisible(1280 * max(1.0, alpha), round_nearest)

        # Setting of inverted residual blocks.
        if not inverted_residual_setting:
            inverted_residual_setting = [
                # t, c, n, s
                [1, 16, 1, 1],
                [6, 24, 2, 2],
                [6, 32, 3, 2],
                [6, 64, 4, 2],
                [6, 96, 3, 1],
                [6, 160, 3, 2],
                [6, 320, 1, 1],
            ]
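        # With the default settings above and alpha=1.0, round_nearest=8, this builds
        # 17 inverted residual blocks whose output channels grow
        # 16 -> 24 -> 32 -> 64 -> 96 -> 160 -> 320, before the final 1x1 convolution
        # expands the features to last_channel = 1280.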

        # Building first layer.
        features: List[nn.Cell] = [
            ConvNormActivation(3, input_channel, stride=2, norm=norm, activation=nn.ReLU6)
        ]

        # Building inverted residual blocks.
        # t: The expansion factor.
        # c: Number of output channels.
        # n: Number of blocks in the stage.
        # s: Stride of the first block in the stage.
        for t, c, n, s in inverted_residual_setting:
            output_channel = make_divisible(c * alpha, round_nearest)
            for i in range(n):
                stride = s if i == 0 else 1
                features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm=norm))
                input_channel = output_channel

        # Building last several layers.
        features.append(
            ConvNormActivation(input_channel, last_channel, kernel_size=1, norm=norm, activation=nn.ReLU6)
        )

        # Wrap the layers in an nn.SequentialCell so they run as one feature extractor.
        self.features = nn.SequentialCell(features)

    def construct(self, x):
        x = self.features(x)
        return x
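

if __name__ == "__main__":
    # Minimal smoke-test sketch mirroring the docstring example above; it assumes a working
    # MindSpore installation and only checks that the backbone produces the documented shape.
    import numpy as np
    import mindspore as ms

    net = MobileNetV2()
    dummy_input = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
    features = net(dummy_input)
    print(features.shape)  # Expected: (1, 1280, 7, 7) for a 224x224 input.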