Source code for mindvision.classification.models.backbones.mobilenet_v2

# Copyright 2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
""" MobileNetV2 backbone."""

from typing import Optional, List

from mindspore import nn
from mindspore.ops.operations import Add

from mindvision.classification.models.blocks import ConvNormActivation
from mindvision.classification.models.utils import make_divisible
from mindvision.engine.class_factory import ClassFactory, ModuleType

__all__ = [
    "MobileNetV2",
    "InvertedResidual"
]


class InvertedResidual(nn.Cell):
    """
    MobileNetV2 residual block definition.

    Args:
        in_channel (int): The input channel.
        out_channel (int): The output channel.
        stride (int): The stride size for the first convolutional layer. Default: 1.
        expand_ratio (int): The expand ratio of the input channel.
        norm (nn.Cell, optional): The norm layer that will be stacked on top of the convolution layer.
            Default: None.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> from mindvision.classification.models.backbones import InvertedResidual
        >>> InvertedResidual(3, 256, 1, 1)
    """

    def __init__(self,
                 in_channel: int,
                 out_channel: int,
                 stride: int,
                 expand_ratio: int,
                 norm: Optional[nn.Cell] = None
                 ) -> None:
        super(InvertedResidual, self).__init__()
        assert stride in [1, 2]

        if not norm:
            norm = nn.BatchNorm2d

        hidden_dim = round(in_channel * expand_ratio)
        self.use_res_connect = stride == 1 and in_channel == out_channel

        layers: List[nn.Cell] = []
        if expand_ratio != 1:
            # pw
            layers.append(
                ConvNormActivation(in_channel, hidden_dim, kernel_size=1, norm=norm, activation=nn.ReLU6)
            )
        layers.extend([
            # dw
            ConvNormActivation(
                hidden_dim,
                hidden_dim,
                stride=stride,
                groups=hidden_dim,
                norm=norm,
                activation=nn.ReLU6
            ),
            # pw-linear
            nn.Conv2d(hidden_dim, out_channel, kernel_size=1, stride=1, has_bias=False),
            norm(out_channel)
        ])
        self.conv = nn.SequentialCell(layers)
        self.add = Add()

    def construct(self, x):
        identity = x
        x = self.conv(x)
        if self.use_res_connect:
            return self.add(identity, x)
        return x
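
# A minimal usage sketch (an addition for illustration, not part of the original
# source): the residual shortcut is only taken when stride == 1 and
# in_channel == out_channel, so InvertedResidual(32, 32, 1, 6) adds its input back
# to the projected output, while InvertedResidual(32, 64, 2, 6) does not.
#
#     >>> import numpy as np
#     >>> import mindspore as ms
#     >>> block = InvertedResidual(32, 32, stride=1, expand_ratio=6)
#     >>> x = ms.Tensor(np.ones([1, 32, 28, 28]), ms.float32)
#     >>> block(x).shape
#     (1, 32, 28, 28)
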
@ClassFactory.register(ModuleType.BACKBONE)
class MobileNetV2(nn.Cell):
    """
    MobileNetV2 architecture.

    Args:
        alpha (float): The channels multiplier used for rounding to 8/16 and others. Default: 1.0.
        inverted_residual_setting (list, optional): Inverted residual settings. Default: None.
        round_nearest (int): Round the number of channels in each layer to be a multiple of this number.
            Set to 1 to turn off rounding. Default: 8.
        block (nn.Cell, optional): Module specifying the inverted residual building block for MobileNet.
            Default: None.
        norm (nn.Cell, optional): Norm layer that will be stacked on top of the convolution layer.
            Default: None.

    Inputs:
        - **x** (Tensor) - Tensor of shape :math:`(N, C_{in}, H_{in}, W_{in})`.

    Outputs:
        Tensor of shape :math:`(N, 1280, 7, 7)`.

    Supported Platforms:
        ``GPU``

    Examples:
        >>> import numpy as np
        >>> import mindspore as ms
        >>> from mindvision.classification.models.backbones import MobileNetV2
        >>> net = MobileNetV2()
        >>> x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)
        >>> output = net(x)
        >>> print(output.shape)
        (1, 1280, 7, 7)

    About MobileNetV2:

        The MobileNetV2 architecture is based on an inverted residual structure where the input and
        output of the residual block are thin bottleneck layers, opposite to traditional residual
        models which use expanded representations in the input. MobileNetV2 uses lightweight
        depthwise convolutions to filter features in the intermediate expansion layer.

    Citation:

    .. code-block::

        @article{2018MobileNetV2,
        title={MobileNetV2: Inverted Residuals and Linear Bottlenecks},
        author={Sandler, M. and Howard, A. and Zhu, M. and Zhmoginov, A. and Chen, L. C.},
        journal={IEEE},
        year={2018},
        }
    """

    def __init__(self,
                 alpha: float = 1.0,
                 inverted_residual_setting: Optional[List[List[int]]] = None,
                 round_nearest: int = 8,
                 block: Optional[nn.Cell] = None,
                 norm: Optional[nn.Cell] = None,
                 ) -> None:
        super(MobileNetV2, self).__init__()

        if not block:
            block = InvertedResidual
        if not norm:
            norm = nn.BatchNorm2d

        input_channel = make_divisible(32 * alpha, round_nearest)
        last_channel = make_divisible(1280 * max(1.0, alpha), round_nearest)

        # Setting of inverted residual blocks.
        if not inverted_residual_setting:
            inverted_residual_setting = [
                # t, c, n, s
                [1, 16, 1, 1],
                [6, 24, 2, 2],
                [6, 32, 3, 2],
                [6, 64, 4, 2],
                [6, 96, 3, 1],
                [6, 160, 3, 2],
                [6, 320, 1, 1],
            ]

        # Building the first layer.
        features: List[nn.Cell] = [
            ConvNormActivation(3, input_channel, stride=2, norm=norm, activation=nn.ReLU6)
        ]

        # Building inverted residual blocks.
        # t: The expansion factor.
        # c: Number of output channels.
        # n: Number of blocks.
        # s: Stride of the first block.
        for t, c, n, s in inverted_residual_setting:
            output_channel = make_divisible(c * alpha, round_nearest)
            for i in range(n):
                stride = s if i == 0 else 1
                features.append(block(input_channel, output_channel, stride, expand_ratio=t, norm=norm))
                input_channel = output_channel

        # Building the last layer.
        features.append(
            ConvNormActivation(input_channel, last_channel, kernel_size=1, norm=norm, activation=nn.ReLU6)
        )

        # Stack all layers into a single nn.SequentialCell.
        self.features = nn.SequentialCell(features)

    def construct(self, x):
        x = self.features(x)
        return x
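
# A quick smoke-test sketch (an addition for illustration, not part of the original
# module): it builds the backbone with the default settings and with a smaller width
# multiplier, then prints the feature-map shapes. Because `last_channel` uses
# max(1.0, alpha), shrinking alpha below 1.0 thins the intermediate stages but keeps
# the final 1280 output channels.
if __name__ == "__main__":
    import numpy as np
    import mindspore as ms

    x = ms.Tensor(np.ones([1, 3, 224, 224]), ms.float32)

    # Default configuration: 1280-channel, 7x7 feature map for a 224x224 input.
    net = MobileNetV2()
    print(net(x).shape)  # (1, 1280, 7, 7)

    # Slimmer variant: intermediate channels shrink, the last layer stays at 1280.
    slim_net = MobileNetV2(alpha=0.5)
    print(slim_net(x).shape)  # (1, 1280, 7, 7)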