Source code for mindspore.dataset.vision.utils

# Copyright 2019-2022 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Interpolation Mode, Resampling Filters
"""
from enum import Enum, IntEnum
import numbers

import numpy as np
from PIL import Image

import mindspore
import mindspore._c_dataengine as cde

# The following constants have been deprecated by Pillow since version 9.1.0
if int(Image.__version__.split(".")[0]) > 9 or Image.__version__ >= "9.1.0":
    FLIP_LEFT_RIGHT = Image.Transpose.FLIP_LEFT_RIGHT
    FLIP_TOP_BOTTOM = Image.Transpose.FLIP_TOP_BOTTOM
    PERSPECTIVE = Image.Transform.PERSPECTIVE
    AFFINE = Image.Transform.AFFINE
    NEAREST = Image.Resampling.NEAREST
    ANTIALIAS = Image.Resampling.LANCZOS
    LINEAR = Image.Resampling.BILINEAR
    CUBIC = Image.Resampling.BICUBIC
else:
    FLIP_LEFT_RIGHT = Image.FLIP_LEFT_RIGHT
    FLIP_TOP_BOTTOM = Image.FLIP_TOP_BOTTOM
    PERSPECTIVE = Image.PERSPECTIVE
    AFFINE = Image.AFFINE
    NEAREST = Image.NEAREST
    ANTIALIAS = Image.ANTIALIAS
    LINEAR = Image.LINEAR
    CUBIC = Image.CUBIC


[docs]class AutoAugmentPolicy(str, Enum):
    """
    AutoAugment policy for different datasets.

    Possible enumeration values are: ``AutoAugmentPolicy.IMAGENET``, ``AutoAugmentPolicy.CIFAR10``,
    AutoAugmentPolicy.SVHN.

    Each policy contains 25 pairs of augmentation operations. When using AutoAugment, each image is randomly
    transformed with one of these operation pairs. Each pair has 2 different operations. The following shows
    all of these augmentation operations, including operation names with their probabilities and random params.

    - ``AutoAugmentPolicy.IMAGENET``: dataset auto augment policy for ImageNet.

      .. code-block::

          Augmentation operations pair:
          [(("Posterize", 0.4, 8), ("Rotate", 0.6, 9)),        (("Solarize", 0.6, 5), ("AutoContrast", 0.6, None)),
           (("Equalize", 0.8, None), ("Equalize", 0.6, None)), (("Posterize", 0.6, 7), ("Posterize", 0.6, 6)),
           (("Equalize", 0.4, None), ("Solarize", 0.2, 4)),    (("Equalize", 0.4, None), ("Rotate", 0.8, 8)),
           (("Solarize", 0.6, 3), ("Equalize", 0.6, None)),    (("Posterize", 0.8, 5), ("Equalize", 1.0, None)),
           (("Rotate", 0.2, 3), ("Solarize", 0.6, 8)),         (("Equalize", 0.6, None), ("Posterize", 0.4, 6)),
           (("Rotate", 0.8, 8), ("Color", 0.4, 0)),            (("Rotate", 0.4, 9), ("Equalize", 0.6, None)),
           (("Equalize", 0.0, None), ("Equalize", 0.8, None)), (("Invert", 0.6, None), ("Equalize", 1.0, None)),
           (("Color", 0.6, 4), ("Contrast", 1.0, 8)),          (("Rotate", 0.8, 8), ("Color", 1.0, 2)),
           (("Color", 0.8, 8), ("Solarize", 0.8, 7)),          (("Sharpness", 0.4, 7), ("Invert", 0.6, None)),
           (("ShearX", 0.6, 5), ("Equalize", 1.0, None)),      (("Color", 0.4, 0), ("Equalize", 0.6, None)),
           (("Equalize", 0.4, None), ("Solarize", 0.2, 4)),    (("Solarize", 0.6, 5), ("AutoContrast", 0.6, None)),
           (("Invert", 0.6, None), ("Equalize", 1.0, None)),   (("Color", 0.6, 4), ("Contrast", 1.0, 8)),
           (("Equalize", 0.8, None), ("Equalize", 0.6, None))]

    - ``AutoAugmentPolicy.CIFAR10``: dataset auto augment policy for Cifar10.

      .. code-block::

          Augmentation operations pair:
          [(("Invert", 0.1, None), ("Contrast", 0.2, 6)),         (("Rotate", 0.7, 2), ("TranslateX", 0.3, 9)),
           (("Sharpness", 0.8, 1), ("Sharpness", 0.9, 3)),         (("ShearY", 0.5, 8), ("TranslateY", 0.7, 9)),
           (("AutoContrast", 0.5, None), ("Equalize", 0.9, None)), (("ShearY", 0.2, 7), ("Posterize", 0.3, 7)),
           (("Color", 0.4, 3), ("Brightness", 0.6, 7)),            (("Sharpness", 0.3, 9), ("Brightness", 0.7, 9)),
           (("Equalize", 0.6, None), ("Equalize", 0.5, None)),     (("Contrast", 0.6, 7), ("Sharpness", 0.6, 5)),
           (("Color", 0.7, 7), ("TranslateX", 0.5, 8)),            (("Equalize", 0.8, None), ("Invert", 0.1, None)),
           (("TranslateY", 0.4, 3), ("Sharpness", 0.2, 6)),        (("Brightness", 0.9, 6), ("Color", 0.2, 8)),
           (("Solarize", 0.5, 2), ("Invert", 0.0, None)),          (("TranslateY", 0.9, 9), ("TranslateY", 0.7, 9)),
           (("Equalize", 0.2, None), ("Equalize", 0.6, None)),     (("Color", 0.9, 9), ("Equalize", 0.6, None)),
           (("AutoContrast", 0.8, None), ("Solarize", 0.2, 8)),    (("Brightness", 0.1, 3), ("Color", 0.7, 0)),
           (("Solarize", 0.4, 5), ("AutoContrast", 0.9, None)),
           (("AutoContrast", 0.9, None), ("Solarize", 0.8, 3)),
           (("TranslateY", 0.7, 9), ("AutoContrast", 0.9, None)),
           (("Equalize", 0.3, None), ("AutoContrast", 0.4, None)),
           (("Equalize", 0.2, None), ("AutoContrast", 0.6, None))]

    - ``AutoAugmentPolicy.SVHN``: dataset auto augment policy for SVHN.

      .. code-block::

          Augmentation operations pair:
          [(("ShearX", 0.9, 4), ("Invert", 0.2, None)),          (("ShearY", 0.9, 8), ("Invert", 0.7, None)),
           (("Equalize", 0.6, None), ("Solarize", 0.6, 6)),      (("Invert", 0.9, None), ("Equalize", 0.6, None)),
           (("Equalize", 0.6, None), ("Rotate", 0.9, 3)),        (("ShearX", 0.9, 4), ("AutoContrast", 0.8, None)),
           (("ShearY", 0.9, 8), ("Invert", 0.4, None)),          (("ShearY", 0.9, 5), ("Solarize", 0.2, 6)),
           (("Invert", 0.9, None), ("AutoContrast", 0.8, None)), (("Equalize", 0.6, None), ("Rotate", 0.9, 3)),
           (("ShearX", 0.9, 4), ("Solarize", 0.3, 3)),           (("ShearY", 0.8, 8), ("Invert", 0.7, None)),
           (("Equalize", 0.9, None), ("TranslateY", 0.6, 6)),    (("Invert", 0.9, None), ("Equalize", 0.6, None)),
           (("Contrast", 0.3, 3), ("Rotate", 0.8, 4)),           (("Invert", 0.8, None), ("TranslateY", 0.0, 2)),
           (("ShearY", 0.7, 6), ("Solarize", 0.4, 8)),           (("Invert", 0.6, None), ("Rotate", 0.8, 4)),
           (("ShearY", 0.3, 7), ("TranslateX", 0.9, 3)),         (("ShearX", 0.1, 6), ("Invert", 0.6, None)),
           (("Solarize", 0.7, 2), ("TranslateY", 0.6, 7)),       (("ShearY", 0.8, 4), ("Invert", 0.8, None)),
           (("ShearX", 0.7, 9), ("TranslateY", 0.8, 3)),         (("ShearY", 0.8, 5), ("AutoContrast", 0.7, None)),
           (("ShearX", 0.7, 2), ("Invert", 0.1, None))]
    """
    IMAGENET: str = "imagenet"
    CIFAR10: str = "cifar10"
    SVHN: str = "svhn"

    @staticmethod
    def to_c_type(policy):
        """
        Function to return C type for AutoAugment policy.
        """
        c_values = {AutoAugmentPolicy.IMAGENET: cde.AutoAugmentPolicy.DE_AUTO_AUGMENT_POLICY_IMAGENET,
                    AutoAugmentPolicy.CIFAR10: cde.AutoAugmentPolicy.DE_AUTO_AUGMENT_POLICY_CIFAR10,
                    AutoAugmentPolicy.SVHN: cde.AutoAugmentPolicy.DE_AUTO_AUGMENT_POLICY_SVHN}

        value = c_values.get(policy)
        if value is None:
            raise RuntimeError("Unsupported AutoAugmentPolicy, only support IMAGENET, CIFAR10, and SVHN.")
        return value


[docs]class Border(str, Enum):
    """
    Padding Mode, Border Type.

    Possible enumeration values are: ``Border.CONSTANT``, ``Border.EDGE``, ``Border.REFLECT``, ``Border.SYMMETRIC``.

    - ``Border.CONSTANT`` : means it fills the border with constant values.
    - ``Border.EDGE`` : means it pads with the last value on the edge.
    - ``Border.REFLECT`` : means it reflects the values on the edge omitting the last value of edge.
      For example, padding [1,2,3,4] with 2 elements on both sides will result in [3,2,1,2,3,4,3,2].
    - ``Border.SYMMETRIC`` : means it reflects the values on the edge repeating the last value of edge.
      For example, padding [1,2,3,4] with 2 elements on both sides will result in [2,1,1,2,3,4,4,3].

    Note:
        This class derived from class str to support json serializable.
    """
    CONSTANT: str = "constant"
    EDGE: str = "edge"
    REFLECT: str = "reflect"
    SYMMETRIC: str = "symmetric"

    @staticmethod
    def to_python_type(border_type):
        """
        Function to return Python type for Border Type.
        """
        python_values = {Border.CONSTANT: 'constant',
                         Border.EDGE: 'edge',
                         Border.REFLECT: 'reflect',
                         Border.SYMMETRIC: 'symmetric'}

        value = python_values.get(border_type)
        if value is None:
            raise RuntimeError("Unsupported Border type, only support CONSTANT, EDGE, REFLECT and SYMMETRIC.")
        return value

    @staticmethod
    def to_c_type(border_type):
        """
        Function to return C type for Border Type.
        """
        c_values = {Border.CONSTANT: cde.BorderType.DE_BORDER_CONSTANT,
                    Border.EDGE: cde.BorderType.DE_BORDER_EDGE,
                    Border.REFLECT: cde.BorderType.DE_BORDER_REFLECT,
                    Border.SYMMETRIC: cde.BorderType.DE_BORDER_SYMMETRIC}

        value = c_values.get(border_type)
        if value is None:
            raise RuntimeError("Unsupported Border type, only support CONSTANT, EDGE, REFLECT and SYMMETRIC.")
        return value


[docs]class ConvertMode(IntEnum):
    """
    The color conversion mode.

    Possible enumeration values are as follows:

    - ConvertMode.COLOR_BGR2BGRA: convert BGR format images to BGRA format images.
    - ConvertMode.COLOR_RGB2RGBA: convert RGB format images to RGBA format images.
    - ConvertMode.COLOR_BGRA2BGR: convert BGRA format images to BGR format images.
    - ConvertMode.COLOR_RGBA2RGB: convert RGBA format images to RGB format images.
    - ConvertMode.COLOR_BGR2RGBA: convert BGR format images to RGBA format images.
    - ConvertMode.COLOR_RGB2BGRA: convert RGB format images to BGRA format images.
    - ConvertMode.COLOR_RGBA2BGR: convert RGBA format images to BGR format images.
    - ConvertMode.COLOR_BGRA2RGB: convert BGRA format images to RGB format images.
    - ConvertMode.COLOR_BGR2RGB: convert BGR format images to RGB format images.
    - ConvertMode.COLOR_RGB2BGR: convert RGB format images to BGR format images.
    - ConvertMode.COLOR_BGRA2RGBA: convert BGRA format images to RGBA format images.
    - ConvertMode.COLOR_RGBA2BGRA: convert RGBA format images to BGRA format images.
    - ConvertMode.COLOR_BGR2GRAY: convert BGR format images to GRAY format images.
    - ConvertMode.COLOR_RGB2GRAY: convert RGB format images to GRAY format images.
    - ConvertMode.COLOR_GRAY2BGR: convert GRAY format images to BGR format images.
    - ConvertMode.COLOR_GRAY2RGB: convert GRAY format images to RGB format images.
    - ConvertMode.COLOR_GRAY2BGRA: convert GRAY format images to BGRA format images.
    - ConvertMode.COLOR_GRAY2RGBA: convert GRAY format images to RGBA format images.
    - ConvertMode.COLOR_BGRA2GRAY: convert BGRA format images to GRAY format images.
    - ConvertMode.COLOR_RGBA2GRAY: convert RGBA format images to GRAY format images.
    """
    COLOR_BGR2BGRA = 0
    COLOR_RGB2RGBA = COLOR_BGR2BGRA
    COLOR_BGRA2BGR = 1
    COLOR_RGBA2RGB = COLOR_BGRA2BGR
    COLOR_BGR2RGBA = 2
    COLOR_RGB2BGRA = COLOR_BGR2RGBA
    COLOR_RGBA2BGR = 3
    COLOR_BGRA2RGB = COLOR_RGBA2BGR
    COLOR_BGR2RGB = 4
    COLOR_RGB2BGR = COLOR_BGR2RGB
    COLOR_BGRA2RGBA = 5
    COLOR_RGBA2BGRA = COLOR_BGRA2RGBA
    COLOR_BGR2GRAY = 6
    COLOR_RGB2GRAY = 7
    COLOR_GRAY2BGR = 8
    COLOR_GRAY2RGB = COLOR_GRAY2BGR
    COLOR_GRAY2BGRA = 9
    COLOR_GRAY2RGBA = COLOR_GRAY2BGRA
    COLOR_BGRA2GRAY = 10
    COLOR_RGBA2GRAY = 11

    @staticmethod
    def to_c_type(mode):
        """
        Function to return C type for color mode.
        """
        c_values = {ConvertMode.COLOR_BGR2BGRA: cde.ConvertMode.DE_COLOR_BGR2BGRA,
                    ConvertMode.COLOR_RGB2RGBA: cde.ConvertMode.DE_COLOR_RGB2RGBA,
                    ConvertMode.COLOR_BGRA2BGR: cde.ConvertMode.DE_COLOR_BGRA2BGR,
                    ConvertMode.COLOR_RGBA2RGB: cde.ConvertMode.DE_COLOR_RGBA2RGB,
                    ConvertMode.COLOR_BGR2RGBA: cde.ConvertMode.DE_COLOR_BGR2RGBA,
                    ConvertMode.COLOR_RGB2BGRA: cde.ConvertMode.DE_COLOR_RGB2BGRA,
                    ConvertMode.COLOR_RGBA2BGR: cde.ConvertMode.DE_COLOR_RGBA2BGR,
                    ConvertMode.COLOR_BGRA2RGB: cde.ConvertMode.DE_COLOR_BGRA2RGB,
                    ConvertMode.COLOR_BGR2RGB: cde.ConvertMode.DE_COLOR_BGR2RGB,
                    ConvertMode.COLOR_RGB2BGR: cde.ConvertMode.DE_COLOR_RGB2BGR,
                    ConvertMode.COLOR_BGRA2RGBA: cde.ConvertMode.DE_COLOR_BGRA2RGBA,
                    ConvertMode.COLOR_RGBA2BGRA: cde.ConvertMode.DE_COLOR_RGBA2BGRA,
                    ConvertMode.COLOR_BGR2GRAY: cde.ConvertMode.DE_COLOR_BGR2GRAY,
                    ConvertMode.COLOR_RGB2GRAY: cde.ConvertMode.DE_COLOR_RGB2GRAY,
                    ConvertMode.COLOR_GRAY2BGR: cde.ConvertMode.DE_COLOR_GRAY2BGR,
                    ConvertMode.COLOR_GRAY2RGB: cde.ConvertMode.DE_COLOR_GRAY2RGB,
                    ConvertMode.COLOR_GRAY2BGRA: cde.ConvertMode.DE_COLOR_GRAY2BGRA,
                    ConvertMode.COLOR_GRAY2RGBA: cde.ConvertMode.DE_COLOR_GRAY2RGBA,
                    ConvertMode.COLOR_BGRA2GRAY: cde.ConvertMode.DE_COLOR_BGRA2GRAY,
                    ConvertMode.COLOR_RGBA2GRAY: cde.ConvertMode.DE_COLOR_RGBA2GRAY,
                    }

        mode = c_values.get(mode)
        if mode is None:
            raise RuntimeError("Unsupported ConvertMode, see https://www.mindspore.cn/docs/zh-CN/r2.3.0rc2/api_python/"
                               "dataset_vision/mindspore.dataset.vision.ConvertColor.html for more details.")
        return mode


[docs]class ImageBatchFormat(IntEnum):
    """
    Data Format of images after batch operation.

    Possible enumeration values are: ``ImageBatchFormat.NHWC``, ``ImageBatchFormat.NCHW``.

    - ``ImageBatchFormat.NHWC``: in orders like, batch N, height H, width W, channels C to store the data.
    - ``ImageBatchFormat.NCHW``: in orders like, batch N, channels C, height H, width W to store the data.
    """
    NHWC = 0
    NCHW = 1

    @staticmethod
    def to_c_type(image_batch_format):
        """
        Function to return C type for ImageBatchFormat.
        """
        c_values = {ImageBatchFormat.NHWC: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NHWC,
                    ImageBatchFormat.NCHW: cde.ImageBatchFormat.DE_IMAGE_BATCH_FORMAT_NCHW}

        value = c_values.get(image_batch_format)
        if value is None:
            raise RuntimeError("Unsupported ImageBatchFormat, only support NHWC and NCHW.")
        return value


[docs]class ImageReadMode(IntEnum):
    """
    The read mode used for the image file.

    Possible enumeration values are: ``ImageReadMode.UNCHANGED``, ``ImageReadMode.GRAYSCALE``, ``ImageReadMode.COLOR``.

    - ``ImageReadMode.UNCHANGED``: remain the output in the original format.
    - ``ImageReadMode.GRAYSCALE``: convert the output into one channel grayscale data.
    - ``ImageReadMode.COLOR``: convert the output into three channels RGB color data.
    """
    UNCHANGED = 0
    GRAYSCALE = 1
    COLOR = 2

    @staticmethod
    def to_c_type(image_read_mode):
        """
        Function to return C type for ImageReadMode.
        """
        c_values = {ImageReadMode.UNCHANGED: cde.ImageReadMode.DE_IMAGE_READ_MODE_UNCHANGED,
                    ImageReadMode.GRAYSCALE: cde.ImageReadMode.DE_IMAGE_READ_MODE_GRAYSCALE,
                    ImageReadMode.COLOR: cde.ImageReadMode.DE_IMAGE_READ_MODE_COLOR}

        value = c_values.get(image_read_mode)
        if value is None:
            raise RuntimeError("Unsupported ImageReadMode, only support UNCHANGED, GRAYSCALE and COLOR.")
        return value


[docs]class Inter(IntEnum):
    """
    Interpolation methods.

    Available values are as follows:

    - ``Inter.NEAREST`` : Nearest neighbor interpolation.
    - ``Inter.ANTIALIAS`` : Antialias interpolation. Supported only when the input is PIL.Image.Image.
    - ``Inter.LINEAR`` : Linear interpolation, the same as ``Inter.BILINEAR``.
    - ``Inter.BILINEAR`` : Bilinear interpolation.
    - ``Inter.CUBIC`` : Cubic interpolation, the same as ``Inter.BICUBIC``.
    - ``Inter.BICUBIC`` : Bicubic interpolation.
    - ``Inter.AREA`` : Pixel area interpolation. Supported only when the input is numpy.ndarray.
    - ``Inter.PILCUBIC`` : Pillow implementation of bicubic interpolation. Supported only when the input
      is numpy.ndarray.
    """
    NEAREST = 0
    ANTIALIAS = 1
    BILINEAR = LINEAR = 2
    BICUBIC = CUBIC = 3
    AREA = 4
    PILCUBIC = 5

    @staticmethod
    def to_python_type(inter_type):
        """
        Function to return Python type for Interpolation Mode.
        """
        python_values = {Inter.NEAREST: NEAREST,
                         Inter.ANTIALIAS: ANTIALIAS,
                         Inter.LINEAR: LINEAR,
                         Inter.CUBIC: CUBIC}

        value = python_values.get(inter_type)
        if value is None:
            raise RuntimeError("Unsupported interpolation, only support NEAREST, ANTIALIAS, LINEAR and CUBIC.")
        return value

    @staticmethod
    def to_c_type(inter_type):
        """
        Function to return C type for Interpolation Mode.
        """
        c_values = {Inter.NEAREST: cde.InterpolationMode.DE_INTER_NEAREST_NEIGHBOUR,
                    Inter.LINEAR: cde.InterpolationMode.DE_INTER_LINEAR,
                    Inter.CUBIC: cde.InterpolationMode.DE_INTER_CUBIC,
                    Inter.AREA: cde.InterpolationMode.DE_INTER_AREA,
                    Inter.PILCUBIC: cde.InterpolationMode.DE_INTER_PILCUBIC}

        value = c_values.get(inter_type)
        if value is None:
            raise RuntimeError("Unsupported interpolation, only support NEAREST, LINEAR, CUBIC, AREA and PILCUBIC.")

        return value


[docs]class SliceMode(IntEnum):
    """
    Mode to Slice Tensor into multiple parts.

    Possible enumeration values are: ``SliceMode.PAD``, ``SliceMode.DROP``.

    - ``SliceMode.PAD``: pad some pixels before slice the Tensor if needed.
    - ``SliceMode.DROP``: drop remainder pixels before slice the Tensor if needed.
    """
    PAD = 0
    DROP = 1

    @staticmethod
    def to_c_type(mode):
        """
        Function to return C type for SliceMode.
        """
        c_values = {SliceMode.PAD: cde.SliceMode.DE_SLICE_PAD,
                    SliceMode.DROP: cde.SliceMode.DE_SLICE_DROP}

        value = c_values.get(mode)
        if value is None:
            raise RuntimeError("Unsupported SliceMode, only support PAD and DROP.")
        return value


[docs]def encode_jpeg(image, quality=75):
    """
    Encode the input image as JPEG data.

    Args:
        image (Union[numpy.ndarray, mindspore.Tensor]): The image to be encoded.
        quality (int, optional): Quality of the resulting JPEG data, in range of [1, 100]. Default: ``75``.

    Returns:
        numpy.ndarray, one dimension uint8 data.

    Raises:
        TypeError: If `image` is not of type numpy.ndarray or mindspore.Tensor.
        TypeError: If `quality` is not of type int.
        RuntimeError: If the data type of `image` is not uint8.
        RuntimeError: If the shape of `image` is not <H, W> or <H, W, 1> or <H, W, 3>.
        RuntimeError: If `quality` is less than 1 or greater than 100.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset.vision as vision
        >>> import numpy as np
        >>> # Generate a random image with height=120, width=340, channels=3
        >>> image = np.random.randint(256, size=(120, 340, 3), dtype=np.uint8)
        >>> jpeg_data = vision.encode_jpeg(image)
    """
    if not isinstance(quality, int):
        raise TypeError("Input quality is not of type {0}, but got: {1}.".format(int, type(quality)))
    if isinstance(image, np.ndarray):
        return cde.encode_jpeg(cde.Tensor(image), quality).as_array()
    if isinstance(image, mindspore.Tensor):
        return cde.encode_jpeg(cde.Tensor(image.asnumpy()), quality).as_array()
    raise TypeError("Input image is not of type {0} or {1}, but got: {2}.".format(np.ndarray,
                                                                                  mindspore.Tensor, type(image)))


[docs]def encode_png(image, compression_level=6):
    """
    Encode the input image as PNG data.

    Args:
        image (Union[numpy.ndarray, mindspore.Tensor]): The image to be encoded.
        compression_level (int, optional): The `compression_level` for encoding, in range of [0, 9].
            Default: ``6``.

    Returns:
        numpy.ndarray, one dimension uint8 data.

    Raises:
        TypeError: If `image` is not of type numpy.ndarray or mindspore.Tensor.
        TypeError: If `compression_level` is not of type int.
        RuntimeError: If the data type of `image` is not uint8.
        RuntimeError: If the shape of `image` is not <H, W> or <H, W, 1> or <H, W, 3>.
        RuntimeError: If `compression_level` is less than 0 or greater than 9.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset.vision as vision
        >>> import numpy as np
        >>> # Generate a random image with height=120, width=340, channels=3
        >>> image = np.random.randint(256, size=(120, 340, 3), dtype=np.uint8)
        >>> png_data = vision.encode_png(image)
    """
    if not isinstance(compression_level, int):
        raise TypeError("Input compression_level is not of type {0}, but got: {1}.".format(int,
                                                                                           type(compression_level)))
    if isinstance(image, np.ndarray):
        return cde.encode_png(cde.Tensor(image), compression_level).as_array()
    if isinstance(image, mindspore.Tensor):
        return cde.encode_png(cde.Tensor(image.asnumpy()), compression_level).as_array()
    raise TypeError("Input image is not of type {0} or {1}, but got: {2}.".format(np.ndarray,
                                                                                  mindspore.Tensor, type(image)))


[docs]def get_image_num_channels(image):
    """
    Get the number of input image channels.

    Args:
        image (Union[numpy.ndarray, PIL.Image.Image]): Image to get the number of channels.

    Returns:
        int, the number of input image channels.

    Raises:
        RuntimeError: If the dimension of `image` is less than 2.
        TypeError: If `image` is not of type <class 'numpy.ndarray'> or <class 'PIL.Image.Image'>.

    Examples:
        >>> import mindspore.dataset.vision as vision
        >>> from PIL import Image
        >>> image = Image.open("/path/to/image_file")
        >>> num_channels = vision.get_image_num_channels(image)
    """

    if isinstance(image, np.ndarray):
        return cde.get_image_num_channels(cde.Tensor(image))

    if isinstance(image, Image.Image):
        if hasattr(image, "getbands"):
            return len(image.getbands())

        return image.channels

    raise TypeError("Input image is not of type {0} or {1}, but got: {2}.".format(np.ndarray, Image.Image, type(image)))


[docs]def get_image_size(image):
    """
    Get the size of input image as [height, width].

    Args:
        image (Union[numpy.ndarray, PIL.Image.Image]): The image to get size.

    Returns:
        list[int, int], the image size.

    Raises:
        RuntimeError: If the dimension of `image` is less than 2.
        TypeError: If `image` is not of type <class 'numpy.ndarray'> or <class 'PIL.Image.Image'>.

    Examples:
        >>> import mindspore.dataset.vision as vision
        >>> from PIL import Image
        >>> image = Image.open("/path/to/image_file")
        >>> image_size = vision.get_image_size(image)
    """

    if isinstance(image, np.ndarray):
        return cde.get_image_size(cde.Tensor(image))
    if isinstance(image, Image.Image):
        size_list = list(image.size)
        size_list[0], size_list[1] = size_list[1], size_list[0]
        return size_list

    raise TypeError("Input image is not of type {0} or {1}, but got: {2}.".format(np.ndarray, Image.Image, type(image)))


def parse_padding(padding):
    """ Parses and prepares the padding tuple"""

    if isinstance(padding, numbers.Number):
        padding = [padding] * 4
    if len(padding) == 2:
        left = right = padding[0]
        top = bottom = padding[1]
        padding = (left, top, right, bottom,)
    if isinstance(padding, list):
        padding = tuple(padding)
    return padding


[docs]def read_file(filename):
    """
    Read a file in binary mode.

    Args:
        filename(str): The path to the file to be read.

    Returns:
        numpy.ndarray, the one dimension uint8 data.

    Raises:
        TypeError: If `filename` is not of type str.
        RuntimeError: If `filename` does not exist or is not a common file.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset.vision as vision
        >>> output = vision.read_file("/path/to/file")
    """
    if isinstance(filename, str):
        return cde.read_file(filename).as_array()
    raise TypeError("Input filename is not of type {0}, but got: {1}.".format(str, type(filename)))


[docs]def read_image(filename, mode=ImageReadMode.UNCHANGED):
    """
    Read a image file and decode it into one channel grayscale data or RGB color data.
    Supported file types are JPEG, PNG, BMP, TIFF.

    Args:
        filename(str): The path to the image file to be read.
        mode(ImageReadMode, optional): The mode used for decoding the image. It can be
            ``ImageReadMode.UNCHANGED``, ``ImageReadMode.GRAYSCALE``, ``IMageReadMode.COLOR``.
            Default: ``ImageReadMode.UNCHANGED``.

            - ImageReadMode.UNCHANGED, remain the output in the original format.

            - ImageReadMode.GRAYSCALE, convert the output into one channel grayscale data.

            - IMageReadMode.COLOR, convert the output into three channels RGB color data.

    Returns:
        numpy.ndarray, three dimensions uint8 data in the shape of (Height, Width, Channels).

    Raises:
        TypeError: If `filename` is not of type str.
        TypeError: If `mode` is not of type :class:`mindspore.dataset.vision.ImageReadMode` .
        RuntimeError: If `filename` does not exist, or not a regular file, or not a supported image file.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset.vision as vision
        >>> from mindspore.dataset.vision import ImageReadMode
        >>> output = vision.read_image("/path/to/image_file", ImageReadMode.UNCHANGED)
    """
    if not isinstance(filename, str):
        raise TypeError("Input filename is not of type {0}, but got: {1}.".format(str, type(filename)))
    if not isinstance(mode, ImageReadMode):
        raise TypeError("Input mode is not of type {0}, but got: {1}.".format(ImageReadMode, type(mode)))
    return cde.read_image(filename, ImageReadMode.to_c_type(mode)).as_array()


[docs]def write_file(filename, data):
    """
    Write the one dimension uint8 data into a file using binary mode.

    Args:
        filename (str): The path to the file to be written.
        data (Union[numpy.ndarray, mindspore.Tensor]): The one dimension uint8 data to be written.

    Raises:
        TypeError: If `filename` is not of type str.
        TypeError: If `data` is not of type numpy.ndarray or mindspore.Tensor.
        RuntimeError: If the `filename` is not a common file.
        RuntimeError: If the data type of `data` is not uint8.
        RuntimeError: If the shape of `data` is not a one-dimensional array.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset.vision as vision
        >>> import numpy as np
        >>> # Generate a random data with 1024 bytes
        >>> data = np.random.randint(256, size=(1024), dtype=np.uint8)
        >>> vision.write_file("/path/to/file", data)
    """
    if not isinstance(filename, str):
        raise TypeError("Input filename is not of type {0}, but got: {1}.".format(str, type(filename)))
    if isinstance(data, np.ndarray):
        return cde.write_file(filename, cde.Tensor(data))
    if isinstance(data, mindspore.Tensor):
        return cde.write_file(filename, cde.Tensor(data.asnumpy()))
    raise TypeError("Input data is not of type {0} or {1}, but got: {2}.".format(np.ndarray,
                                                                                 mindspore.Tensor, type(data)))


[docs]def write_jpeg(filename, image, quality=75):
    """
    Write the image data into a JPEG file.

    Args:
        filename (str): The path to the file to be written.
        image (Union[numpy.ndarray, mindspore.Tensor]): The image data to be written.
        quality (int, optional): Quality of the resulting JPEG file, in range of [1, 100]. Default: ``75``.

    Raises:
        TypeError: If `filename` is not of type str.
        TypeError: If `image` is not of type numpy.ndarray or mindspore.Tensor.
        TypeError: If `quality` is not of type int.
        RuntimeError: If the `filename` does not exist or not a common file.
        RuntimeError: If the data type of `image` is not uint8.
        RuntimeError: If the shape of `image` is not <H, W> or <H, W, 1> or <H, W, 3>.
        RuntimeError: If `quality` is less than 1 or greater than 100.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset.vision as vision
        >>> import numpy as np
        >>> # Generate a random image with height=120, width=340, channels=3
        >>> image = np.random.randint(256, size=(120, 340, 3), dtype=np.uint8)
        >>> vision.write_jpeg("/path/to/file", image)
    """
    if not isinstance(filename, str):
        raise TypeError("Input filename is not of type {0}, but got: {1}.".format(str, type(filename)))
    if not isinstance(quality, int):
        raise TypeError("Input quality is not of type {0}, but got: {1}.".format(int, type(quality)))
    if isinstance(image, np.ndarray):
        return cde.write_jpeg(filename, cde.Tensor(image), quality)
    if isinstance(image, mindspore.Tensor):
        return cde.write_jpeg(filename, cde.Tensor(image.asnumpy()), quality)
    raise TypeError("Input image is not of type {0} or {1}, but got: {2}.".format(np.ndarray,
                                                                                  mindspore.Tensor, type(image)))


[docs]def write_png(filename, image, compression_level=6):
    """
    Write the image into a PNG file.

    Args:
        filename (str): The path to the file to be written.
        image (Union[numpy.ndarray, mindspore.Tensor]): The image data to be written.
        compression_level (int, optional): Compression level for the resulting PNG file, in range of [0, 9].
            Default: ``6``.

    Raises:
        TypeError: If `filename` is not of type str.
        TypeError: If `image` is not of type numpy.ndarray or mindspore.Tensor.
        TypeError: If `compression_level` is not of type int.
        RuntimeError: If the `filename` does not exist or not a common file.
        RuntimeError: If the data type of `image` is not uint8.
        RuntimeError: If the shape of `image` is not <H, W> or <H, W, 1> or <H, W, 3>.
        RuntimeError: If `compression_level` is less than 0 or greater than 9.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import mindspore.dataset.vision as vision
        >>> import numpy as np
        >>> # Generate a random image with height=120, width=340, channels=3
        >>> image = np.random.randint(256, size=(120, 340, 3), dtype=np.uint8)
        >>> vision.write_png("/path/to/file", image)
    """
    if not isinstance(filename, str):
        raise TypeError("Input filename is not of type {0}, but got: {1}.".format(str, type(filename)))
    if not isinstance(compression_level, int):
        raise TypeError("Input compression_level is not of type {0}, but got: {1}.".format(int,
                                                                                           type(compression_level)))
    if isinstance(image, np.ndarray):
        return cde.write_png(filename, cde.Tensor(image), compression_level)
    if isinstance(image, mindspore.Tensor):
        return cde.write_png(filename, cde.Tensor(image.asnumpy()), compression_level)
    raise TypeError("The input image is not of type {0} or {1}, but got: {2}.".format(np.ndarray,
                                                                                      mindspore.Tensor, type(image)))