# Copyright 2019-2021 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""
The module transforms.c_transforms provides common tensor operations, including OneHot and TypeCast.
"""
from enum import IntEnum
import numpy as np
from mindspore.common import dtype as mstype
import mindspore._c_dataengine as cde
from .validators import check_num_classes, check_ms_type, check_fill_value, check_slice_option, check_slice_op, \
check_mask_op, check_pad_end, check_concat_type, check_random_transform_ops, check_plugin
from ..core.datatypes import mstype_to_detype
class TensorOperation:
    """
    Base class Tensor Ops

    Subclasses implement `parse()` to build the underlying `cde` operation. Calling an
    instance runs that operation eagerly on the given inputs through `cde.Execute`.
    """

    def __call__(self, *input_tensor_list):
        """
        Apply this operation eagerly to one or more inputs.

        Args:
            *input_tensor_list: Inputs to transform. Each one is converted with
                `numpy.asarray` and wrapped in a `cde.Tensor`.

        Returns:
            The single output array when the operation yields exactly one output,
            otherwise a tuple of all outputs. Outputs whose NumPy dtype char is 'S'
            (byte strings) are decoded with `numpy.char.decode`.

        Raises:
            TypeError: If an input cannot be converted into a tensor.
        """
        tensor_row = []
        for tensor in input_tensor_list:
            try:
                tensor_row.append(cde.Tensor(np.asarray(tensor)))
            except RuntimeError as err:
                # Chain the original error so the underlying conversion failure stays visible.
                raise TypeError("Invalid user input. Got {}: {}, cannot be converted into tensor."
                                .format(type(tensor), tensor)) from err

        callable_op = cde.Execute(self.parse())
        output_tensor_list = callable_op(tensor_row)

        results = []
        for element in output_tensor_list:
            arr = element.as_array()
            # Byte-string outputs are handed back to the caller as decoded strings.
            results.append(np.char.decode(arr) if arr.dtype.char == 'S' else arr)
        return results[0] if len(results) == 1 else tuple(results)

    def parse(self):
        """parse function - not yet implemented"""
        raise NotImplementedError("TensorOperation has to implement parse() method.")
class OneHot(TensorOperation):
    """
    Tensor operation that applies one hot encoding to its input.

    Args:
        num_classes (int): Number of classes of objects in dataset.
            It should be larger than the largest label number in the dataset.

    Raises:
        TypeError: `num_classes` is not of type int.
        RuntimeError: Input tensor is not of type int.
        RuntimeError: Input tensor is not a 1-D tensor.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> # Assume that dataset has 10 classes, thus the label ranges from 0 to 9
        >>> onehot_op = c_transforms.OneHot(num_classes=10)
        >>> mnist_dataset = mnist_dataset.map(operations=onehot_op, input_columns=["label"])
    """

    @check_num_classes
    def __init__(self, num_classes):
        self.num_classes = num_classes

    def parse(self):
        """Build the `cde.OneHotOperation` configured with the stored class count."""
        class_count = self.num_classes
        return cde.OneHotOperation(class_count)
class Fill(TensorOperation):
    """
    Tensor operation that overwrites every element of the tensor with one given value.
    The output tensor will have the same shape and type as the input tensor.

    Args:
        fill_value (Union[str, bytes, int, float, bool]): scalar value
            to fill the tensor with.

    Raises:
        TypeError: If `fill_value` is not of type str, float, bool, int or bytes.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> # generate a 1D integer numpy array from 0 to 4
        >>> def generator_1d():
        ...     for i in range(5):
        ...         yield (np.array([i]),)
        >>> generator_dataset = ds.GeneratorDataset(generator_1d, column_names="col1")
        >>> # [[0], [1], [2], [3], [4]]
        >>> fill_op = c_transforms.Fill(3)
        >>> generator_dataset = generator_dataset.map(operations=fill_op)
        >>> # [[3], [3], [3], [3], [3]]
    """

    @check_fill_value
    def __init__(self, fill_value):
        # The scalar is wrapped once here so parse() can hand the tensor straight to cde.
        scalar = np.array(fill_value)
        self.fill_value = cde.Tensor(scalar)

    def parse(self):
        """Build the `cde.FillOperation` carrying the stored fill tensor."""
        return cde.FillOperation(self.fill_value)
class TypeCast(TensorOperation):
    """
    Tensor operation that casts its input to a given MindSpore data type.

    Note:
        This operation supports running on Ascend or GPU platforms by Offload.

    Args:
        data_type (mindspore.dtype): mindspore.dtype to be cast to.

    Raises:
        TypeError: If `data_type` is not of type bool, int, float or string.

    Supported Platforms:
        ``CPU`` ``Ascend`` ``GPU``

    Examples:
        >>> import numpy as np
        >>> from mindspore import dtype as mstype
        >>>
        >>> # Generate 1d int numpy array from 0 - 63
        >>> def generator_1d():
        ...     for i in range(64):
        ...         yield (np.array([i]),)
        >>>
        >>> dataset = ds.GeneratorDataset(generator_1d, column_names='col')
        >>> type_cast_op = c_transforms.TypeCast(mstype.int32)
        >>> dataset = dataset.map(operations=type_cast_op)
    """

    @check_ms_type
    def __init__(self, data_type):
        # Stored as the string form of the converted type, which is what parse() passes on.
        self.data_type = str(mstype_to_detype(data_type))

    def parse(self):
        """Build the `cde.TypeCastOperation` for the stored type string."""
        return cde.TypeCastOperation(self.data_type)
class _SliceOption(cde.SliceOption):
    """
    Internal wrapper around `cde.SliceOption`, used together with SliceOperation.

    Args:
        _SliceOption(Union[int, list(int), slice, None, Ellipsis, bool, _SliceOption]):

            1.  :py:obj:`int`: Slice this index only along the dimension. Negative index is supported.
            2.  :py:obj:`list(int)`: Slice these indices along the dimension. Negative indices are supported.
            3.  :py:obj:`slice`: Slice the generated indices from the slice object along the dimension.
            4.  :py:obj:`None`: Slice the whole dimension. Similar to :py:obj:`:` in Python indexing.
            5.  :py:obj:`Ellipsis`: Slice the whole dimension. Similar to :py:obj:`:` in Python indexing.
            6.  :py:obj:`boolean`: Slice the whole dimension. Similar to :py:obj:`:` in Python indexing.
    """

    @check_slice_option
    def __init__(self, slice_option):
        if slice_option is None or slice_option is Ellipsis:
            # Both spellings are forwarded to the base class as True.
            slice_option = True
        elif isinstance(slice_option, int) and not isinstance(slice_option, bool):
            # A bare index becomes a one-element index list; bool is excluded because
            # it is a subclass of int and must pass through unchanged.
            slice_option = [slice_option]
        super().__init__(slice_option)
class Slice(TensorOperation):
    """
    Slice operation that extracts a tensor using the given n slices.

    The functionality of Slice is similar to NumPy's indexing feature (Currently only rank-1 tensors are supported).

    Args:
        slices (Union[int, list[int], slice, None, Ellipsis]):
            Maximum `n` number of arguments to slice a tensor of rank `n` .
            One object in slices can be one of:

            1.  :py:obj:`int`: Slice this index only along the first dimension. Negative index is supported.
            2.  :py:obj:`list(int)`: Slice these indices along the first dimension. Negative indices are supported.
            3.  :py:obj:`slice`: Slice the generated indices from the
                `slice <https://docs.python.org/3.7/library/functions.html?highlight=slice#slice>`_ object along the
                first dimension. Similar to start:stop:step.
            4.  :py:obj:`None`: Slice the whole dimension. Similar to :py:obj:`[:]` in Python indexing.
            5.  :py:obj:`Ellipsis`: Slice the whole dimension, same result with `None`.

    Raises:
        TypeError: If `slices` is not of type int, list[int], :py:obj:`slice`, :py:obj:`None` or :py:obj:`Ellipsis`.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> # Data before
        >>> # |   col   |
        >>> # +---------+
        >>> # | [1,2,3] |
        >>> # +---------|
        >>> data = [[1, 2, 3]]
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["col"])
        >>> # slice indices 1 and 2 only
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=c_transforms.Slice(slice(1,3)))
        >>> # Data after
        >>> # |   col   |
        >>> # +---------+
        >>> # |  [2,3]  |
        >>> # +---------|
    """

    @check_slice_op
    def __init__(self, *slices):
        # One _SliceOption per positional argument, in the order given.
        self.slice_input_ = [_SliceOption(slice_dim) for slice_dim in slices]

    def parse(self):
        """Build the `cde.SliceOperation` from the stored slice options."""
        return cde.SliceOperation(self.slice_input_)
class Relational(IntEnum):
    """
    Relationship operator.

    Possible enumeration values are: Relational.EQ, Relational.NE, Relational.GT, Relational.GE, Relational.LT,
    Relational.LE.

    - Relational.EQ: refers to Equality.
    - Relational.NE: refers not equal, or Inequality.
    - Relational.GT: refers to Greater than.
    - Relational.GE: refers to Greater than or equal to.
    - Relational.LT: refers to Less than.
    - Relational.LE: refers to Less than or equal to.
    """

    EQ = 0  # equal
    NE = 1  # not equal
    GT = 2  # greater than
    GE = 3  # greater than or equal to
    LT = 4  # less than
    LE = 5  # less than or equal to
# Maps each Python-side Relational member to the cde.RelationalOp member of the same name.
DE_C_RELATIONAL = {relation: getattr(cde.RelationalOp, relation.name) for relation in Relational}
class Mask(TensorOperation):
    r"""
    Mask content of the input tensor with the given predicate.

    Any element of the tensor that matches the predicate will be evaluated to True, otherwise False.

    Args:
        operator (Relational): relational operators, it can be any of [Relational.EQ, Relational.NE, Relational.LT,
            Relational.GT, Relational.LE, Relational.GE], take Relational.EQ as example, EQ refers to equal.
        constant (Union[str, int, float, bool]): Constant to be compared to.
        dtype (mindspore.dtype, optional): Type of the generated mask. Default: mindspore.dtype.bool\_.

    Raises:
        TypeError: `operator` is not of type Relational.
        TypeError: `constant` is not of type string int, float or bool.
        TypeError: `dtype` is not of type mindspore.dtype.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> from mindspore.dataset.transforms.c_transforms import Relational
        >>> # Data before
        >>> # |  col   |
        >>> # +---------+
        >>> # | [1,2,3] |
        >>> # +---------+
        >>> data = [[1, 2, 3]]
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["col"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=c_transforms.Mask(Relational.EQ, 2))
        >>> # Data after
        >>> # |       col         |
        >>> # +--------------------+
        >>> # | [False,True,False] |
        >>> # +--------------------+
    """

    @check_mask_op
    def __init__(self, operator, constant, dtype=mstype.bool_):
        self.operator = operator
        self.dtype = mstype_to_detype(dtype)
        self.constant = cde.Tensor(np.array(constant))

    def parse(self):
        """Build the `cde.MaskOperation`, translating the stored operator via DE_C_RELATIONAL."""
        relational_op = DE_C_RELATIONAL[self.operator]
        return cde.MaskOperation(relational_op, self.constant, self.dtype)
class PadEnd(TensorOperation):
    """
    Pad input tensor according to pad_shape, input tensor needs to have same rank.

    Args:
        pad_shape (list(int)): List of integers representing the shape needed. Dimensions that set to `None` will
            not be padded (i.e., original dim will be used). Shorter dimensions will truncate the values.
        pad_value (Union[str, bytes, int, float, bool], optional): Value used to pad. Default to 0 or empty
            string in case of tensors of strings.

    Raises:
        TypeError: If `pad_shape` is not of type list.
        TypeError: If `pad_value` is not of type str, float, bool, int or bytes.
        TypeError: If elements of `pad_shape` is not of type int.
        ValueError: If elements of `pad_shape` is not of positive.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> # Data before
        >>> # |   col   |
        >>> # +---------+
        >>> # | [1,2,3] |
        >>> # +---------|
        >>> data = [[1, 2, 3]]
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["col"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=c_transforms.PadEnd(pad_shape=[4],
        ...                                                                                pad_value=10))
        >>> # Data after
        >>> # |    col     |
        >>> # +------------+
        >>> # | [1,2,3,10] |
        >>> # +------------|
    """

    @check_pad_end
    def __init__(self, pad_shape, pad_value=None):
        self.pad_shape = cde.TensorShape(pad_shape)
        if pad_value is None:
            # No explicit pad value: None is passed through to the cde operation as-is.
            self.pad_value = None
        else:
            self.pad_value = cde.Tensor(np.array(pad_value))

    def parse(self):
        """Build the `cde.PadEndOperation` from the stored shape and pad value."""
        return cde.PadEndOperation(self.pad_shape, self.pad_value)
class Concatenate(TensorOperation):
    """
    Tensor operation that concatenates all columns into a single tensor.

    Args:
        axis (int, optional): Concatenate the tensors along given axis (Default=0).
        prepend (numpy.array, optional): NumPy array to be prepended to the already concatenated tensors
            (Default=None).
        append (numpy.array, optional): NumPy array to be appended to the already concatenated tensors (Default=None).

    Raises:
        TypeError: If `axis` is not of type int.
        TypeError: If `prepend` is not of type numpy.ndarray.
        TypeError: If `append` is not of type numpy.ndarray.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> import numpy as np
        >>> # concatenate string
        >>> prepend_tensor = np.array(["dw", "df"], dtype='S')
        >>> append_tensor = np.array(["dwsdf", "df"], dtype='S')
        >>> concatenate_op = c_transforms.Concatenate(0, prepend_tensor, append_tensor)
        >>> data = [["This","is","a","string"]]
        >>> dataset = ds.NumpySlicesDataset(data)
        >>> dataset = dataset.map(operations=concatenate_op)
    """

    @check_concat_type
    def __init__(self, axis=0, prepend=None, append=None):
        self.axis = axis

        # Each optional array is wrapped in a cde.Tensor; an omitted one stays None.
        if prepend is None:
            self.prepend = None
        else:
            self.prepend = cde.Tensor(np.array(prepend))

        if append is None:
            self.append = None
        else:
            self.append = cde.Tensor(np.array(append))

    def parse(self):
        """Build the `cde.ConcatenateOperation` from the stored axis and optional tensors."""
        return cde.ConcatenateOperation(self.axis, self.prepend, self.append)
class Duplicate(TensorOperation):
    """
    Duplicate the input tensor to output, only support transform one column each time.

    Raises:
        RuntimeError: If given tensor has two columns.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> # Data before
        >>> # |  x      |
        >>> # +---------+
        >>> # | [1,2,3] |
        >>> # +---------+
        >>> data = [[1,2,3]]
        >>> numpy_slices_dataset = ds.NumpySlicesDataset(data, ["x"])
        >>> numpy_slices_dataset = numpy_slices_dataset.map(operations=c_transforms.Duplicate(),
        ...                                                 input_columns=["x"],
        ...                                                 output_columns=["x", "y"],
        ...                                                 column_order=["x", "y"])
        >>> # Data after
        >>> # |  x      |  y      |
        >>> # +---------+---------+
        >>> # | [1,2,3] | [1,2,3] |
        >>> # +---------+---------+
    """

    def parse(self):
        """Build the argument-free `cde.DuplicateOperation`."""
        duplicate_op = cde.DuplicateOperation()
        return duplicate_op
class Unique(TensorOperation):
    """
    Perform the unique operation on the input tensor, only support transform one column each time.

    Return 3 tensor: unique output tensor, index tensor, count tensor.

    - Output tensor contains all the unique elements of the input tensor
      in the same order that they occur in the input tensor.
    - Index tensor that contains the index of each element of the input tensor in the unique output tensor.
    - Count tensor that contains the count of each element of the output tensor in the input tensor.

    Note:
        Call batch op before calling this function.

    Raises:
        RuntimeError: If given Tensor has two columns.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> # Data before
        >>> # |  x                 |
        >>> # +--------------------+
        >>> # | [[0,1,2], [1,2,3]] |
        >>> # +--------------------+
        >>> data = [[[0,1,2], [1,2,3]]]
        >>> dataset = ds.NumpySlicesDataset(data, ["x"])
        >>> dataset = dataset.map(operations=c_transforms.Unique(),
        ...                       input_columns=["x"],
        ...                       output_columns=["x", "y", "z"],
        ...                       column_order=["x", "y", "z"])
        >>> # Data after
        >>> # |  x      |  y              |z        |
        >>> # +---------+-----------------+---------+
        >>> # | [0,1,2,3] | [0,1,2,1,2,3] | [1,2,2,1]
        >>> # +---------+-----------------+---------+
    """

    def parse(self):
        """Build the argument-free `cde.UniqueOperation`."""
        unique_op = cde.UniqueOperation()
        return unique_op
class Compose(TensorOperation):
    """
    Compose a list of transforms into a single transform.

    Args:
        transforms (list): List of transformations to be applied.

    Raises:
        TypeError: If `transforms` is not of type list.
        ValueError: If `transforms` is empty.
        TypeError: If elements of `transforms` are neither Python callable objects nor data
            processing operations in c_transforms.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> compose = c_transforms.Compose([c_vision.Decode(), c_vision.RandomCrop(512)])
        >>> image_folder_dataset = image_folder_dataset.map(operations=compose)
    """

    @check_random_transform_ops
    def __init__(self, transforms):
        self.transforms = transforms

    def parse(self):
        """Build the `cde.ComposeOperation` over the stored transforms."""
        # Entries exposing a truthy `parse` attribute are replaced by their parsed form;
        # every other entry is forwarded unchanged.
        parsed_ops = [op.parse() if op and getattr(op, 'parse', None) else op
                      for op in self.transforms]
        return cde.ComposeOperation(parsed_ops)
class RandomApply(TensorOperation):
    """
    Randomly perform a series of transforms with a given probability.

    Args:
        transforms (list): List of transformations to be applied.
        prob (float, optional): The probability to apply the transformation list (default=0.5).

    Raises:
        TypeError: If `transforms` is not of type list.
        ValueError: If `transforms` is empty.
        TypeError: If elements of `transforms` are neither Python callable objects nor data
            processing operations in c_transforms.
        TypeError: If `prob` is not of type float.
        ValueError: If `prob` is not in range [0.0, 1.0].

    Supported Platforms:
        ``CPU``

    Examples:
        >>> rand_apply = c_transforms.RandomApply([c_vision.RandomCrop(512)])
        >>> image_folder_dataset = image_folder_dataset.map(operations=rand_apply)
    """

    @check_random_transform_ops
    def __init__(self, transforms, prob=0.5):
        self.transforms = transforms
        self.prob = prob

    def parse(self):
        """Build the `cde.RandomApplyOperation` from the stored probability and transforms."""
        # Entries exposing a truthy `parse` attribute are replaced by their parsed form;
        # every other entry is forwarded unchanged.
        parsed_ops = [op.parse() if op and getattr(op, 'parse', None) else op
                      for op in self.transforms]
        return cde.RandomApplyOperation(self.prob, parsed_ops)
class RandomChoice(TensorOperation):
    """
    Randomly select one transform from a list of transforms to perform operation.

    Args:
        transforms (list): List of transformations to be chosen from to apply.

    Raises:
        TypeError: If `transforms` is not of type list.
        ValueError: If `transforms` is empty.
        TypeError: If elements of `transforms` are neither Python callable objects nor data
            processing operations in c_transforms.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> rand_choice = c_transforms.RandomChoice([c_vision.CenterCrop(50), c_vision.RandomCrop(512)])
        >>> image_folder_dataset = image_folder_dataset.map(operations=rand_choice)
    """

    @check_random_transform_ops
    def __init__(self, transforms):
        self.transforms = transforms

    def parse(self):
        """Build the `cde.RandomChoiceOperation` over the stored transforms."""
        # Entries exposing a truthy `parse` attribute are replaced by their parsed form;
        # every other entry is forwarded unchanged.
        parsed_ops = [op.parse() if op and getattr(op, 'parse', None) else op
                      for op in self.transforms]
        return cde.RandomChoiceOperation(parsed_ops)
class Plugin(TensorOperation):
    """
    Plugin support for MindData. Use this class to dynamically load a .so file (shared library) and execute its symbols.

    Args:
        lib_path (str): Path to .so file which is compiled to support MindData plugin.
        func_name (str): Name of the function to load from the .so file.
        user_args (str, optional): Serialized args to pass to the plugin. Only needed if "func_name" requires one.

    Raises:
        TypeError: If `lib_path` is not of type string.
        TypeError: If `func_name` is not of type string.
        TypeError: If `user_args` is not of type string.

    Supported Platforms:
        ``CPU``

    Examples:
        >>> plugin = c_transforms.Plugin("pluginlib.so", "PluginDecode")
        >>> image_folder_dataset = image_folder_dataset.map(operations=plugin)
    """

    @check_plugin
    def __init__(self, lib_path, func_name, user_args=None):
        self.lib_path = lib_path
        self.func_name = func_name
        # An omitted user_args is stored as the empty string rather than None.
        self.user_args = "" if user_args is None else user_args

    def parse(self):
        """Build the `cde.PluginOperation` from the stored library path, symbol name and args."""
        return cde.PluginOperation(self.lib_path, self.func_name, self.user_args)