Program Listing for File vision.h

Return to documentation for file (include/runtime/include/dataset/vision.h)

#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_

#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "include/api/dual_abi_helper.h"
#include "include/api/status.h"
#include "include/dataset/constants.h"
#include "include/dataset/transforms.h"
#include "include/dataset/vision_lite.h"

namespace mindspore {
namespace dataset {
// Forward declaration: this header only refers to TensorOperation through std::shared_ptr<TensorOperation>.
class TensorOperation;

// Transform operations for performing computer vision.
namespace vision {
/// \brief Declaration of the AdjustBrightness vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests an image brightness adjustment - confirm against
///     the implementation.
class DATASET_API AdjustBrightness final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] brightness_factor Brightness factor (units and valid range are not visible in this header).
  explicit AdjustBrightness(float brightness_factor);

  /// \brief Destructor (defaulted).
  ~AdjustBrightness() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the AdjustContrast vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests an image contrast adjustment - confirm against
///     the implementation.
class DATASET_API AdjustContrast final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] contrast_factor Contrast factor (units and valid range are not visible in this header).
  explicit AdjustContrast(float contrast_factor);

  /// \brief Destructor (defaulted).
  ~AdjustContrast() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the AdjustGamma vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a gamma correction - confirm against the
///     implementation.
class DATASET_API AdjustGamma final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] gamma Gamma value (valid range is not visible in this header).
  /// \param[in] gain Gain value; defaults to 1.
  explicit AdjustGamma(float gamma, float gain = 1);

  /// \brief Destructor (defaulted).
  ~AdjustGamma() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the AdjustHue vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests an image hue adjustment - confirm against the
///     implementation.
class DATASET_API AdjustHue final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] hue_factor Hue factor (units and valid range are not visible in this header).
  explicit AdjustHue(float hue_factor);

  /// \brief Destructor (defaulted).
  ~AdjustHue() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the AdjustSaturation vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests an image saturation adjustment - confirm against
///     the implementation.
class DATASET_API AdjustSaturation final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] saturation_factor Saturation factor (units and valid range are not visible in this header).
  explicit AdjustSaturation(float saturation_factor);

  /// \brief Destructor (defaulted).
  ~AdjustSaturation() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the AdjustSharpness vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests an image sharpness adjustment - confirm against
///     the implementation.
class DATASET_API AdjustSharpness final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] sharpness_factor Sharpness factor (units and valid range are not visible in this header).
  explicit AdjustSharpness(float sharpness_factor);

  /// \brief Destructor (defaulted).
  ~AdjustSharpness() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the AutoAugment vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests policy-driven automatic augmentation - confirm
///     against the implementation.
class DATASET_API AutoAugment final : public TensorTransform {
 public:
  /// \brief Constructor; every argument has a default, so it also serves as the default constructor.
  /// \param[in] policy Augmentation policy; defaults to AutoAugmentPolicy::kImageNet.
  /// \param[in] interpolation Interpolation mode; defaults to InterpolationMode::kNearestNeighbour.
  /// \param[in] fill_value Fill values; defaults to {0, 0, 0} (presumably one value per channel - confirm).
  explicit AutoAugment(AutoAugmentPolicy policy = AutoAugmentPolicy::kImageNet,
                       InterpolationMode interpolation = InterpolationMode::kNearestNeighbour,
                       const std::vector<uint8_t> &fill_value = {0, 0, 0});

  /// \brief Destructor (defaulted).
  ~AutoAugment() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the AutoContrast vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests automatic contrast adjustment - confirm against
///     the implementation.
class DATASET_API AutoContrast final : public TensorTransform {
 public:
  /// \brief Constructor; every argument has a default, so it also serves as the default constructor.
  /// \param[in] cutoff Cutoff value; defaults to 0.0 (units and valid range are not visible in this header).
  /// \param[in] ignore Values to ignore; defaults to an empty vector (presumably pixel values - confirm).
  explicit AutoContrast(float cutoff = 0.0, const std::vector<uint32_t> &ignore = {});

  /// \brief Destructor (defaulted).
  ~AutoContrast() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the BoundingBoxAugment vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests that a wrapped transform is applied to bounding-box
///     regions - confirm against the implementation. The three constructors differ only in how the wrapped
///     transform is passed in.
class DATASET_API BoundingBoxAugment final : public TensorTransform {
 public:
  /// \brief Constructor taking the wrapped transform as a raw pointer.
  /// \param[in] transform Raw pointer to a TensorTransform (ownership and lifetime expectations are not visible
  ///     in this header - confirm before passing temporaries).
  /// \param[in] ratio Ratio value; defaults to 0.3 (meaning and valid range are not visible in this header).
  explicit BoundingBoxAugment(TensorTransform *transform, float ratio = 0.3);

  /// \brief Constructor taking the wrapped transform as a shared pointer.
  /// \param[in] transform Shared pointer to a TensorTransform.
  /// \param[in] ratio Ratio value; defaults to 0.3.
  explicit BoundingBoxAugment(const std::shared_ptr<TensorTransform> &transform, float ratio = 0.3);

  /// \brief Constructor taking the wrapped transform as a std::reference_wrapper.
  /// \param[in] transform Reference wrapper around a TensorTransform.
  /// \param[in] ratio Ratio value; defaults to 0.3.
  explicit BoundingBoxAugment(const std::reference_wrapper<TensorTransform> &transform, float ratio = 0.3);

  /// \brief Destructor (defaulted).
  ~BoundingBoxAugment() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the ConvertColor vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a colour-space conversion - confirm against the
///     implementation.
class DATASET_API ConvertColor final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] convert_mode Conversion mode, a ConvertMode value (the enumerators are declared outside this
  ///     header).
  explicit ConvertColor(ConvertMode convert_mode);

  /// \brief Destructor (defaulted).
  ~ConvertColor() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the CutMixBatch vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a CutMix augmentation over a batch - confirm
///     against the implementation.
class DATASET_API CutMixBatch final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] image_batch_format Batch layout, an ImageBatchFormat value (enumerators declared outside this
  ///     header).
  /// \param[in] alpha Alpha value; defaults to 1.0 (meaning and valid range are not visible in this header).
  /// \param[in] prob Defaults to 1.0 (presumably a probability - confirm).
  explicit CutMixBatch(ImageBatchFormat image_batch_format, float alpha = 1.0, float prob = 1.0);

  /// \brief Destructor (defaulted).
  ~CutMixBatch() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the CutOut vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests that patches are cut out of the image - confirm
///     against the implementation.
class DATASET_API CutOut final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] length Length value (presumably the patch side length - confirm).
  /// \param[in] num_patches Number of patches; defaults to 1.
  /// \param[in] is_hwc Defaults to true (presumably selects an <H, W, C> input layout - confirm).
  explicit CutOut(int32_t length, int32_t num_patches = 1, bool is_hwc = true);

  /// \brief Destructor (defaulted).
  ~CutOut() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of EncodeJpeg; by its name and signature it presumably JPEG-encodes `image` into `*output`
///     (defined outside this header - confirm against the implementation).
/// \param[in] image Input tensor.
/// \param[out] output Pointer to the tensor receiving the result (null handling is not visible here).
/// \param[in] quality Defaults to 75 (valid range is not visible in this header).
/// \return Status value (presumably reports success or failure).
Status DATASET_API EncodeJpeg(const mindspore::MSTensor &image, mindspore::MSTensor *output, int quality = 75);

/// \brief Declaration of EncodePng; by its name and signature it presumably PNG-encodes `image` into `*output`
///     (defined outside this header - confirm against the implementation).
/// \param[in] image Input tensor.
/// \param[out] output Pointer to the tensor receiving the result (null handling is not visible here).
/// \param[in] compression_level Defaults to 6 (valid range is not visible in this header).
/// \return Status value (presumably reports success or failure).
Status DATASET_API EncodePng(const mindspore::MSTensor &image, mindspore::MSTensor *output, int compression_level = 6);

/// \brief Declaration of the Equalize vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests histogram equalization - confirm against the
///     implementation. The class takes no parameters and declares no data members.
class DATASET_API Equalize final : public TensorTransform {
 public:
  /// \brief Default constructor (defined outside this header).
  Equalize();

  /// \brief Destructor (defaulted).
  ~Equalize() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;
};

/// \brief Declaration of the Erase vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name and parameters suggest that a rectangular region is
///     overwritten with `value` - confirm against the implementation.
class DATASET_API Erase final : public TensorTransform {
 public:
  /// \brief Constructor (not explicit; it needs at least four arguments).
  /// \param[in] top Top coordinate (presumably of the region's upper-left corner - confirm).
  /// \param[in] left Left coordinate (presumably of the region's upper-left corner - confirm).
  /// \param[in] height Height value (presumably of the region).
  /// \param[in] width Width value (presumably of the region).
  /// \param[in] value Defaults to {0, 0, 0} (presumably one fill value per channel - confirm).
  /// \param[in] inplace Defaults to false (exact effect is not visible in this header).
  Erase(int32_t top, int32_t left, int32_t height, int32_t width, const std::vector<uint8_t> &value = {0, 0, 0},
        bool inplace = false);

  /// \brief Destructor (defaulted).
  ~Erase() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of GetImageNumChannels; by its name and signature it presumably reports the channel count
///     of `image` through `*channels` (defined outside this header - confirm against the implementation).
/// \param[in] image Input tensor.
/// \param[out] channels Pointer to the dsize_t receiving the result (null handling is not visible here).
/// \return Status value (presumably reports success or failure).
Status DATASET_API GetImageNumChannels(const mindspore::MSTensor &image, dsize_t *channels);

/// \brief Declaration of GetImageSize; by its name and signature it presumably reports the size of `image`
///     through `*size` (defined outside this header - confirm against the implementation).
/// \param[in] image Input tensor.
/// \param[out] size Pointer to the vector receiving the result (element order, e.g. height/width, is not visible
///     here - confirm).
/// \return Status value (presumably reports success or failure).
Status DATASET_API GetImageSize(const mindspore::MSTensor &image, std::vector<dsize_t> *size);

/// \brief Declaration of the HorizontalFlip vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a horizontal image flip - confirm against the
///     implementation. The class takes no parameters and declares no data members.
class DATASET_API HorizontalFlip final : public TensorTransform {
 public:
  /// \brief Default constructor (defined outside this header).
  HorizontalFlip();

  /// \brief Destructor (defaulted).
  ~HorizontalFlip() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;
};

/// \brief Declaration of the Invert vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests colour inversion - confirm against the
///     implementation. The class takes no parameters and declares no data members.
class DATASET_API Invert final : public TensorTransform {
 public:
  /// \brief Default constructor (defined outside this header).
  Invert();

  /// \brief Destructor (defaulted).
  ~Invert() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;
};

/// \brief Declaration of the MixUpBatch vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a MixUp augmentation over a batch - confirm
///     against the implementation.
class DATASET_API MixUpBatch final : public TensorTransform {
 public:
  /// \brief Constructor; the single argument has a default, so it also serves as the default constructor.
  /// \param[in] alpha Alpha value; defaults to 1 (meaning and valid range are not visible in this header).
  explicit MixUpBatch(float alpha = 1);

  /// \brief Destructor (defaulted).
  ~MixUpBatch() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the NormalizePad vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests normalization followed by padding - confirm
///     against the implementation.
class DATASET_API NormalizePad final : public TensorTransform {
 public:
  /// \brief Inline convenience constructor taking the data type as std::string.
  /// \note It converts `dtype` with StringToChar() and delegates to the std::vector<char> overload below, so the
  ///     std::string itself is not passed to the out-of-line constructor.
  /// \param[in] mean Mean values (presumably one per channel - confirm).
  /// \param[in] std Standard-deviation values (presumably one per channel - confirm).
  /// \param[in] dtype Data type name; defaults to "float32" (accepted names are not visible in this header).
  /// \param[in] is_hwc Defaults to true (presumably selects an <H, W, C> input layout - confirm).
  NormalizePad(const std::vector<float> &mean, const std::vector<float> &std, const std::string &dtype = "float32",
               bool is_hwc = true)
      : NormalizePad(mean, std, StringToChar(dtype), is_hwc) {}

  /// \brief Constructor taking the data type as a character vector (defined outside this header).
  /// \param[in] mean Mean values (presumably one per channel - confirm).
  /// \param[in] std Standard-deviation values (presumably one per channel - confirm).
  /// \param[in] dtype Data type name as characters; no default on this overload.
  /// \param[in] is_hwc Defaults to true (presumably selects an <H, W, C> input layout - confirm).
  NormalizePad(const std::vector<float> &mean, const std::vector<float> &std, const std::vector<char> &dtype,
               bool is_hwc = true);

  /// \brief Destructor (defaulted).
  ~NormalizePad() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the PadToSize vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests padding an image up to a target size - confirm
///     against the implementation.
class DATASET_API PadToSize final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] size Target size values (expected element count and order are not visible in this header).
  /// \param[in] offset Offset values; defaults to an empty vector (meaning of "empty" is not visible here).
  /// \param[in] fill_value Fill values; defaults to {0}.
  /// \param[in] padding_mode Border type; defaults to BorderType::kConstant.
  explicit PadToSize(const std::vector<int32_t> &size, const std::vector<int32_t> &offset = {},
                     const std::vector<uint8_t> &fill_value = {0}, BorderType padding_mode = BorderType::kConstant);

  /// \brief Destructor (defaulted).
  ~PadToSize() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the Perspective vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a perspective warp defined by point
///     correspondences - confirm against the implementation.
class DATASET_API Perspective final : public TensorTransform {
 public:
  /// \brief Constructor (not explicit; all three arguments are required).
  /// \param[in] start_points Nested vector of int32_t points (point count and coordinate order are not visible
  ///     in this header - confirm).
  /// \param[in] end_points Nested vector of int32_t points, same shape expectations as `start_points` presumably.
  /// \param[in] interpolation Interpolation mode; no default.
  Perspective(const std::vector<std::vector<int32_t>> &start_points,
              const std::vector<std::vector<int32_t>> &end_points, InterpolationMode interpolation);

  /// \brief Destructor (defaulted).
  ~Perspective() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the Posterize vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests reducing the bit depth of colour channels -
///     confirm against the implementation.
class DATASET_API Posterize final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] bits Bit count (valid range is not visible in this header).
  explicit Posterize(uint8_t bits);

  /// \brief Destructor (defaulted).
  ~Posterize() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandAugment vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests the RandAugment augmentation scheme - confirm
///     against the implementation.
class DATASET_API RandAugment final : public TensorTransform {
 public:
  /// \brief Constructor; every argument has a default, so it also serves as the default constructor.
  /// \param[in] num_ops Number of operations; defaults to 2.
  /// \param[in] magnitude Magnitude value; defaults to 9.
  /// \param[in] num_magnitude_bins Number of magnitude bins; defaults to 31.
  /// \param[in] interpolation Interpolation mode; defaults to InterpolationMode::kNearestNeighbour.
  /// \param[in] fill_value Fill values; defaults to {0, 0, 0} (presumably one value per channel - confirm).
  explicit RandAugment(int32_t num_ops = 2, int32_t magnitude = 9, int32_t num_magnitude_bins = 31,
                       InterpolationMode interpolation = InterpolationMode::kNearestNeighbour,
                       const std::vector<uint8_t> &fill_value = {0, 0, 0});

  /// \brief Destructor (defaulted).
  ~RandAugment() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomAutoContrast vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests auto-contrast applied at random - confirm against
///     the implementation.
class DATASET_API RandomAutoContrast final : public TensorTransform {
 public:
  /// \brief Constructor; every argument has a default, so it also serves as the default constructor.
  /// \param[in] cutoff Cutoff value; defaults to 0.0 (units and valid range are not visible in this header).
  /// \param[in] ignore Values to ignore; defaults to an empty vector (presumably pixel values - confirm).
  /// \param[in] prob Defaults to 0.5 (presumably the probability of applying the operation - confirm).
  explicit RandomAutoContrast(float cutoff = 0.0, const std::vector<uint32_t> &ignore = {}, float prob = 0.5);

  /// \brief Destructor (defaulted).
  ~RandomAutoContrast() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomAdjustSharpness vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a sharpness adjustment applied at random -
///     confirm against the implementation.
class DATASET_API RandomAdjustSharpness final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] degree Degree value (meaning and valid range are not visible in this header).
  /// \param[in] prob Defaults to 0.5 (presumably the probability of applying the operation - confirm).
  explicit RandomAdjustSharpness(float degree, float prob = 0.5);

  /// \brief Destructor (defaulted).
  ~RandomAdjustSharpness() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomColor vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a random colour adjustment - confirm against the
///     implementation.
class DATASET_API RandomColor final : public TensorTransform {
 public:
  /// \brief Constructor (not explicit; both arguments are required).
  /// \param[in] t_lb First bound (presumably the lower bound of a sampling range - confirm).
  /// \param[in] t_ub Second bound (presumably the upper bound of a sampling range - confirm).
  RandomColor(float t_lb, float t_ub);

  /// \brief Destructor (defaulted).
  ~RandomColor() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomColorAdjust vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name and parameters suggest random brightness, contrast,
///     saturation and hue adjustment - confirm against the implementation.
class DATASET_API RandomColorAdjust final : public TensorTransform {
 public:
  /// \brief Constructor; every argument has a default, so it also serves as the default constructor.
  /// \param[in] brightness Defaults to {1.0, 1.0} (presumably a {min, max} range - confirm).
  /// \param[in] contrast Defaults to {1.0, 1.0} (presumably a {min, max} range - confirm).
  /// \param[in] saturation Defaults to {1.0, 1.0} (presumably a {min, max} range - confirm).
  /// \param[in] hue Defaults to {0.0, 0.0} (presumably a {min, max} range - confirm).
  explicit RandomColorAdjust(const std::vector<float> &brightness = {1.0, 1.0},
                             const std::vector<float> &contrast = {1.0, 1.0},
                             const std::vector<float> &saturation = {1.0, 1.0},
                             const std::vector<float> &hue = {0.0, 0.0});

  /// \brief Destructor (defaulted).
  ~RandomColorAdjust() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomCrop vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests cropping at a random location - confirm against
///     the implementation.
class DATASET_API RandomCrop final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] size Crop size values (expected element count and order are not visible in this header).
  /// \param[in] padding Padding values; defaults to {0, 0, 0, 0} (edge order is not visible here - confirm).
  /// \param[in] pad_if_needed Defaults to false (exact trigger condition is not visible in this header).
  /// \param[in] fill_value Fill values; defaults to {0, 0, 0} (presumably one value per channel - confirm).
  /// \param[in] padding_mode Border type; defaults to BorderType::kConstant.
  explicit RandomCrop(const std::vector<int32_t> &size, const std::vector<int32_t> &padding = {0, 0, 0, 0},
                      bool pad_if_needed = false, const std::vector<uint8_t> &fill_value = {0, 0, 0},
                      BorderType padding_mode = BorderType::kConstant);

  /// \brief Destructor (defaulted).
  ~RandomCrop() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomCropDecodeResize vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a combined random crop, decode and resize -
///     confirm against the implementation.
class DATASET_API RandomCropDecodeResize final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] size Output size values (expected element count and order are not visible in this header).
  /// \param[in] scale Defaults to {0.08, 1.0} (presumably a {min, max} range - confirm).
  /// \param[in] ratio Defaults to {3. / 4, 4. / 3} (presumably a {min, max} aspect-ratio range - confirm).
  /// \param[in] interpolation Interpolation mode; defaults to InterpolationMode::kLinear.
  /// \param[in] max_attempts Defaults to 10 (what counts as an attempt is not visible in this header).
  explicit RandomCropDecodeResize(const std::vector<int32_t> &size, const std::vector<float> &scale = {0.08, 1.0},
                                  const std::vector<float> &ratio = {3. / 4, 4. / 3},
                                  InterpolationMode interpolation = InterpolationMode::kLinear,
                                  int32_t max_attempts = 10);

  /// \brief Destructor (defaulted).
  ~RandomCropDecodeResize() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomCropWithBBox vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a random crop that also handles bounding boxes -
///     confirm against the implementation. The constructor signature matches RandomCrop's parameter for parameter.
class DATASET_API RandomCropWithBBox final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] size Crop size values (expected element count and order are not visible in this header).
  /// \param[in] padding Padding values; defaults to {0, 0, 0, 0} (edge order is not visible here - confirm).
  /// \param[in] pad_if_needed Defaults to false (exact trigger condition is not visible in this header).
  /// \param[in] fill_value Fill values; defaults to {0, 0, 0} (presumably one value per channel - confirm).
  /// \param[in] padding_mode Border type; defaults to BorderType::kConstant.
  explicit RandomCropWithBBox(const std::vector<int32_t> &size, const std::vector<int32_t> &padding = {0, 0, 0, 0},
                              bool pad_if_needed = false, const std::vector<uint8_t> &fill_value = {0, 0, 0},
                              BorderType padding_mode = BorderType::kConstant);

  /// \brief Destructor (defaulted).
  ~RandomCropWithBBox() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomEqualize vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests histogram equalization applied at random -
///     confirm against the implementation.
class DATASET_API RandomEqualize final : public TensorTransform {
 public:
  /// \brief Constructor; the single argument has a default, so it also serves as the default constructor.
  /// \param[in] prob Defaults to 0.5 (presumably the probability of applying the operation - confirm).
  explicit RandomEqualize(float prob = 0.5);

  /// \brief Destructor (defaulted).
  ~RandomEqualize() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomHorizontalFlip vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a horizontal flip applied at random - confirm
///     against the implementation.
class DATASET_API RandomHorizontalFlip final : public TensorTransform {
 public:
  /// \brief Constructor; the single argument has a default, so it also serves as the default constructor.
  /// \param[in] prob Defaults to 0.5 (presumably the probability of flipping - confirm).
  explicit RandomHorizontalFlip(float prob = 0.5);

  /// \brief Destructor (defaulted).
  ~RandomHorizontalFlip() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomHorizontalFlipWithBBox vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a random horizontal flip that also handles
///     bounding boxes - confirm against the implementation.
class DATASET_API RandomHorizontalFlipWithBBox final : public TensorTransform {
 public:
  /// \brief Constructor; the single argument has a default, so it also serves as the default constructor.
  /// \param[in] prob Defaults to 0.5 (presumably the probability of flipping - confirm).
  explicit RandomHorizontalFlipWithBBox(float prob = 0.5);

  /// \brief Destructor (defaulted).
  ~RandomHorizontalFlipWithBBox() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomInvert vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests colour inversion applied at random - confirm
///     against the implementation.
class DATASET_API RandomInvert final : public TensorTransform {
 public:
  /// \brief Constructor; the single argument has a default, so it also serves as the default constructor.
  /// \param[in] prob Defaults to 0.5 (presumably the probability of applying the operation - confirm).
  explicit RandomInvert(float prob = 0.5);

  /// \brief Destructor (defaulted).
  ~RandomInvert() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomLighting vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests random lighting noise - confirm against the
///     implementation.
class DATASET_API RandomLighting final : public TensorTransform {
 public:
  /// \brief Constructor; the single argument has a default, so it also serves as the default constructor.
  /// \param[in] alpha Alpha value; defaults to 0.05 (meaning and valid range are not visible in this header).
  explicit RandomLighting(float alpha = 0.05);

  /// \brief Destructor (defaulted).
  ~RandomLighting() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomPosterize vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests posterization with a randomly chosen bit depth -
///     confirm against the implementation.
class DATASET_API RandomPosterize final : public TensorTransform {
 public:
  /// \brief Constructor; the single argument has a default, so it also serves as the default constructor.
  /// \param[in] bit_range Defaults to {4, 8} (presumably a {min, max} bit range - confirm).
  explicit RandomPosterize(const std::vector<uint8_t> &bit_range = {4, 8});

  /// \brief Destructor (defaulted).
  ~RandomPosterize() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomResize vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; what is randomized during the resize is not shown - confirm
///     against the implementation.
class DATASET_API RandomResize final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] size Target size values (expected element count and order are not visible in this header).
  explicit RandomResize(const std::vector<int32_t> &size);

  /// \brief Destructor (defaulted).
  ~RandomResize() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomResizeWithBBox vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a random resize that also handles bounding boxes -
///     confirm against the implementation.
class DATASET_API RandomResizeWithBBox final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] size Target size values (expected element count and order are not visible in this header).
  explicit RandomResizeWithBBox(const std::vector<int32_t> &size);

  /// \brief Destructor (defaulted).
  ~RandomResizeWithBBox() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomResizedCrop vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a random crop followed by a resize - confirm
///     against the implementation.
class DATASET_API RandomResizedCrop final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] size Output size values (expected element count and order are not visible in this header).
  /// \param[in] scale Defaults to {0.08, 1.0} (presumably a {min, max} range - confirm).
  /// \param[in] ratio Defaults to {3. / 4., 4. / 3.} (presumably a {min, max} aspect-ratio range - confirm).
  /// \param[in] interpolation Interpolation mode; defaults to InterpolationMode::kLinear.
  /// \param[in] max_attempts Defaults to 10 (what counts as an attempt is not visible in this header).
  explicit RandomResizedCrop(const std::vector<int32_t> &size, const std::vector<float> &scale = {0.08, 1.0},
                             const std::vector<float> &ratio = {3. / 4., 4. / 3.},
                             InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10);

  /// \brief Destructor (defaulted).
  ~RandomResizedCrop() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomResizedCropWithBBox vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a random crop-and-resize that also handles
///     bounding boxes - confirm against the implementation.
class DATASET_API RandomResizedCropWithBBox final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] size Output size values (expected element count and order are not visible in this header).
  /// \param[in] scale Defaults to {0.08, 1.0} (presumably a {min, max} range - confirm).
  /// \param[in] ratio Defaults to {3. / 4., 4. / 3.} (presumably a {min, max} aspect-ratio range - confirm).
  /// \param[in] interpolation Interpolation mode; defaults to InterpolationMode::kLinear.
  /// \param[in] max_attempts Defaults to 10 (what counts as an attempt is not visible in this header).
  explicit RandomResizedCropWithBBox(const std::vector<int32_t> &size, const std::vector<float> &scale = {0.08, 1.0},
                                     const std::vector<float> &ratio = {3. / 4., 4. / 3.},
                                     InterpolationMode interpolation = InterpolationMode::kLinear,
                                     int32_t max_attempts = 10);

  /// \brief Destructor (defaulted).
  ~RandomResizedCropWithBBox() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomRotation vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests rotation by a randomly chosen angle - confirm
///     against the implementation.
class DATASET_API RandomRotation final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] degrees Degree values (presumably a {min, max} angle range - confirm).
  /// \param[in] resample Interpolation mode; defaults to InterpolationMode::kNearestNeighbour.
  /// \param[in] expand Defaults to false (exact effect is not visible in this header).
  /// \param[in] center Defaults to an empty vector (meaning of "empty" is not visible here - confirm).
  /// \param[in] fill_value Fill values; defaults to {0, 0, 0} (presumably one value per channel - confirm).
  explicit RandomRotation(const std::vector<float> &degrees,
                          InterpolationMode resample = InterpolationMode::kNearestNeighbour, bool expand = false,
                          const std::vector<float> &center = {}, const std::vector<uint8_t> &fill_value = {0, 0, 0});

  /// \brief Destructor (defaulted).
  ~RandomRotation() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomSelectSubpolicy vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests that one sub-policy is picked at random and
///     applied - confirm against the implementation. The three constructors differ only in how each transform
///     inside the policy is referenced.
class DATASET_API RandomSelectSubpolicy final : public TensorTransform {
 public:
  /// \brief Constructor taking transforms as raw pointers.
  /// \param[in] policy Vector of sub-policies; each sub-policy is a vector of (TensorTransform pointer, double)
  ///     pairs. The double is presumably a probability - confirm. Ownership and lifetime expectations for the
  ///     pointers are not visible in this header.
  explicit RandomSelectSubpolicy(const std::vector<std::vector<std::pair<TensorTransform *, double>>> &policy);

  /// \brief Constructor taking transforms as shared pointers.
  /// \param[in] policy Vector of sub-policies; each sub-policy is a vector of (shared_ptr<TensorTransform>, double)
  ///     pairs.
  explicit RandomSelectSubpolicy(
    const std::vector<std::vector<std::pair<std::shared_ptr<TensorTransform>, double>>> &policy);

  /// \brief Constructor taking transforms as reference wrappers.
  /// \param[in] policy Vector of sub-policies; each sub-policy is a vector of
  ///     (reference_wrapper<TensorTransform>, double) pairs.
  explicit RandomSelectSubpolicy(
    const std::vector<std::vector<std::pair<std::reference_wrapper<TensorTransform>, double>>> &policy);

  /// \brief Destructor (defaulted).
  ~RandomSelectSubpolicy() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomSharpness vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a sharpness adjustment by a random degree -
///     confirm against the implementation.
class DATASET_API RandomSharpness final : public TensorTransform {
 public:
  /// \brief Constructor; the single argument has a default, so it also serves as the default constructor.
  /// \param[in] degrees Defaults to {0.1, 1.9} (presumably a {min, max} range - confirm).
  explicit RandomSharpness(const std::vector<float> &degrees = {0.1, 1.9});

  /// \brief Destructor (defaulted).
  ~RandomSharpness() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomSolarize vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests solarization with a randomly chosen threshold -
///     confirm against the implementation.
class DATASET_API RandomSolarize final : public TensorTransform {
 public:
  /// \brief Constructor; the single argument has a default, so it also serves as the default constructor.
  /// \param[in] threshold Defaults to {0, 255} (presumably a {min, max} range - confirm).
  explicit RandomSolarize(const std::vector<uint8_t> &threshold = {0, 255});

  /// \brief Destructor (defaulted).
  ~RandomSolarize() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomVerticalFlip vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a vertical flip applied at random - confirm
///     against the implementation.
class DATASET_API RandomVerticalFlip final : public TensorTransform {
 public:
  /// \brief Constructor; the single argument has a default, so it also serves as the default constructor.
  /// \param[in] prob Defaults to 0.5 (presumably the probability of flipping - confirm).
  explicit RandomVerticalFlip(float prob = 0.5);

  /// \brief Destructor (defaulted).
  ~RandomVerticalFlip() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomVerticalFlipWithBBox vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a random vertical flip that also handles bounding
///     boxes - confirm against the implementation.
class DATASET_API RandomVerticalFlipWithBBox final : public TensorTransform {
 public:
  /// \brief Constructor; the single argument has a default, so it also serves as the default constructor.
  /// \param[in] prob Defaults to 0.5 (presumably the probability of flipping - confirm).
  explicit RandomVerticalFlipWithBBox(float prob = 0.5);

  /// \brief Destructor (defaulted).
  ~RandomVerticalFlipWithBBox() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of ReadFile; by its name and signature it presumably reads the file at `filename` into
///     `*output` (defined outside this header - confirm against the implementation).
/// \param[in] filename Path of the file.
/// \param[out] output Pointer to the tensor receiving the result (null handling is not visible here).
/// \return Status value (presumably reports success or failure).
Status DATASET_API ReadFile(const std::string &filename, mindspore::MSTensor *output);

/// \brief Declaration of ReadImage; by its name and signature it presumably reads and decodes the image file at
///     `filename` into `*output` (defined outside this header - confirm against the implementation).
/// \param[in] filename Path of the image file.
/// \param[out] output Pointer to the tensor receiving the result (null handling is not visible here).
/// \param[in] mode Read mode; defaults to ImageReadMode::kUNCHANGED.
/// \return Status value (presumably reports success or failure).
Status DATASET_API ReadImage(const std::string &filename, mindspore::MSTensor *output,
                             ImageReadMode mode = ImageReadMode::kUNCHANGED);

/// \brief Declaration of the ResizedCrop vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name and parameters suggest cropping a fixed region and
///     resizing it - confirm against the implementation.
class DATASET_API ResizedCrop final : public TensorTransform {
 public:
  /// \brief Constructor (not explicit; it needs at least five arguments).
  /// \param[in] top Top coordinate (presumably of the crop's upper-left corner - confirm).
  /// \param[in] left Left coordinate (presumably of the crop's upper-left corner - confirm).
  /// \param[in] height Height value (presumably of the crop region).
  /// \param[in] width Width value (presumably of the crop region).
  /// \param[in] size Output size values (expected element count and order are not visible in this header).
  /// \param[in] interpolation Interpolation mode; defaults to InterpolationMode::kLinear.
  ResizedCrop(int32_t top, int32_t left, int32_t height, int32_t width, const std::vector<int32_t> &size,
              InterpolationMode interpolation = InterpolationMode::kLinear);

  /// \brief Destructor (defaulted).
  ~ResizedCrop() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the ResizeWithBBox vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a resize that also handles bounding boxes -
///     confirm against the implementation.
class DATASET_API ResizeWithBBox final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] size Target size values (expected element count and order are not visible in this header).
  /// \param[in] interpolation Interpolation mode; defaults to InterpolationMode::kLinear.
  explicit ResizeWithBBox(const std::vector<int32_t> &size,
                          InterpolationMode interpolation = InterpolationMode::kLinear);

  /// \brief Destructor (defaulted).
  ~ResizeWithBBox() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RGBA2BGR vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a 4-channel RGBA to 3-channel BGR conversion -
///     confirm against the implementation. The class takes no parameters and declares no data members.
class DATASET_API RGBA2BGR final : public TensorTransform {
 public:
  /// \brief Default constructor (defined outside this header).
  RGBA2BGR();

  /// \brief Destructor (defaulted).
  ~RGBA2BGR() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;
};

/// \brief Declaration of the RGBA2RGB vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a 4-channel RGBA to 3-channel RGB conversion -
///     confirm against the implementation. The class takes no parameters and declares no data members.
class DATASET_API RGBA2RGB final : public TensorTransform {
 public:
  /// \brief Default constructor (defined outside this header).
  RGBA2RGB();

  /// \brief Destructor (defaulted).
  ~RGBA2RGB() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;
};

/// \brief Declaration of the SlicePatches vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name and parameters suggest slicing an image into a grid of
///     patches - confirm against the implementation.
class DATASET_API SlicePatches final : public TensorTransform {
 public:
  /// \brief Constructor; every argument has a default, so it also serves as the default constructor.
  /// \param[in] num_height Defaults to 1 (presumably the number of patches along the height - confirm).
  /// \param[in] num_width Defaults to 1 (presumably the number of patches along the width - confirm).
  /// \param[in] slice_mode Slice mode; defaults to SliceMode::kPad.
  /// \param[in] fill_value Fill value; defaults to 0 (presumably used for padding - confirm).
  explicit SlicePatches(int32_t num_height = 1, int32_t num_width = 1, SliceMode slice_mode = SliceMode::kPad,
                        uint8_t fill_value = 0);

  /// \brief Destructor (defaulted).
  ~SlicePatches() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the Solarize vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests inverting pixels selected by a threshold -
///     confirm against the implementation.
class DATASET_API Solarize final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] threshold Threshold values as floats; no default (expected element count is not visible in this
  ///     header - presumably a {min, max} range, confirm).
  explicit Solarize(const std::vector<float> &threshold);

  /// \brief Destructor (defaulted).
  ~Solarize() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the ToTensor vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests converting an image to a tensor of a chosen
///     output type - confirm against the implementation.
class DATASET_API ToTensor final : public TensorTransform {
 public:
  /// \brief Default constructor (the output type it selects is not visible in this header).
  ToTensor();
  /// \brief Constructor taking the output type by name.
  /// \param[in] output_type Output type name, passed by value (accepted names are not visible in this header).
  // NOTE(review): this overload takes std::string directly, whereas NormalizePad in this header converts its
  // std::string argument with StringToChar() before calling an out-of-line constructor - confirm this is intended.
  explicit ToTensor(std::string output_type);
  /// \brief Constructor taking the output type as a mindspore::DataType value.
  /// \param[in] output_type Output data type.
  explicit ToTensor(mindspore::DataType output_type);

  /// \brief Destructor (defaulted).
  ~ToTensor() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the TrivialAugmentWide vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests the TrivialAugment Wide augmentation scheme -
///     confirm against the implementation.
class DATASET_API TrivialAugmentWide final : public TensorTransform {
 public:
  /// \brief Constructor; every argument has a default, so it also serves as the default constructor.
  /// \param[in] num_magnitude_bins Number of magnitude bins; defaults to 31.
  /// \param[in] interpolation Interpolation mode; defaults to InterpolationMode::kNearestNeighbour.
  /// \param[in] fill_value Fill values; defaults to {0, 0, 0} (presumably one value per channel - confirm).
  explicit TrivialAugmentWide(int32_t num_magnitude_bins = 31,
                              InterpolationMode interpolation = InterpolationMode::kNearestNeighbour,
                              const std::vector<uint8_t> &fill_value = {0, 0, 0});

  /// \brief Destructor (defaulted).
  ~TrivialAugmentWide() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the UniformAugment vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name and parameters suggest that `num_ops` transforms are
///     picked from `transforms` and applied - confirm against the implementation. The three constructors differ
///     only in how the candidate transforms are referenced.
class DATASET_API UniformAugment final : public TensorTransform {
 public:
  /// \brief Constructor taking the candidate transforms as raw pointers.
  /// \param[in] transforms Vector of raw TensorTransform pointers (ownership and lifetime expectations are not
  ///     visible in this header).
  /// \param[in] num_ops Number of operations; defaults to 2.
  explicit UniformAugment(const std::vector<TensorTransform *> &transforms, int32_t num_ops = 2);

  /// \brief Constructor taking the candidate transforms as shared pointers.
  /// \param[in] transforms Vector of shared pointers to TensorTransform.
  /// \param[in] num_ops Number of operations; defaults to 2.
  explicit UniformAugment(const std::vector<std::shared_ptr<TensorTransform>> &transforms, int32_t num_ops = 2);

  /// \brief Constructor taking the candidate transforms as reference wrappers.
  /// \param[in] transforms Vector of reference wrappers around TensorTransform objects.
  /// \param[in] num_ops Number of operations; defaults to 2.
  explicit UniformAugment(const std::vector<std::reference_wrapper<TensorTransform>> &transforms, int32_t num_ops = 2);

  /// \brief Destructor (defaulted).
  ~UniformAugment() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Data is only forward-declared here and defined out of view (presumably it stores the constructor arguments);
  // data_ is the shared pointer to it.
  struct Data;
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the VerticalFlip vision transform (final subclass of TensorTransform).
/// \note Only the interface is visible here; the name suggests a vertical image flip - confirm against the
///     implementation. The class takes no parameters and declares no data members.
class DATASET_API VerticalFlip final : public TensorTransform {
 public:
  /// \brief Default constructor (defined outside this header).
  VerticalFlip();

  /// \brief Destructor (defaulted).
  ~VerticalFlip() = default;

 protected:
  /// \brief Override of the Parse() hook inherited from TensorTransform.
  /// \return Shared pointer to a TensorOperation (presumably the operation matching this transform).
  std::shared_ptr<TensorOperation> Parse() override;
};

/// \brief Declaration of WriteFile; by its name and signature it presumably writes the contents of `data` to
///     the file at `filename` (defined outside this header - confirm against the implementation).
/// \param[in] filename Path of the file (overwrite/create behaviour is not visible here).
/// \param[in] data Tensor holding the data.
/// \return Status value (presumably reports success or failure).
Status DATASET_API WriteFile(const std::string &filename, const mindspore::MSTensor &data);

/// \brief Declaration of WriteJpeg; by its name and signature it presumably saves `image` as a JPEG file at
///     `filename` (defined outside this header - confirm against the implementation).
/// \param[in] filename Path of the output file (overwrite/create behaviour is not visible here).
/// \param[in] image Image tensor.
/// \param[in] quality Defaults to 75 (valid range is not visible in this header).
/// \return Status value (presumably reports success or failure).
Status DATASET_API WriteJpeg(const std::string &filename, const mindspore::MSTensor &image, int quality = 75);

/// \brief Declaration of WritePng; by its name and signature it presumably saves `image` as a PNG file at
///     `filename` (defined outside this header - confirm against the implementation).
/// \param[in] filename Path of the output file (overwrite/create behaviour is not visible here).
/// \param[in] image Image tensor.
/// \param[in] compression_level Defaults to 6 (valid range is not visible in this header).
/// \return Status value (presumably reports success or failure).
Status DATASET_API WritePng(const std::string &filename, const mindspore::MSTensor &image, int compression_level = 6);
}  // namespace vision
}  // namespace dataset
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_