Program Listing for File vision.h

Return to documentation for file (include/vision.h)

#ifndef MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_
#define MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_

#include <cstdint>
#include <functional>
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>

#include "include/api/dual_abi_helper.h"
#include "include/api/status.h"
#include "include/dataset/constants.h"
#include "include/dataset/transforms.h"
#include "include/dataset/vision_lite.h"

namespace mindspore {
namespace dataset {

class TensorOperation;

// Transform operations for performing computer vision.
namespace vision {
/// \brief Declaration of the AutoContrast transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably applies automatic contrast to an image - confirm in the implementation.
class AutoContrast final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] cutoff Float setting, defaults to 0.0. Presumably the share of histogram pixels to cut off - TODO confirm.
  /// \param[in] ignore List of uint32_t values, defaults to empty. Presumably pixel values to ignore - TODO confirm.
  explicit AutoContrast(float cutoff = 0.0, std::vector<uint32_t> ignore = {});

  /// \brief Destructor (compiler-generated).
  ~AutoContrast() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the BoundingBoxAugment transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably applies a given transform to bounding-box regions - confirm in the
///     implementation.
class BoundingBoxAugment final : public TensorTransform {
 public:
  /// \brief Constructor taking the wrapped transform as a raw pointer.
  /// \param[in] transform Raw pointer to a TensorTransform. Ownership/lifetime expectations are not visible here.
  /// \param[in] ratio Float setting, defaults to 0.3. Presumably the fraction of boxes to augment - TODO confirm.
  explicit BoundingBoxAugment(TensorTransform *transform, float ratio = 0.3);

  /// \brief Constructor taking the wrapped transform as a shared pointer.
  /// \param[in] transform Shared pointer to a TensorTransform.
  /// \param[in] ratio Float setting, defaults to 0.3.
  explicit BoundingBoxAugment(const std::shared_ptr<TensorTransform> &transform, float ratio = 0.3);

  /// \brief Constructor taking the wrapped transform as a reference wrapper.
  /// \param[in] transform Reference wrapper around a TensorTransform.
  /// \param[in] ratio Float setting, defaults to 0.3.
  explicit BoundingBoxAugment(const std::reference_wrapper<TensorTransform> transform, float ratio = 0.3);

  /// \brief Destructor (compiler-generated).
  ~BoundingBoxAugment() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the CutMixBatch transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably applies CutMix to a batch of images - confirm in the implementation.
class CutMixBatch final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] image_batch_format ImageBatchFormat value (required). Presumably the batch layout - TODO confirm.
  /// \param[in] alpha Float setting, defaults to 1.0.
  /// \param[in] prob Float setting, defaults to 1.0. Presumably a probability - TODO confirm.
  explicit CutMixBatch(ImageBatchFormat image_batch_format, float alpha = 1.0, float prob = 1.0);

  /// \brief Destructor (compiler-generated).
  ~CutMixBatch() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the CutOut transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably masks out patches of an image - confirm in the implementation.
class CutOut final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] length Integer setting (required). Presumably the side length of each patch - TODO confirm.
  /// \param[in] num_patches Integer setting, defaults to 1.
  explicit CutOut(int32_t length, int32_t num_patches = 1);

  /// \brief Destructor (compiler-generated).
  ~CutOut() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the Equalize transform; it takes no arguments and declares no data members here.
/// NOTE(review): by name this presumably performs histogram equalization - confirm in the implementation.
class Equalize final : public TensorTransform {
 public:
  /// \brief Default constructor (defined out of line).
  Equalize();

  /// \brief Destructor (compiler-generated).
  ~Equalize() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;
};

/// \brief Declaration of the HorizontalFlip transform; it takes no arguments and declares no data members here.
/// NOTE(review): by name this presumably flips an image horizontally - confirm in the implementation.
class HorizontalFlip final : public TensorTransform {
 public:
  /// \brief Default constructor (defined out of line).
  HorizontalFlip();

  /// \brief Destructor (compiler-generated).
  ~HorizontalFlip() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;
};

/// \brief Declaration of the HWC2CHW transform; it takes no arguments and declares no data members here.
/// NOTE(review): by name this presumably converts (H, W, C) layout to (C, H, W) - confirm in the implementation.
class HWC2CHW final : public TensorTransform {
 public:
  /// \brief Default constructor (defined out of line).
  HWC2CHW();

  /// \brief Destructor (compiler-generated).
  ~HWC2CHW() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;
};

/// \brief Declaration of the Invert transform; it takes no arguments and declares no data members here.
/// NOTE(review): by name this presumably inverts image colours - confirm in the implementation.
class Invert final : public TensorTransform {
 public:
  /// \brief Default constructor (defined out of line).
  Invert();

  /// \brief Destructor (compiler-generated).
  ~Invert() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;
};

/// \brief Declaration of the MixUpBatch transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably applies MixUp to a batch - confirm in the implementation.
class MixUpBatch final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] alpha Float setting, defaults to 1.
  explicit MixUpBatch(float alpha = 1);

  /// \brief Destructor (compiler-generated).
  ~MixUpBatch() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the NormalizePad transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably normalizes an image and pads an extra channel - confirm in the
///     implementation.
class NormalizePad final : public TensorTransform {
 public:
  /// \brief Convenience constructor taking the data type as a std::string.
  ///     It converts dtype with StringToChar and delegates to the std::vector<char> overload below.
  /// \param[in] mean List of float values. Presumably per-channel means - TODO confirm.
  /// \param[in] std List of float values. Presumably per-channel standard deviations - TODO confirm.
  /// \param[in] dtype Data type name, defaults to "float32".
  explicit NormalizePad(const std::vector<float> &mean, const std::vector<float> &std,
                        const std::string &dtype = "float32")
      : NormalizePad(mean, std, StringToChar(dtype)) {}

  /// \brief Constructor taking the data type as a character vector (defined out of line).
  /// \param[in] mean List of float values.
  /// \param[in] std List of float values.
  /// \param[in] dtype Data type name as a std::vector<char>.
  explicit NormalizePad(const std::vector<float> &mean, const std::vector<float> &std, const std::vector<char> &dtype);

  /// \brief Destructor (compiler-generated).
  ~NormalizePad() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the Pad transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably pads the borders of an image - confirm in the implementation.
class Pad final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] padding List of int32_t values (required). The accepted lengths are not visible here.
  /// \param[in] fill_value List of uint8_t values, defaults to {0}.
  /// \param[in] padding_mode BorderType value, defaults to BorderType::kConstant.
  explicit Pad(std::vector<int32_t> padding, std::vector<uint8_t> fill_value = {0},
               BorderType padding_mode = BorderType::kConstant);

  /// \brief Destructor (compiler-generated).
  ~Pad() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomColor transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably adjusts image colour by a random amount - confirm in the implementation.
class RandomColor final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] t_lb Float setting (required). Presumably the lower bound of a range - TODO confirm.
  /// \param[in] t_ub Float setting (required). Presumably the upper bound of a range - TODO confirm.
  explicit RandomColor(float t_lb, float t_ub);

  /// \brief Destructor (compiler-generated).
  ~RandomColor() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomColorAdjust transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably randomly adjusts brightness, contrast, saturation and hue - confirm in
///     the implementation.
class RandomColorAdjust final : public TensorTransform {
 public:
  /// \brief Constructor; every argument has a default.
  /// \param[in] brightness List of float values, defaults to {1.0, 1.0}. Presumably a {min, max} range - TODO confirm.
  /// \param[in] contrast List of float values, defaults to {1.0, 1.0}.
  /// \param[in] saturation List of float values, defaults to {1.0, 1.0}.
  /// \param[in] hue List of float values, defaults to {0.0, 0.0}.
  explicit RandomColorAdjust(std::vector<float> brightness = {1.0, 1.0}, std::vector<float> contrast = {1.0, 1.0},
                             std::vector<float> saturation = {1.0, 1.0}, std::vector<float> hue = {0.0, 0.0});

  /// \brief Destructor (compiler-generated).
  ~RandomColorAdjust() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomCrop transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably crops an image at a random location - confirm in the implementation.
class RandomCrop final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] size List of int32_t values (required). Presumably the output size - TODO confirm.
  /// \param[in] padding List of int32_t values, defaults to {0, 0, 0, 0}.
  /// \param[in] pad_if_needed Boolean flag, defaults to false.
  /// \param[in] fill_value List of uint8_t values, defaults to {0, 0, 0}.
  /// \param[in] padding_mode BorderType value, defaults to BorderType::kConstant.
  explicit RandomCrop(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
                      bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0},
                      BorderType padding_mode = BorderType::kConstant);

  /// \brief Destructor (compiler-generated).
  ~RandomCrop() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomCropDecodeResize transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably combines decode, random crop and resize - confirm in the implementation.
class RandomCropDecodeResize final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] size List of int32_t values (required). Presumably the output size - TODO confirm.
  /// \param[in] scale List of float values, defaults to {0.08, 1.0}.
  /// \param[in] ratio List of float values, defaults to {3. / 4, 4. / 3}.
  /// \param[in] interpolation InterpolationMode value, defaults to InterpolationMode::kLinear.
  /// \param[in] max_attempts Integer setting, defaults to 10.
  explicit RandomCropDecodeResize(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
                                  std::vector<float> ratio = {3. / 4, 4. / 3},
                                  InterpolationMode interpolation = InterpolationMode::kLinear,
                                  int32_t max_attempts = 10);

  /// \brief Destructor (compiler-generated).
  ~RandomCropDecodeResize() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomCropWithBBox transform; the implementation is not part of this header.
///     Its constructor signature mirrors that of RandomCrop.
/// NOTE(review): by name this presumably also adjusts bounding boxes - confirm in the implementation.
class RandomCropWithBBox final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] size List of int32_t values (required). Presumably the output size - TODO confirm.
  /// \param[in] padding List of int32_t values, defaults to {0, 0, 0, 0}.
  /// \param[in] pad_if_needed Boolean flag, defaults to false.
  /// \param[in] fill_value List of uint8_t values, defaults to {0, 0, 0}.
  /// \param[in] padding_mode BorderType value, defaults to BorderType::kConstant.
  explicit RandomCropWithBBox(std::vector<int32_t> size, std::vector<int32_t> padding = {0, 0, 0, 0},
                              bool pad_if_needed = false, std::vector<uint8_t> fill_value = {0, 0, 0},
                              BorderType padding_mode = BorderType::kConstant);

  /// \brief Destructor (compiler-generated).
  ~RandomCropWithBBox() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomHorizontalFlip transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably flips an image horizontally at random - confirm in the implementation.
class RandomHorizontalFlip final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] prob Float setting, defaults to 0.5. Presumably the flip probability - TODO confirm.
  explicit RandomHorizontalFlip(float prob = 0.5);

  /// \brief Destructor (compiler-generated).
  ~RandomHorizontalFlip() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomHorizontalFlipWithBBox transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably flips an image and its bounding boxes horizontally at random - confirm
///     in the implementation.
class RandomHorizontalFlipWithBBox final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] prob Float setting, defaults to 0.5. Presumably the flip probability - TODO confirm.
  explicit RandomHorizontalFlipWithBBox(float prob = 0.5);

  /// \brief Destructor (compiler-generated).
  ~RandomHorizontalFlipWithBBox() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomPosterize transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably reduces the bit depth of an image by a random amount - confirm in the
///     implementation.
class RandomPosterize final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] bit_range List of uint8_t values, defaults to {4, 8}. Presumably a {min, max} bit range - TODO confirm.
  explicit RandomPosterize(const std::vector<uint8_t> &bit_range = {4, 8});

  /// \brief Destructor (compiler-generated).
  ~RandomPosterize() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomResize transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably resizes an image with a randomly chosen setting - confirm in the
///     implementation.
class RandomResize final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] size List of int32_t values (required). Presumably the output size - TODO confirm.
  explicit RandomResize(std::vector<int32_t> size);

  /// \brief Destructor (compiler-generated).
  ~RandomResize() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomResizeWithBBox transform; the implementation is not part of this header.
///     Its constructor signature mirrors that of RandomResize.
/// NOTE(review): by name this presumably also adjusts bounding boxes - confirm in the implementation.
class RandomResizeWithBBox final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] size List of int32_t values (required). Presumably the output size - TODO confirm.
  explicit RandomResizeWithBBox(std::vector<int32_t> size);

  /// \brief Destructor (compiler-generated).
  ~RandomResizeWithBBox() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomResizedCrop transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably crops a random region and resizes it - confirm in the implementation.
class RandomResizedCrop final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] size List of int32_t values (required). Presumably the output size - TODO confirm.
  /// \param[in] scale List of float values, defaults to {0.08, 1.0}.
  /// \param[in] ratio List of float values, defaults to {3. / 4., 4. / 3.}.
  /// \param[in] interpolation InterpolationMode value, defaults to InterpolationMode::kLinear.
  /// \param[in] max_attempts Integer setting, defaults to 10.
  explicit RandomResizedCrop(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
                             std::vector<float> ratio = {3. / 4., 4. / 3.},
                             InterpolationMode interpolation = InterpolationMode::kLinear, int32_t max_attempts = 10);

  /// \brief Destructor (compiler-generated).
  ~RandomResizedCrop() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomResizedCropWithBBox transform; the implementation is not part of this header.
///     Its constructor signature mirrors that of RandomResizedCrop.
/// NOTE(review): by name this presumably also adjusts bounding boxes - confirm in the implementation.
class RandomResizedCropWithBBox final : public TensorTransform {
 public:
  /// \brief Constructor. Marked explicit, like RandomResizedCrop, so that a std::vector<int32_t> is never
  ///     silently converted into a transform.
  /// \param[in] size List of int32_t values (required). Presumably the output size - TODO confirm.
  /// \param[in] scale List of float values, defaults to {0.08, 1.0}.
  /// \param[in] ratio List of float values, defaults to {3. / 4., 4. / 3.}.
  /// \param[in] interpolation InterpolationMode value, defaults to InterpolationMode::kLinear.
  /// \param[in] max_attempts Integer setting, defaults to 10.
  explicit RandomResizedCropWithBBox(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
                                     std::vector<float> ratio = {3. / 4., 4. / 3.},
                                     InterpolationMode interpolation = InterpolationMode::kLinear,
                                     int32_t max_attempts = 10);

  /// \brief Destructor (compiler-generated).
  ~RandomResizedCropWithBBox() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomRotation transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably rotates an image by a random angle - confirm in the implementation.
class RandomRotation final : public TensorTransform {
 public:
  /// \brief Constructor. Marked explicit, like the other single-argument-callable constructors in this header,
  ///     so that a std::vector<float> is never silently converted into a transform.
  /// \param[in] degrees List of float values (required). Presumably a range of angles - TODO confirm.
  /// \param[in] resample InterpolationMode value, defaults to InterpolationMode::kNearestNeighbour.
  /// \param[in] expand Boolean flag, defaults to false.
  /// \param[in] center List of float values, defaults to empty.
  /// \param[in] fill_value List of uint8_t values, defaults to {0, 0, 0}.
  explicit RandomRotation(std::vector<float> degrees,
                          InterpolationMode resample = InterpolationMode::kNearestNeighbour, bool expand = false,
                          std::vector<float> center = {}, std::vector<uint8_t> fill_value = {0, 0, 0});

  /// \brief Destructor (compiler-generated).
  ~RandomRotation() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomSelectSubpolicy transform; the implementation is not part of this header.
///     The three constructors differ only in how each TensorTransform inside the policy is referenced.
/// NOTE(review): by name this presumably picks one sub-policy at random and applies its transforms, with the
///     double in each pair acting as a probability - confirm in the implementation.
class RandomSelectSubpolicy final : public TensorTransform {
 public:
  /// \brief Constructor taking transforms as raw pointers.
  /// \param[in] policy List of lists of (TensorTransform pointer, double) pairs. Ownership/lifetime expectations
  ///     for the pointers are not visible here.
  explicit RandomSelectSubpolicy(const std::vector<std::vector<std::pair<TensorTransform *, double>>> &policy);

  /// \brief Constructor taking transforms as shared pointers.
  /// \param[in] policy List of lists of (shared pointer to TensorTransform, double) pairs.
  explicit RandomSelectSubpolicy(
    const std::vector<std::vector<std::pair<std::shared_ptr<TensorTransform>, double>>> &policy);

  /// \brief Constructor taking transforms as reference wrappers.
  /// \param[in] policy List of lists of (reference wrapper of TensorTransform, double) pairs.
  explicit RandomSelectSubpolicy(
    const std::vector<std::vector<std::pair<std::reference_wrapper<TensorTransform>, double>>> &policy);

  /// \brief Destructor (compiler-generated).
  ~RandomSelectSubpolicy() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomSharpness transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably adjusts image sharpness by a random degree - confirm in the
///     implementation.
class RandomSharpness final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] degrees List of float values, defaults to {0.1, 1.9}. Presumably a {min, max} range - TODO confirm.
  explicit RandomSharpness(std::vector<float> degrees = {0.1, 1.9});

  /// \brief Destructor (compiler-generated).
  ~RandomSharpness() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomSolarize transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably inverts pixels selected by a random threshold - confirm in the
///     implementation.
class RandomSolarize final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] threshold List of uint8_t values, defaults to {0, 255}. Presumably a {min, max} range - TODO confirm.
  explicit RandomSolarize(std::vector<uint8_t> threshold = {0, 255});

  /// \brief Destructor (compiler-generated).
  ~RandomSolarize() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomVerticalFlip transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably flips an image vertically at random - confirm in the implementation.
class RandomVerticalFlip final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] prob Float setting, defaults to 0.5. Presumably the flip probability - TODO confirm.
  explicit RandomVerticalFlip(float prob = 0.5);

  /// \brief Destructor (compiler-generated).
  ~RandomVerticalFlip() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RandomVerticalFlipWithBBox transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably flips an image and its bounding boxes vertically at random - confirm
///     in the implementation.
class RandomVerticalFlipWithBBox final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] prob Float setting, defaults to 0.5. Presumably the flip probability - TODO confirm.
  explicit RandomVerticalFlipWithBBox(float prob = 0.5);

  /// \brief Destructor (compiler-generated).
  ~RandomVerticalFlipWithBBox() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the Rescale transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably computes something like output = input * rescale + shift - confirm in
///     the implementation.
class Rescale final : public TensorTransform {
 public:
  /// \brief Constructor; both arguments are required.
  /// \param[in] rescale Float setting. Presumably a multiplicative factor - TODO confirm.
  /// \param[in] shift Float setting. Presumably an additive offset - TODO confirm.
  Rescale(float rescale, float shift);

  /// \brief Destructor (compiler-generated).
  ~Rescale() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the ResizeWithBBox transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably resizes an image and adjusts its bounding boxes - confirm in the
///     implementation.
class ResizeWithBBox final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] size List of int32_t values (required). Presumably the output size - TODO confirm.
  /// \param[in] interpolation InterpolationMode value, defaults to InterpolationMode::kLinear.
  explicit ResizeWithBBox(std::vector<int32_t> size, InterpolationMode interpolation = InterpolationMode::kLinear);

  /// \brief Destructor (compiler-generated).
  ~ResizeWithBBox() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the RGBA2BGR transform; it takes no arguments and declares no data members here.
/// NOTE(review): by name this presumably converts 4-channel RGBA images to 3-channel BGR - confirm in the
///     implementation.
class RGBA2BGR final : public TensorTransform {
 public:
  /// \brief Default constructor (defined out of line).
  RGBA2BGR();

  /// \brief Destructor (compiler-generated).
  ~RGBA2BGR() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;
};

/// \brief Declaration of the RGBA2RGB transform; it takes no arguments and declares no data members here.
/// NOTE(review): by name this presumably converts 4-channel RGBA images to 3-channel RGB - confirm in the
///     implementation.
class RGBA2RGB final : public TensorTransform {
 public:
  /// \brief Default constructor (defined out of line).
  RGBA2RGB();

  /// \brief Destructor (compiler-generated).
  ~RGBA2RGB() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;
};

/// \brief Declaration of the SlicePatches transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably slices an image into a grid of patches - confirm in the implementation.
class SlicePatches final : public TensorTransform {
 public:
  /// \brief Constructor; every argument has a default. Marked explicit, like the other single-argument-callable
  ///     constructors in this header, so that an integer is never silently converted into a transform.
  /// \param[in] num_height Integer setting, defaults to 1. Presumably the number of patches vertically - TODO confirm.
  /// \param[in] num_width Integer setting, defaults to 1. Presumably the number of patches horizontally - TODO confirm.
  /// \param[in] slice_mode SliceMode value, defaults to SliceMode::kPad.
  /// \param[in] fill_value uint8_t setting, defaults to 0.
  explicit SlicePatches(int32_t num_height = 1, int32_t num_width = 1, SliceMode slice_mode = SliceMode::kPad,
                        uint8_t fill_value = 0);

  /// \brief Destructor (compiler-generated).
  ~SlicePatches() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the SoftDvppDecodeRandomCropResizeJpeg transform; the implementation is not part of
///     this header.
/// NOTE(review): by name this presumably decodes a JPEG, crops a random region and resizes it using a software
///     DVPP path - confirm in the implementation.
class SoftDvppDecodeRandomCropResizeJpeg final : public TensorTransform {
 public:
  /// \brief Constructor. Marked explicit, like SoftDvppDecodeResizeJpeg, so that a std::vector<int32_t> is
  ///     never silently converted into a transform.
  /// \param[in] size List of int32_t values (required). Presumably the output size - TODO confirm.
  /// \param[in] scale List of float values, defaults to {0.08, 1.0}.
  /// \param[in] ratio List of float values, defaults to {3. / 4., 4. / 3.}.
  /// \param[in] max_attempts Integer setting, defaults to 10.
  explicit SoftDvppDecodeRandomCropResizeJpeg(std::vector<int32_t> size, std::vector<float> scale = {0.08, 1.0},
                                              std::vector<float> ratio = {3. / 4., 4. / 3.},
                                              int32_t max_attempts = 10);

  /// \brief Destructor (compiler-generated).
  ~SoftDvppDecodeRandomCropResizeJpeg() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the SoftDvppDecodeResizeJpeg transform; the implementation is not part of this header.
/// NOTE(review): by name this presumably decodes a JPEG and resizes it using a software DVPP path - confirm in
///     the implementation.
class SoftDvppDecodeResizeJpeg final : public TensorTransform {
 public:
  /// \brief Constructor.
  /// \param[in] size List of int32_t values (required). Presumably the output size - TODO confirm.
  explicit SoftDvppDecodeResizeJpeg(std::vector<int32_t> size);

  /// \brief Destructor (compiler-generated).
  ~SoftDvppDecodeResizeJpeg() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the SwapRedBlue transform; it takes no arguments and declares no data members here.
/// NOTE(review): by name this presumably swaps the red and blue channels of an image - confirm in the
///     implementation.
class SwapRedBlue final : public TensorTransform {
 public:
  /// \brief Default constructor (defined out of line).
  SwapRedBlue();

  /// \brief Destructor (compiler-generated).
  ~SwapRedBlue() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;
};

/// \brief Declaration of the UniformAugment transform; the implementation is not part of this header.
///     The three constructors differ only in how the candidate TensorTransform objects are referenced.
/// NOTE(review): by name this presumably applies num_ops transforms chosen at random from the list - confirm in
///     the implementation.
class UniformAugment final : public TensorTransform {
 public:
  /// \brief Constructor taking transforms as raw pointers.
  /// \param[in] transforms List of raw TensorTransform pointers. Ownership/lifetime expectations are not
  ///     visible here.
  /// \param[in] num_ops Integer setting, defaults to 2.
  explicit UniformAugment(const std::vector<TensorTransform *> &transforms, int32_t num_ops = 2);

  /// \brief Constructor taking transforms as shared pointers.
  /// \param[in] transforms List of shared pointers to TensorTransform.
  /// \param[in] num_ops Integer setting, defaults to 2.
  explicit UniformAugment(const std::vector<std::shared_ptr<TensorTransform>> &transforms, int32_t num_ops = 2);

  /// \brief Constructor taking transforms as reference wrappers.
  /// \param[in] transforms List of reference wrappers of TensorTransform.
  /// \param[in] num_ops Integer setting, defaults to 2.
  explicit UniformAugment(const std::vector<std::reference_wrapper<TensorTransform>> &transforms, int32_t num_ops = 2);

  /// \brief Destructor (compiler-generated).
  ~UniformAugment() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;

 private:
  // Opaque state type; only forward-declared in this header.
  struct Data;
  // Shared handle to the opaque state.
  std::shared_ptr<Data> data_;
};

/// \brief Declaration of the VerticalFlip transform; it takes no arguments and declares no data members here.
/// NOTE(review): by name this presumably flips an image vertically - confirm in the implementation.
class VerticalFlip final : public TensorTransform {
 public:
  /// \brief Default constructor (defined out of line).
  VerticalFlip();

  /// \brief Destructor (compiler-generated).
  ~VerticalFlip() = default;

 protected:
  /// \brief Overrides the base-class Parse() hook.
  /// \return Shared pointer to a TensorOperation.
  std::shared_ptr<TensorOperation> Parse() override;
};

}  // namespace vision
}  // namespace dataset
}  // namespace mindspore
#endif  // MINDSPORE_CCSRC_MINDDATA_DATASET_INCLUDE_DATASET_VISION_H_