Ocean
FrameInterpolator.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 #ifndef META_OCEAN_CV_FRAME_INTERPOLATOR_H
9 #define META_OCEAN_CV_FRAME_INTERPOLATOR_H
10 
11 #include "ocean/cv/CV.h"
14 #include "ocean/cv/FramePyramid.h"
15 
16 #include "ocean/base/Frame.h"
17 #include "ocean/base/Worker.h"
18 
19 namespace Ocean
20 {
21 
22 namespace CV
23 {
24 
25 /**
26  * This class implements functions interpolating frames and image content.
27  * In general, this class is just a thin wrapper around the actual implementation based on the desired interpolation method.<br>
28  * Please be aware that all non-template-based functions of this class are intended for prototyping only.<br>
29  * Binary size can increase significantly when using non-template-based functions as the wrapper will add binary size of every interpolation method.<br>
30  * Thus, in case binary size matters, use the template-based functions directly.<br>
31  * For more details and a visual comparisons of the available image resizing methods see https://facebookresearch.github.io/ocean/docs/images/resizing/
32  * @see FrameInterpolatorBilinear, FrameInterpolatorNearestPixel.
33  * @ingroup cv
34  */
35 class OCEAN_CV_EXPORT FrameInterpolator
36 {
37  public:
38 
39  /**
40  * Definition of individual interpolation methods.
41  */
43  {
44  /// An invalid interpolation method.
46  /// An interpolation applying a nearest pixel (nearest neighbor) lookup.
48  /// An interpolation applying a bilinear interpolation.
49  IM_BILINEAR
50  };
51 
52  /**
53  * Definition of individual resize methods.
54  * Commonly, higher enum values create a better image quality, but also need more computation time.
55  */
57  {
58  /// An invalid resize method.
60  /// An interpolation applying a nearest pixel (nearest neighbor) lookup.
62  /// An interpolation applying a bilinear interpolation.
64  /// A two-step interpolation, first applying a pyramid down sampling with a 1-1 filtering, followed by bilinear interpolation from pyramid layer to target image.
66  /// A two-step interpolation, first applying a pyramid down sampling with a 1-4-6-4-1 filtering, followed by bilinear interpolation from pyramid layer to target image.
68  /// The resize method with best quality/performance ratio providing high image qualities with good performance values.
69  RM_AUTOMATIC = RM_NEAREST_PYRAMID_LAYER_11_BILINEAR
70  };
71 
72  public:
73 
74  /**
75  * Resizes/rescales a given frame by application of a specified interpolation method.
76  * @param frame The frame to resize, must not have a packed pixel format, must be valid
77  * @param width The width of the resized frame in pixel, with range [1, infinity)
78  * @param height The height of the resized frame in pixel, with range [1, infinity)
79  * @param resizeMethod The resize method to be used
80  * @param worker Optional worker object used for load distribution, nullptr by default
81  * @return True, if the frame could be resized
82  * @see FrameType::formatIsPacked().
83  */
84  static bool resize(Frame& frame, const unsigned int width, const unsigned int height, const ResizeMethod resizeMethod = RM_AUTOMATIC, Worker* worker = nullptr);
85 
86  /**
87  * Resizes/rescales a given frame by application of a specified interpolation method.
88  * @param source The source frame to resize, must not have a packed pixel format, must be valid
89  * @param target Resulting target frame with identical frame pixel format and pixel origin as the source frame, must be valid
90  * @param resizeMethod The resize method to be used
91  * @param worker Optional worker object used for load distribution, nullptr by default
92  * @return True, if the frame could be resized
93  * @see FrameType::formatIsPacked().
94  */
95  static bool resize(const Frame& source, Frame& target, const ResizeMethod resizeMethod = RM_AUTOMATIC, Worker* worker = nullptr);
96 
97  /**
98  * Resizes/rescales a given 1-plane frame by application of a specified interpolation method.
99  * This template-based implementation ensures that the binary impact is as small as possible.
100  * @param source The source frame buffer to resize, must be valid
101  * @param target The target frame buffer, must be valid
102  * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
103  * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
104  * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
105  * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
106  * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
107  * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
108  * @param worker Optional worker object used for load distribution
109  * @return True, if the frame could be resized
110  * @tparam T Data type of each pixel channel, e.g., 'uint8_t', 'float'
111  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
112  * @tparam tResizeMethod The resize method to be used
113  */
114  template <typename T, unsigned int tChannels, ResizeMethod tResizeMethod = RM_AUTOMATIC>
115  static bool resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
116 
117  /**
118  * Applies an affine transformation to an image (with zipped pixel format).
119  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
120  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame cover the result of the affine transformation).<br>
121  * The multiplication of the affine transformation with a pixel location in the output image yields the corresponding location in the input image, i.e., inputPoint = affineTransform * outputPoint.<br>
122  * The parameter 'outputOrigin' applies an additional translation to the provided affine transformation, i.e., input_A_output * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).<br>
123  * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should have the form:
124  * <pre>
125  * Rxx Ryx Tx
126  * Rxy Ryy Ty
127  * 0 0 1
128  * </pre>
129  * However, this function disregards the last row completely and only uses the top two rows, i.e., the six elements Rxx, Ryx, Tx, Rxy, Ryy, and Ty.
130  * Information: This function is the equivalent to OpenCV's cv::warpAffine().<br>
131  * Note: For applications running on mobile devices, in order to keep the impact on binary size to a minimum please prefer a specialized transformation function (those that work on image pointers instead of Frame instances).
132  * @param input The input frame that will be transformed, must be valid
133  * @param output The resulting frame after applying the affine transformation to the input frame; pixel format and pixel origin must be identical to input frame; memory of output frame must be allocated by the caller
134  * @param input_A_output The affine transform used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
135  * @param interpolationMethod The interpolation method to be used, must be either IM_BILINEAR or IM_NEAREST_PIXEL
136  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
137  * @param worker Optional worker object to distribute the computational load
138  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
139  * @return True, if succeeded
140  */
141  static bool affine(const Frame& input, Frame& output, const SquareMatrix3& input_A_output, const InterpolationMethod interpolationMethod = IM_BILINEAR, const uint8_t* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
142 
143  /**
144  * Transforms a given input frame (with zipped pixel format) into an output frame (with arbitrary frame dimension) by application of a homography.
145  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
146  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame cover the result of the homography).<br>
147  * The homography given defines the transformation of output pixels to input pixels (inputPoint = homography * outputPoint).<br>
148  * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography, i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).<br>
149  * Information: This function is the equivalent to OpenCV's cv::warpPerspective().
150  * @param input The input frame that will be transformed, must be valid
151  * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must be valid
152  * @param input_H_output The homography used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
153  * @param interpolationMethod The interpolation method to be used, must be either IM_BILINEAR or IM_NEAREST_PIXEL
154  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
155  * @param worker Optional worker object to distribute the computational load
156  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
157  * @return True, if succeeded
158  */
159  static bool homography(const Frame& input, Frame& output, const SquareMatrix3& input_H_output, const InterpolationMethod interpolationMethod = IM_BILINEAR, const uint8_t* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
160 
161  protected:
162 
163  /**
164  * Resizes/rescales a given 1-plane frame by application of a specified interpolation method.
165  * This template-based implementation ensures that the binary impact is as small as possible.
166  * @param source The source frame buffer to resize, must be valid
167  * @param target The target frame buffer, must be valid
168  * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
169  * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
170  * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
171  * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
172  * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
173  * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
174  * @param resizeMethod The resize method to be used
175  * @param worker Optional worker object used for load distribution
176  * @return True, if the frame could be resized
177  * @tparam T Data type of each pixel channel, e.g., 'uint8_t', 'float'
178  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
179  */
180  template <typename T, unsigned int tChannels>
181  static bool resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ResizeMethod resizeMethod, Worker* worker = nullptr);
182 };
183 
184 template <typename T, unsigned int tChannels, FrameInterpolator::ResizeMethod tResizeMethod>
185 bool FrameInterpolator::resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
186 {
187  static_assert(tChannels >= 1u, "Invalid channel number!");
188  static_assert(tResizeMethod != RM_INVALID, "Invalid resize method!");
189  static_assert((std::is_same<T, uint8_t>::value || tResizeMethod != RM_NEAREST_PYRAMID_LAYER_14641_BILINEAR), "Resize method is not supported for this data type!");
190 
191  ocean_assert(source != nullptr && target != nullptr);
192  ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
193  ocean_assert(targetWidth != 0u && targetHeight != 0u);
194 
195  if (source == nullptr || target == nullptr || sourceWidth == 0u || sourceHeight == 0u || targetWidth == 0u || targetHeight == 0u)
196  {
197  return false;
198  }
199 
200  if (sourceWidth == targetWidth && sourceHeight == targetHeight)
201  {
202  const bool result = CV::FrameConverter::subFrame<T>(source, target, sourceWidth, sourceHeight, sourceWidth, sourceHeight, tChannels, 0u, 0u, 0u, 0u, sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements);
203  ocean_assert(result);
204 
205  return result;
206  }
207 
208  if constexpr (tResizeMethod == RM_NEAREST_PIXEL)
209  {
210  FrameInterpolatorNearestPixel::resize<T, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourcePaddingElements, targetPaddingElements, worker);
211  return true;
212  }
213 
214  if constexpr (tResizeMethod == RM_BILINEAR)
215  {
216  FrameInterpolatorBilinear::resize<T, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourcePaddingElements, targetPaddingElements, worker);
217  return true;
218  }
219 
220  if constexpr (tResizeMethod == RM_NEAREST_PYRAMID_LAYER_11_BILINEAR || tResizeMethod == RM_NEAREST_PYRAMID_LAYER_14641_BILINEAR)
221  {
222  if constexpr (std::is_same<T, uint8_t>::value == false)
223  {
224  ocean_assert(false && "Missing implementation!");
225  return false;
226  }
227 
228  const uint8_t* const source_u8 = (const uint8_t*)(source);
229  uint8_t* const target_u8 = (uint8_t*)(target);
230 
231  if (sourceWidth / 2u == targetWidth && sourceHeight / 2u == targetHeight)
232  {
233  if constexpr (tResizeMethod == RM_NEAREST_PYRAMID_LAYER_11_BILINEAR)
234  {
235  FrameShrinker::downsampleByTwo8BitPerChannel11(source_u8, target_u8, sourceWidth, sourceHeight, tChannels, sourcePaddingElements, targetPaddingElements, worker);
236  }
237  else
238  {
239  ocean_assert(tResizeMethod == RM_NEAREST_PYRAMID_LAYER_14641_BILINEAR);
240  FrameShrinker::downsampleByTwo8BitPerChannel14641(source_u8, target_u8, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, sourcePaddingElements, targetPaddingElements, worker);
241  }
242 
243  return true;
244  }
245  else if (targetWidth < sourceWidth && targetHeight < sourceHeight)
246  {
247  ocean_assert(targetWidth > 0u && targetHeight >= 0u);
248  const unsigned int invalidCoarsestWidth = targetWidth - 1u;
249  const unsigned int invalidCoarsestHeight = targetHeight - 1u;
250 
251  unsigned int coarsestLayerWidth = 0u;
252  unsigned int coarsestLayerHeight = 0u;
253 
254  unsigned int layers = CV::FramePyramid::idealLayers(sourceWidth, sourceHeight, invalidCoarsestWidth, invalidCoarsestHeight, &coarsestLayerWidth, &coarsestLayerHeight);
255 
256  if (layers == 0u)
257  {
258  ocean_assert(false && "This should never happen!");
259  return false;
260  }
261 
262  if (coarsestLayerWidth == targetWidth && coarsestLayerHeight == targetHeight)
263  {
264  // the target frame matches with the resolution of the last pyramid layer, so that we can avoid copying the memory from the coarsest pyramid layer
265 
266  ocean_assert(layers >= 2u);
267  layers -= 1u;
268  }
269 
270  if (layers >= 2u)
271  {
272  constexpr FrameType::PixelOrigin anyPixelOrientation = FrameType::ORIGIN_UPPER_LEFT;
273 
274  FramePyramid framePyramid;
275 
276  if constexpr (tResizeMethod == RM_NEAREST_PYRAMID_LAYER_11_BILINEAR)
277  {
278  framePyramid = FramePyramid(source_u8, sourceWidth, sourceHeight, tChannels, anyPixelOrientation, layers, sourcePaddingElements, false /*copyFirstLayer*/, worker);
279  }
280  else
281  {
282  ocean_assert(tResizeMethod == RM_NEAREST_PYRAMID_LAYER_14641_BILINEAR);
283  framePyramid = FramePyramid(source_u8, sourceWidth, sourceHeight, tChannels, anyPixelOrientation, FramePyramid::DM_FILTER_14641, layers, sourcePaddingElements, false /*copyFirstLayer*/, worker);
284  }
285 
286  if (!framePyramid.isValid())
287  {
288  ocean_assert(false && "This should never happen!");
289  return false;
290  }
291 
292  const Frame& coarsestPyramidLayer = framePyramid.coarsestLayer();
293 
294  FrameInterpolatorBilinear::resize<T, tChannels>(coarsestPyramidLayer.constdata<T>(), target, coarsestPyramidLayer.width(), coarsestPyramidLayer.height(), targetWidth, targetHeight, coarsestPyramidLayer.paddingElements(), targetPaddingElements, worker);
295  return true;
296  }
297  }
298 
299  FrameInterpolatorBilinear::resize<T, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourcePaddingElements, targetPaddingElements, worker);
300  return true;
301  }
302 
303  ocean_assert(false && "Invalid interpolation type!");
304  return false;
305 }
306 
307 }
308 
309 }
310 
311 #endif // META_OCEAN_CV_FRAME_INTERPOLATOR_H
This class implements functions interpolating frames and image content.
Definition: FrameInterpolator.h:36
static bool affine(const Frame &input, Frame &output, const SquareMatrix3 &input_A_output, const InterpolationMethod interpolationMethod=IM_BILINEAR, const uint8_t *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Applies an affine transformation to an image (with zipped pixel format).
ResizeMethod
Definition of individual resize methods.
Definition: FrameInterpolator.h:57
@ RM_NEAREST_PIXEL
An interpolation applying a nearest pixel (nearest neighbor) lookup.
Definition: FrameInterpolator.h:61
@ RM_BILINEAR
An interpolation applying a bilinear interpolation.
Definition: FrameInterpolator.h:63
@ RM_NEAREST_PYRAMID_LAYER_11_BILINEAR
A two-step interpolation, first applying a pyramid down sampling with a 11 filtering,...
Definition: FrameInterpolator.h:65
@ RM_INVALID
An invalid resize method.
Definition: FrameInterpolator.h:59
@ RM_NEAREST_PYRAMID_LAYER_14641_BILINEAR
A two-step interpolation, first applying a pyramid down sampling with a 14641 filtering,...
Definition: FrameInterpolator.h:67
static bool resize(Frame &frame, const unsigned int width, const unsigned int height, const ResizeMethod resizeMethod=RM_AUTOMATIC, Worker *worker=nullptr)
Resizes/rescales a given frame by application of a specified interpolation method.
static bool homography(const Frame &input, Frame &output, const SquareMatrix3 &input_H_output, const InterpolationMethod interpolationMethod=IM_BILINEAR, const uint8_t *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame (with zipped pixel format) into an output frame (with arbitrary frame ...
static bool resize(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ResizeMethod resizeMethod, Worker *worker=nullptr)
Resizes/rescales a given 1-plane frame by application of a specified interpolation method.
InterpolationMethod
Definition of individual interpolation methods.
Definition: FrameInterpolator.h:43
@ IM_NEAREST_PIXEL
An interpolation applying a nearest pixel (nearest neighbor) lookup.
Definition: FrameInterpolator.h:47
@ IM_INVALID
An invalid interpolation method.
Definition: FrameInterpolator.h:45
static bool resize(const Frame &source, Frame &target, const ResizeMethod resizeMethod=RM_AUTOMATIC, Worker *worker=nullptr)
Resizes/rescales a given frame by application of a specified interpolation method.
This class implements a frame pyramid.
Definition: FramePyramid.h:37
static unsigned int idealLayers(const unsigned int width, const unsigned int height, const unsigned int invalidCoarsestWidthOrHeight, unsigned int *coarsestLayerWidth=nullptr, unsigned int *coarsestLayerHeight=nullptr)
Determines the number of layers until an invalid frame size would be reached in the next layer.
bool isValid() const
Returns whether this pyramid holds at least one frame layer.
Definition: FramePyramid.h:863
@ DM_FILTER_14641
Down sampling is realized by a 5x5 Gaussian filter.
Definition: FramePyramid.h:72
const Frame & coarsestLayer() const
Returns the coarsest layer frame of this pyramid regarding to the number of valid layers.
Definition: FramePyramid.h:747
static void downsampleByTwo8BitPerChannel11(const uint8_t *source, uint8_t *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int channels, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Reduces the resolution of a given frame by two, applying a 1-1 downsampling.
Definition: FrameShrinker.h:508
static void downsampleByTwo8BitPerChannel14641(const uint8_t *const source, uint8_t *const target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int channels, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Reduces the resolution of a given frame by two, applying a 1-4-6-4-1 downsampling.
Definition: FrameShrinker.h:561
This class implements a 2D pixel position with pixel precision.
Definition: PixelPosition.h:65
This class implements Ocean's image class.
Definition: Frame.h:1792
const T * constdata(const unsigned int planeIndex=0u) const
Returns a pointer to the read-only pixel data of a specific plane.
Definition: Frame.h:4168
unsigned int paddingElements(const unsigned int planeIndex=0u) const
Returns the optional number of padding elements at the end of each row for a specific plane.
Definition: Frame.h:4042
unsigned int width() const
Returns the width of the frame format in pixel.
Definition: Frame.h:3143
PixelOrigin
Defines different types of frame origin positions.
Definition: Frame.h:1046
@ ORIGIN_UPPER_LEFT
The first pixel lies in the upper left corner, the last pixel in the lower right corner.
Definition: Frame.h:1050
unsigned int height() const
Returns the height of the frame in pixel.
Definition: Frame.h:3148
This class implements a worker able to distribute function calls over different threads.
Definition: Worker.h:33
PixelPositionT< int > PixelPositionI
Definition of a PixelPosition object with a data type allowing positive and negative coordinate value...
Definition: PixelPosition.h:41
The namespace covering the entire Ocean framework.
Definition: Accessor.h:15