Ocean
Loading...
Searching...
No Matches
FrameInterpolator.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_CV_FRAME_INTERPOLATOR_H
9#define META_OCEAN_CV_FRAME_INTERPOLATOR_H
10
11#include "ocean/cv/CV.h"
15
16#include "ocean/base/Frame.h"
17#include "ocean/base/Worker.h"
18
19namespace Ocean
20{
21
22namespace CV
23{
24
25/**
26 * This class implements functions interpolating frames and image content.
27 * In general, this class is just a thin wrapper around the actual implementation based on the desired interpolation method.<br>
28 * Please be aware that all non-template-based functions of this class are intended for prototyping only.<br>
29 * Binary size can increase significantly when using non-template-based functions as the wrapper will add binary size of every interpolation method.<br>
30 * Thus, in case binary size matters, use the template-based functions directly.<br>
31 * For more details and a visual comparison of the available image resizing methods see https://facebookresearch.github.io/ocean/docs/images/resizing/
32 * @see FrameInterpolatorBilinear, FrameInterpolatorNearestPixel.
33 * @ingroup cv
34 */
35class OCEAN_CV_EXPORT FrameInterpolator
36{
37 public:
38
39 /**
40 * Definition of individual interpolation methods.
41 */
43 {
44 /// An invalid interpolation method.
46 /// An interpolation applying a nearest pixel (nearest neighbor) lookup.
48 /// An interpolation applying a bilinear interpolation.
49 IM_BILINEAR
50 };
51
52 /**
53 * Definition of individual resize methods.
54 * Commonly, higher enum values create a better image quality but also need more computational time.
55 */
57 {
58 /// An invalid resize method.
60 /// An interpolation applying a nearest pixel (nearest neighbor) lookup.
62 /// An interpolation applying a bilinear interpolation.
64 /// A two-step interpolation, first applying a pyramid down sampling with a 11 filtering, followed by bilinear interpolation from pyramid layer to target image.
66 /// A two-step interpolation, first applying a pyramid down sampling with a 14641 filtering, followed by bilinear interpolation from pyramid layer to target image.
68 /// The resize method with best quality/performance ratio providing high image qualities with good performance values.
69 RM_AUTOMATIC = RM_NEAREST_PYRAMID_LAYER_11_BILINEAR
70 };
71
72 public:
73
74 /**
75 * Resizes/rescales a given frame by application of a specified interpolation method.
76 * @param frame The frame to resize, must not have a packed pixel format, must be valid
77 * @param width The width of the resized frame in pixel, with range [1, infinity)
78 * @param height The height of the resized frame in pixel, with range [1, infinity)
79 * @param resizeMethod The resize method to be used
80 * @param worker Optional worker object used for load distribution, can be nullptr
81 * @return True, if the frame could be resized
82 * @see FrameType::formatIsPacked().
83 */
84 static bool resize(Frame& frame, const unsigned int width, const unsigned int height, const ResizeMethod resizeMethod = RM_AUTOMATIC, Worker* worker = nullptr);
85
86 /**
87 * Resizes/rescales a given frame by application of a specified interpolation method.
88 * @param source The source frame to resize, must not have a packed pixel format, must be valid
89 * @param target Resulting target frame with identical frame pixel format and pixel origin as the source frame, must be valid
90 * @param resizeMethod The resize method to be used
91 * @param worker Optional worker object used for load distribution, can be nullptr
92 * @return True, if the frame could be resized
93 * @see FrameType::formatIsPacked().
94 */
95 static bool resize(const Frame& source, Frame& target, const ResizeMethod resizeMethod = RM_AUTOMATIC, Worker* worker = nullptr);
96
97 /**
98 * Resizes/rescales a given 1-plane frame by application of a specified interpolation method.
99 * This template-based implementation ensures that the binary impact is as small as possible.
100 * @param source The source frame buffer to resize, must be valid
101 * @param target The target frame buffer, must be valid
102 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
103 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
104 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
105 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
106 * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
107 * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
108 * @param worker Optional worker object used for load distribution
109 * @return True, if the frame could be resized
110 * @tparam T Data type of each pixel channel, e.g., 'uint8_t', 'float'
111 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
112 * @tparam tResizeMethod The resize method to be used
113 */
114 template <typename T, unsigned int tChannels, ResizeMethod tResizeMethod = RM_AUTOMATIC>
115 static bool resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
116
117 /**
118 * Applies an affine transformation to an image (with zipped pixel format).
119 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
120 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame cover the result of the affine transformation).<br>
121 * The multiplication of the affine transformation with pixel location in the output image yield their location in the input image, i.e., inputPoint = affineTransform * outputPoint.<br>
122 * The parameter 'outputOrigin' applies an additional translation to the provided affine transformation i.e., input_A_output * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).<br>
123 * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
124 * <pre>
125 * Rxx Ryx Tx
126 * Rxy Ryy Ty
127 * 0 0 1
128 * </pre>
129 * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements Rxx, Ryx, Tx, Rxy, Ryy, and Ty.
130 * Information: This function is the equivalent to OpenCV's cv::warpAffine().<br>
131 * Note: For applications running on mobile devices, in order to keep the impact on binary size to a minimum please prefer a specialized transformation function (those that work on image pointers instead of Frame instances).
132 * @param input The input frame that will be transformed, must be valid
133 * @param output The resulting frame after applying the affine transformation to the input frame; pixel format and pixel origin must be identical to input frame; memory of output frame must be allocated by the caller
134 * @param input_A_output The affine transform used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
135 * @param interpolationMethod The interpolation method to be used, must be either IM_BILINEAR or IM_NEAREST_PIXEL
136 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
137 * @param worker Optional worker object to distribute the computational load
138 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
139 * @return True, if succeeded
140 */
141 static bool affine(const Frame& input, Frame& output, const SquareMatrix3& input_A_output, const InterpolationMethod interpolationMethod = IM_BILINEAR, const uint8_t* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
142
143 /**
144 * Transforms a given input frame (with zipped pixel format) into an output frame (with arbitrary frame dimension) by application of a homography.
145 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
146 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame cover the result of the homography).<br>
147 * The homography given defines the transformation of output pixels to input pixels (inputPoint = homography * outputPoint).<br>
148 * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).<br>
149 * Information: This function is the equivalent to OpenCV's cv::warpPerspective().
150 * @param input The input frame that will be transformed, must be valid
151 * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must be valid
152 * @param input_H_output The homography used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
153 * @param interpolationMethod The interpolation method to be used, must be either IM_BILINEAR or IM_NEAREST_PIXEL
154 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
155 * @param worker Optional worker object to distribute the computational load
156 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
157 * @return True, if succeeded
158 */
159 static bool homography(const Frame& input, Frame& output, const SquareMatrix3& input_H_output, const InterpolationMethod interpolationMethod = IM_BILINEAR, const uint8_t* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
160
161 protected:
162
163 /**
164 * Resizes/rescales a given 1-plane frame by application of a specified interpolation method.
165 * This template-based implementation ensures that the binary impact is as small as possible.
166 * @param source The source frame buffer to resize, must be valid
167 * @param target The target frame buffer, must be valid
168 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
169 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
170 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
171 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
172 * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
173 * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
174 * @param resizeMethod The resize method to be used
175 * @param worker Optional worker object used for load distribution
176 * @return True, if the frame could be resized
177 * @tparam T Data type of each pixel channel, e.g., 'uint8_t', 'float'
178 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
179 */
180 template <typename T, unsigned int tChannels>
181 static bool resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ResizeMethod resizeMethod, Worker* worker = nullptr);
182};
183
184template <typename T, unsigned int tChannels, FrameInterpolator::ResizeMethod tResizeMethod>
185bool FrameInterpolator::resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
186{
187 static_assert(tChannels >= 1u, "Invalid channel number!");
188 static_assert(tResizeMethod != RM_INVALID, "Invalid resize method!");
189 static_assert((std::is_same<T, uint8_t>::value || tResizeMethod != RM_NEAREST_PYRAMID_LAYER_14641_BILINEAR), "Resize method is not supported for this data type!");
190
191 ocean_assert(source != nullptr && target != nullptr);
192 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
193 ocean_assert(targetWidth != 0u && targetHeight != 0u);
194
195 if (source == nullptr || target == nullptr || sourceWidth == 0u || sourceHeight == 0u || targetWidth == 0u || targetHeight == 0u)
196 {
197 return false;
198 }
199
200 if (sourceWidth == targetWidth && sourceHeight == targetHeight)
201 {
202 const bool result = CV::FrameConverter::subFrame<T>(source, target, sourceWidth, sourceHeight, sourceWidth, sourceHeight, tChannels, 0u, 0u, 0u, 0u, sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements);
203 ocean_assert(result);
204
205 return result;
206 }
207
208 if constexpr (tResizeMethod == RM_NEAREST_PIXEL)
209 {
210 FrameInterpolatorNearestPixel::resize<T, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourcePaddingElements, targetPaddingElements, worker);
211 return true;
212 }
213
214 if constexpr (tResizeMethod == RM_BILINEAR)
215 {
216 FrameInterpolatorBilinear::resize<T, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourcePaddingElements, targetPaddingElements, worker);
217 return true;
218 }
219
220 if constexpr (tResizeMethod == RM_NEAREST_PYRAMID_LAYER_11_BILINEAR || tResizeMethod == RM_NEAREST_PYRAMID_LAYER_14641_BILINEAR)
221 {
222 if constexpr (std::is_same<T, uint8_t>::value == false)
223 {
224 ocean_assert(false && "Missing implementation!");
225 return false;
226 }
227
228 const uint8_t* const source_u8 = (const uint8_t*)(source);
229 uint8_t* const target_u8 = (uint8_t*)(target);
230
231 if (sourceWidth / 2u == targetWidth && sourceHeight / 2u == targetHeight)
232 {
233 if constexpr (tResizeMethod == RM_NEAREST_PYRAMID_LAYER_11_BILINEAR)
234 {
235 FrameShrinker::downsampleByTwo8BitPerChannel11(source_u8, target_u8, sourceWidth, sourceHeight, tChannels, sourcePaddingElements, targetPaddingElements, worker);
236 }
237 else
238 {
239 ocean_assert(tResizeMethod == RM_NEAREST_PYRAMID_LAYER_14641_BILINEAR);
240 FrameShrinker::downsampleByTwo8BitPerChannel14641(source_u8, target_u8, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, sourcePaddingElements, targetPaddingElements, worker);
241 }
242
243 return true;
244 }
245 else if (targetWidth < sourceWidth && targetHeight < sourceHeight)
246 {
247 ocean_assert(targetWidth > 0u && targetHeight >= 0u);
248 const unsigned int invalidCoarsestWidth = targetWidth - 1u;
249 const unsigned int invalidCoarsestHeight = targetHeight - 1u;
250
251 unsigned int coarsestLayerWidth = 0u;
252 unsigned int coarsestLayerHeight = 0u;
253
254 unsigned int layers = CV::FramePyramid::idealLayers(sourceWidth, sourceHeight, invalidCoarsestWidth, invalidCoarsestHeight, &coarsestLayerWidth, &coarsestLayerHeight);
255
256 if (layers == 0u)
257 {
258 ocean_assert(false && "This should never happen!");
259 return false;
260 }
261
262 if (coarsestLayerWidth == targetWidth && coarsestLayerHeight == targetHeight)
263 {
264 // the target frame matches with the resolution of the last pyramid layer, so that we can avoid copying the memory from the coarsest pyramid layer
265
266 ocean_assert(layers >= 2u);
267 layers -= 1u;
268 }
269
270 if (layers >= 2u)
271 {
272 constexpr FrameType::PixelOrigin anyPixelOrientation = FrameType::ORIGIN_UPPER_LEFT;
273
274 FramePyramid framePyramid;
275
276 if constexpr (tResizeMethod == RM_NEAREST_PYRAMID_LAYER_11_BILINEAR)
277 {
278 framePyramid = FramePyramid(source_u8, sourceWidth, sourceHeight, tChannels, anyPixelOrientation, layers, sourcePaddingElements, false /*copyFirstLayer*/, worker);
279 }
280 else
281 {
282 ocean_assert(tResizeMethod == RM_NEAREST_PYRAMID_LAYER_14641_BILINEAR);
283 framePyramid = FramePyramid(source_u8, sourceWidth, sourceHeight, tChannels, anyPixelOrientation, FramePyramid::DM_FILTER_14641, layers, sourcePaddingElements, false /*copyFirstLayer*/, worker);
284 }
285
286 if (!framePyramid.isValid())
287 {
288 ocean_assert(false && "This should never happen!");
289 return false;
290 }
291
292 const Frame& coarsestPyramidLayer = framePyramid.coarsestLayer();
293
294 FrameInterpolatorBilinear::resize<T, tChannels>(coarsestPyramidLayer.constdata<T>(), target, coarsestPyramidLayer.width(), coarsestPyramidLayer.height(), targetWidth, targetHeight, coarsestPyramidLayer.paddingElements(), targetPaddingElements, worker);
295 return true;
296 }
297 }
298
299 FrameInterpolatorBilinear::resize<T, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourcePaddingElements, targetPaddingElements, worker);
300 return true;
301 }
302
303 ocean_assert(false && "Invalid interpolation type!");
304 return false;
305}
306
307}
308
309}
310
311#endif // META_OCEAN_CV_FRAME_INTERPOLATOR_H
This class implements functions interpolating frames and image content.
Definition FrameInterpolator.h:36
static bool affine(const Frame &input, Frame &output, const SquareMatrix3 &input_A_output, const InterpolationMethod interpolationMethod=IM_BILINEAR, const uint8_t *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Applies an affine transformation to an image (with zipped pixel format).
ResizeMethod
Definition of individual resize methods.
Definition FrameInterpolator.h:57
@ RM_NEAREST_PIXEL
An interpolation applying a nearest pixel (nearest neighbor) lookup.
Definition FrameInterpolator.h:61
@ RM_BILINEAR
An interpolation applying a bilinear interpolation.
Definition FrameInterpolator.h:63
@ RM_NEAREST_PYRAMID_LAYER_11_BILINEAR
A two-step interpolation, first applying a pyramid down sampling with a 11 filtering,...
Definition FrameInterpolator.h:65
@ RM_INVALID
An invalid resize method.
Definition FrameInterpolator.h:59
@ RM_NEAREST_PYRAMID_LAYER_14641_BILINEAR
A two-step interpolation, first applying a pyramid down sampling with a 14641 filtering,...
Definition FrameInterpolator.h:67
static bool resize(Frame &frame, const unsigned int width, const unsigned int height, const ResizeMethod resizeMethod=RM_AUTOMATIC, Worker *worker=nullptr)
Resizes/rescales a given frame by application of a specified interpolation method.
static bool homography(const Frame &input, Frame &output, const SquareMatrix3 &input_H_output, const InterpolationMethod interpolationMethod=IM_BILINEAR, const uint8_t *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame (with zipped pixel format) into an output frame (with arbitrary frame ...
static bool resize(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ResizeMethod resizeMethod, Worker *worker=nullptr)
Resizes/rescales a given 1-plane frame by application of a specified interpolation method.
InterpolationMethod
Definition of individual interpolation methods.
Definition FrameInterpolator.h:43
@ IM_NEAREST_PIXEL
An interpolation applying a nearest pixel (nearest neighbor) lookup.
Definition FrameInterpolator.h:47
@ IM_INVALID
An invalid interpolation method.
Definition FrameInterpolator.h:45
static bool resize(const Frame &source, Frame &target, const ResizeMethod resizeMethod=RM_AUTOMATIC, Worker *worker=nullptr)
Resizes/rescales a given frame by application of a specified interpolation method.
This class implements a frame pyramid.
Definition FramePyramid.h:37
static unsigned int idealLayers(const unsigned int width, const unsigned int height, const unsigned int invalidCoarsestWidthOrHeight, unsigned int *coarsestLayerWidth=nullptr, unsigned int *coarsestLayerHeight=nullptr)
Determines the number of layers until an invalid frame size would be reached in the next layer.
bool isValid() const
Returns whether this pyramid holds at least one frame layer.
Definition FramePyramid.h:863
@ DM_FILTER_14641
Down sampling is realized by a 5x5 Gaussian filter.
Definition FramePyramid.h:72
const Frame & coarsestLayer() const
Returns the coarsest layer frame of this pyramid regarding to the number of valid layers.
Definition FramePyramid.h:747
static void downsampleByTwo8BitPerChannel11(const uint8_t *source, uint8_t *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int channels, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Reduces the resolution of a given frame by two, applying a 1-1 downsampling.
Definition FrameShrinker.h:508
static void downsampleByTwo8BitPerChannel14641(const uint8_t *const source, uint8_t *const target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int channels, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Reduces the resolution of a given frame by two, applying a 1-4-6-4-1 downsampling.
Definition FrameShrinker.h:561
This class implements a 2D pixel position with pixel precision.
Definition PixelPosition.h:65
This class implements Ocean's image class.
Definition Frame.h:1808
const T * constdata(const unsigned int planeIndex=0u) const
Returns a pointer to the read-only pixel data of a specific plane.
Definition Frame.h:4248
unsigned int paddingElements(const unsigned int planeIndex=0u) const
Returns the optional number of padding elements at the end of each row for a specific plane.
Definition Frame.h:4122
unsigned int width() const
Returns the width of the frame format in pixel.
Definition Frame.h:3170
PixelOrigin
Defines different types of frame origin positions.
Definition Frame.h:1046
@ ORIGIN_UPPER_LEFT
The first pixel lies in the upper left corner, the last pixel in the lower right corner.
Definition Frame.h:1050
unsigned int height() const
Returns the height of the frame in pixel.
Definition Frame.h:3175
This class implements a worker able to distribute function calls over different threads.
Definition Worker.h:33
The namespace covering the entire Ocean framework.
Definition Accessor.h:15