Ocean
FrameInterpolatorNearestPixel.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 #ifndef META_OCEAN_CV_FRAME_INTERPOLATOR_NEAREST_PIXEL_H
9 #define META_OCEAN_CV_FRAME_INTERPOLATOR_NEAREST_PIXEL_H
10 
11 #include "ocean/cv/CV.h"
13 #include "ocean/cv/PixelPosition.h"
14 
15 #include "ocean/base/DataType.h"
16 #include "ocean/base/Frame.h"
17 #include "ocean/base/Memory.h"
18 #include "ocean/base/Worker.h"
19 
20 #include "ocean/math/Lookup2.h"
22 
23 namespace Ocean
24 {
25 
26 namespace CV
27 {
28 
29 /**
30  * This class implements a nearest pixel frame interpolator.
31  * Actually, no pixels are interpolated, but the color intensities from the nearest pixels (e.g., based on rounding) are used.<br>
32  * @ingroup cv
33  */
34 class OCEAN_CV_EXPORT FrameInterpolatorNearestPixel
35 {
36  public:
37 
38  /// Definition of a lookup table for 2D vectors.
40 
41  public:
42 
43  /**
44  * The following comfort class provides comfortable functions simplifying prototyping applications but also increasing binary size of the resulting applications.
45  * Best practice is to avoid using these functions if binary size matters,<br>
46  * as for every comfort function a corresponding function exists with specialized functionality not increasing binary size significantly.<br>
47  */
48  class OCEAN_CV_EXPORT Comfort
49  {
50  public:
51 
52  /**
53  * Resizes a given frame by a nearest pixel search.
54  * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
55  * @param source The source frame that will be resized, must have a zipped pixel format, must be valid
56  * @param target The target frame that receives the image information of the source frame, the pixel format and pixel origin must match with the source frame
57  * @param worker Optional worker object to distribute the computational load
58  * @return True, if succeeded
59  */
60  static bool resize(const Frame& source, Frame& target, Worker* worker = nullptr);
61 
62  /**
63  * Resizes a given frame in place by a nearest pixel search.
64  * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
65  * @param frame The frame that will be resized, must have a zipped pixel format, must be valid
66  * @param targetWidth Width of the new target frame in pixel, with range [1, infinity)
67  * @param targetHeight Height of the new target frame in pixel, with range [1, infinity)
68  * @param worker Optional worker object to distribute the computational load
69  * @return True, if succeeded
70  */
71  static inline bool resize(Frame& frame, const unsigned int targetWidth, const unsigned int targetHeight, Worker* worker = nullptr);
72 
73  /**
74  * Applies an affine image transformation to a frame (with zipped pixel format) and renders using nearest-neighbor interpolation.
75  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.
76  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the affine transformation).
77  * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
78  * @param input The input frame that will be transformed, must have a zipped pixel format, must be valid
79  * @param output The output frame resulting by application of the given affine transformation, with same pixel format and pixel origin as the input frame, must have a valid dimension
80  * @param input_A_output Affine transformation used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
81  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
82  * @param worker Optional worker object to distribute the computational load
83  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
84  * @return True, if succeeded
85  */
86  static bool affine(const Frame& input, Frame& output, const SquareMatrix3& input_A_output, const uint8_t* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
87 
88  /**
89  * Transforms a given input frame (with zipped pixel format) into an output frame by application of a homography.
90  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
91  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
92  * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
93  * @param input The input frame that will be transformed, must have a zipped pixel format, must be valid
94  * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must be valid
95  * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
96  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
97  * @param worker Optional worker object to distribute the computational load
98  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
99  * @return True, if succeeded
100  */
101  static bool homography(const Frame& input, Frame& output, const SquareMatrix3& input_H_output, const void* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
102 
103  /**
104  * Transforms a given input frame (with zipped pixel format) into an output frame (with arbitrary frame dimension) by application of a homography.
105  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
106  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
107  * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
108  * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
109  * @param input The input frame that will be transformed, must have a zipped pixel format, must be valid
110  * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must have a valid dimension
111  * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
112  * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
113  * @param worker Optional worker object to distribute the computational load
114  * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
115  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
116  * @return True, if succeeded
117  * @see Geometry::Homography::coversHomographyInputFrame().
118  */
119  static bool homographyMask(const Frame& input, Frame& output, Frame& outputMask, const SquareMatrix3& input_H_output, Worker* worker = nullptr, const uint8_t maskValue = 0xFFu, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
120 
121  /**
122  * Transforms a given input frame (with 1 plane) into an output frame by application of an interpolation lookup table.
123  * The output frame must have the same pixel format and pixel origin as the input frame.<br>
124  * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
125  * @param input The input frame which will be transformed, must have a zipped pixel format, must be valid
126  * @param output Resulting output frame, the frame dimension will be set to match the size of the lookup table, pixel format and pixel origin will be set to match the given input frame
127  * @param lookupTable The lookup table which defines the transformation, must be valid
128  * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
129  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
130  * @param worker Optional worker object to distribute the computation
131  * @return True, if succeeded
132  */
133  static bool transform(const Frame& input, Frame& output, const LookupTable& lookupTable, const bool offset, const uint8_t* borderColor, Worker* worker = nullptr);
134 
135  /**
136  * Transforms a given input frame (with zipped pixel format) into an output frame by application of an interpolation lookup table.
137  * The output frame must have the same pixel format and pixel origin as the input frame.<br>
138  * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
139  * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
140  * @param input The input frame which will be transformed, must have a zipped pixel format
141  * @param output Resulting output frame, the frame dimension will be set to match the size of the lookup table, pixel format and pixel origin will be set to match the given input frame
142  * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
143  * @param lookupTable The lookup table which defines the transformation, must be valid
144  * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
145  * @param worker Optional worker object to distribute the computation
146  * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
147  * @return True, if succeeded
148  */
149  static bool transformMask(const Frame& input, Frame& output, Frame& outputMask, const LookupTable& lookupTable, const bool offset, Worker* worker = nullptr, const uint8_t maskValue = 0xFFu);
150 
151  /**
152  * Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
153  * This function is just a wrapper for CV::FrameTransposer::Comfort::rotate90().
154  * @param input The input frame which will be rotated, must be valid
155  * @param output The resulting rotated output frame, the frame type will be set automatically
156  * @param clockwise True, to rotate the frame clockwise; False, to rotate the frame counter-clockwise
157  * @param worker Optional worker object to distribute the computation
158  * @return True, if succeeded
159  */
160  static inline bool rotate90(const Frame& input, Frame& output, const bool clockwise, Worker* worker = nullptr);
161 
162  /**
163  * Rotates a given frame by 180 degrees.
164  * This function is just a wrapper for CV::FrameTransposer::Comfort::rotate180().
165  * @param input The input frame which will be rotated, must be valid
166  * @param output The resulting rotated output frame, the frame type will be set automatically
167  * @param worker Optional worker object to distribute the computation
168  * @return True, if succeeded
169  */
170  static inline bool rotate180(const Frame& input, Frame& output, Worker* worker = nullptr);
171 
172  /**
173  * Rotates a given frame with 90 degree steps.
174  * This function is just a wrapper for CV::FrameTransposer::Comfort::rotate().
175  * @param input The input frame which will be rotated, must be valid
176  * @param output The resulting rotated output frame, the frame type will be set automatically
177  * @param angle The clockwise rotation angle to be used, must be a multiple of +/- 90, with range (-infinity, infinity)
178  * @param worker Optional worker object to distribute the computation
179  * @return True, if succeeded
180  */
181  static bool rotate(const Frame& input, Frame& output, const int angle, Worker* worker = nullptr);
182  };
183 
184  /**
185  * This class implements highly optimized interpolation functions with fixed properties.
186  * The functions can be significantly faster as these functions are tailored to the specific properties.
187  */
188  class OCEAN_CV_EXPORT SpecialCases
189  {
190  public:
191 
192  /**
193  * Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 224x224 by using a bilinear interpolation.
194  * This function exploits the fact that lookup locations and interpolation factors repeat after 25 pixels (14 pixels in the target resolution, as 400:224 = 25:14, i.e., 16 repetitions per row). NOTE(review): this description and the see-also entry below refer to bilinear interpolation although the enclosing class is the nearest-pixel interpolator; confirm against the implementation.
195  * @param source The source frame buffer with resolution 400x400, must be valid
196  * @param target The target frame buffer receiving the resized image information, with resolution 224x224, must be valid
197  * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
198  * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
199  * @see FrameInterpolatorBilinear::resize<T, tChannels>().
200  */
201  static void resize400x400To224x224_8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
202  };
203 
204  /**
205  * Resizes a given frame by a nearest pixel search and uses several CPU cores to speed up the process.
206  * @param source The source frame buffer, must be valid
207  * @param target The target frame buffer, must be valid
208  * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
209  * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
210  * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
211  * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
212  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
213  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
214  * @param worker Optional worker to distribute the computation
215  * @tparam T Data type of the pixel channel values
216  * @tparam tChannels Number of data channels, with range [1, infinity)
217  */
218  template <typename T, unsigned int tChannels>
219  static inline void resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
220 
221  /**
222  * Applies an affine image transformation to an 8 bit per channel input frame and renders the output.
223  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.
224  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the affine transformation).
225  * @param input The input frame that will be transformed, must be valid
226  * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
227  * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
228  * @param input_A_output The affine transformation used to transform the given input frame, transforming output points to input points, must be valid
229  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
230  * @param output The output frame using the given affine transform, must be valid
231  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
232  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
233  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
234  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
235  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
236  * @param worker Optional worker object to distribute the computational load
237  * @tparam tChannels The number of channels of the frame, with range [1, infinity)
238  */
239  template <unsigned int tChannels>
240  static inline void affine8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_A_output, const uint8_t* borderColor, uint8_t* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
241 
242  /**
243  * Transforms a given input frame into an output frame by application of a homography.
244  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
245  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
246  * @param input The input frame that will be transformed, must be valid
247  * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
248  * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
249  * @param input_H_output The homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
250  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0 to each channel
251  * @param output The output frame using the given homography, must be valid
252  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
253  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
254  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
255  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
256  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
257  * @param worker Optional worker object to distribute the computational load
258  * @tparam T Data type of each pixel channel, e.g., float, double, int
259  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
260  * @see homographyMask8BitPerChannel().
261  */
262  template <typename T, unsigned int tChannels>
263  static inline void homography(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const T* borderColor, T* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
264 
265  /**
266  * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
267  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
268  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
269  * @param input The input frame that will be transformed, must be valid
270  * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
271  * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
272  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
273  * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
274  * @param output The output frame using the given homography, must be valid
275  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
276  * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
277  * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
278  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
279  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
280  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
281  * @param worker Optional worker object to distribute the computational load
282  * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
283  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
284  * @see homography().
285  */
286  template <unsigned int tChannels>
287  static inline void homographyMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3& input_H_output, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
288 
289  /**
290  * Transforms a given input frame (with zipped pixel format) into an output frame by application of an interpolation lookup table.
291  * The output frame must have the same pixel format and pixel origin as the input frame.<br>
292  * @param input The input frame which will be transformed, must be valid
293  * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
294  * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
295  * @param lookupTable The lookup table which defines the transformation, must be valid
296  * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
297  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
298  * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
299  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
300  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
301  * @param worker Optional worker object to distribute the computation
302  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
303  */
304  template <unsigned int tChannels>
305  static inline void transform8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& lookupTable, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
306 
307  /**
308  * Transforms a given input frame (with zipped pixel format) into an output frame by application of an interpolation lookup table.
309  * The output frame must have the same pixel format and pixel origin as the input frame.<br>
310  * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
311  * @param input The input frame which will be transformed, must be valid
312  * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
313  * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
314  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
315  * @param lookupTable The lookup table which defines the transformation, must be valid
316  * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
317  * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
318  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
319  * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
320  * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
321  * @param worker Optional worker object to distribute the computation
322  * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
323  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
324  */
325  template <unsigned int tChannels>
326  static inline void transformMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable& lookupTable, const bool offset, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
327 
328  /**
329  * Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
330  * @param source The source frame which will be rotated, must be valid
331  * @param target The resulting rotated target frame, must be valid and must have the same buffer size as the source frame
332  * @param sourceWidth The width of the source frame in pixel, with range [1, infinity)
333  * @param sourceHeight The height of the source frame in pixel, with range [1, infinity)
334  * @param clockwise True, to rotate the frame clockwise; False, to rotate the frame counter-clockwise
335  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
336  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
337  * @param worker Optional worker object to distribute the computation
338  * @tparam TElementType Data type of the elements of the image pixels
339  * @tparam tChannels Number of data channels, with range [1, infinity)
340  */
341  template <typename TElementType, unsigned int tChannels>
342  static inline void rotate90(const TElementType* source, TElementType* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const bool clockwise, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
343 
344  /**
345  * Checks whether the application of a given homography for a specified input frame and output frame covers the entire image information (which is necessary for the nearest pixel 'interpolation') or whether the homography relies on missing image information.
346  * @param inputWidth The width of the input frame in pixel, with range [1, infinity)
347  * @param inputHeight The height of the input frame in pixel, with range [1, infinity)
348  * @param outputWidth The width of the output frame in pixel, with range [1, infinity)
349  * @param outputHeight The height of the output frame in pixel, with range [1, infinity)
350  * @param input_H_output The homography to check which transforms points by the following equation: inputPoint = input_H_output * outputPoint, must be valid
351  * @param outputOriginX The horizontal origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)
352  * @param outputOriginY The vertical origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)
353  * @return True, if the homography covers the entire input image information (if all output pixels will receive valid data from the input frame); False, otherwise
354  */
355  static bool coversHomographyInputFrame(const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int outputWidth, const unsigned int outputHeight, const SquareMatrix3& input_H_output, const int outputOriginX = 0, const int outputOriginY = 0);
356 
357  private:
358 
359  /**
360  * Resizes a given frame by a nearest pixel search.
361  * @param source The source frame buffer, must be valid
362  * @param target The target frame buffer, must be valid
363  * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
364  * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
365  * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
366  * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
367  * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
368  * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
369  * @param firstTargetRow First (including) row to convert, with range [0, targetHeight)
370  * @param numberTargetRows Number of rows to convert, with range [1, targetHeight - firstTargetRow]
371  * @tparam T Data type of the pixel channel values
372  * @tparam tChannels Number of data channels, range: [1, infinity)
373  */
374  template <typename T, unsigned int tChannels>
375  static void resizeSubset(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
376 
377  /**
378  * Applies an affine image transformation to a subset of the rows of an 8 bit per channel frame using nearest neighbor interpolation.
379  * @param input The input frame that will be transformed, must be valid
380  * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
381  * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
382  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
383  * @param affineTransform Affine transformation used to transform the given input frame, must be valid
384  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
385  * @param output The output frame using the given affine transform, must be valid
386  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
387  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
388  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
389  * @param firstOutputRow The first output row to be handled, with range [0, outputHeight)
390  * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
391  * @tparam tChannels Number of frame channels, range: [1, infinity)
392  */
393  template <unsigned int tChannels>
394  static void affine8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
395 
396  /**
397  * Copies the image content of an input image to a subset of an output image by application of a given homography transformation.
398  * @param input The input frame that will be transformed, must be valid
399  * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
400  * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
401  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
402  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0 to each channel
403  * @param output The output frame using the given homography, must be valid
404  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
405  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
406  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
407  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
408  * @param firstOutputRow The first output row to be handled, with range [0, outputHeight)
409  * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
410  * @tparam T Data type of each pixel channel, e.g., float, double, int
411  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
412  */
413  template <typename T, unsigned int tChannels>
414  static void homographySubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
415 
416  #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
417 
418  /**
419  * Applies an affine image transformation to a subset of the rows of an 8 bit per channel frame using nearest neighbor interpolation (using SSE).
420  * Beware: This function disregards the last row of the affine transformation matrix completely and only uses the top two rows, i.e., the elements a through f.
421  * @param input The input frame that will be transformed
422  * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
423  * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
424  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
425  * @param affineTransform Affine transformation which is applied to the input frame
426  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
427  * @param output The output frame where the result of the transformation will be stored
428  * @param outputWidth The width of the output image in pixel, with range [4, infinity)
429  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
430  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
431  * @param firstOutputRow The first output row to be handled
432  * @param numberOutputRows Number of output rows to be handled
433  * @tparam tChannels Number of frame channels
434  * @see affine8BitPerChannelSubset(), affine8BitPerChannelIntegerNEONSubset().
435  */
436  template <unsigned int tChannels>
437  static inline void affine8BitPerChannelSSESubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
438 
439  /**
440  * Copies the image content of an input image to a subset of an output image by application of a given homography transformation (using SSE).
441  * @param input The input frame that will be transformed, must be valid
442  * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
443  * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
444  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
445  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0 to each channel
446  * @param output The output frame using the given homography, must be valid
447  * @param outputWidth The width of the output image in pixel, with range [1, infinity) (NOTE(review): homography() only dispatches to this function for outputWidth >= 4 - confirm whether the lower bound should be 4)
448  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
449  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
450  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
451  * @param firstOutputRow The first output row to be handled, with range [0, outputHeight)
452  * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
453  * @tparam T Data type of each pixel channel, e.g., float, double, int
454  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
455  */
456  template <typename T, unsigned int tChannels>
457  static void homographySSESubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
458 
459  #endif // OCEAN_HARDWARE_SSE_VERSION >= 41
460 
461  #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
462 
463  /**
464  * Applies an affine image transformation to a subset of the rows of an 8 bit per channel frame using nearest neighbor interpolation (using NEON and integer fixed-point arithmetic).
465  * @param input The input frame that will be transformed, must be valid
466  * @param inputWidth Width of the input frame in pixel, with range [1, 65536)
467  * @param inputHeight Height of the input frame in pixel, with range [1, 65536)
468  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
469  * @param affineTransform Affine transformation used to transform the given input frame, must be valid
470  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
471  * @param output The output frame using the given affine transform, must be valid
472  * @param outputWidth The width of the output image in pixel, with range [1, 65536)
473  * @param outputHeight The height of the output image in pixel, with range [1, 65536)
474  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
475  * @param firstOutputRow The first output row to be handled, with range [0, outputHeight)
476  * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
477  * @tparam tChannels Number of frame channels
478  */
479  template <unsigned int tChannels>
480  static inline void affine8BitPerChannelIntegerNEONSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
481 
482  /**
483  * Copies the image content of an input image to a subset of an output image by application of a given homography transformation (using NEON).
484  * Beware: The output width 'outputWidth' must be >= 4, use homographySubset() for narrower output frames.
485  * @param input The input frame that will be transformed
486  * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
487  * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
488  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
489  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, @c nullptr to assign 0 to each channel
490  * @param output The output frame using the given homography
491  * @param outputWidth The width of the output image in pixel, with range [4, infinity)
492  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
493  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
494  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
495  * @param firstOutputRow The first output row to be handled
496  * @param numberOutputRows Number of output rows to be handled
497  * @tparam T Data type of each pixel channel, e.g., float, double, int
498  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
499  * @see homographySubset().
500  */
501  template <typename T, unsigned int tChannels>
502  static inline void homographyNEONSubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
503 
504  #endif // OCEAN_HARDWARE_NEON_VERSION
505 
506  /**
507  * Transforms a subset of the rows of an 8 bit per channel frame using the given homography and creates a corresponding mask.
508  * @param input The input frame that will be transformed, must be valid
509  * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
510  * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
511  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
512  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
513  * @param output The output frame using the given homography, must be valid
514  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
515  * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
516  * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
517  * @param maskValue 8 bit mask value for output pixels with a corresponding pixel lying inside the input frame, output pixels without such a pixel will be assigned with (0xFF - maskValue)
518  * @param outputOriginX The horizontal coordinate of the output frame's origin
519  * @param outputOriginY The vertical coordinate of the output frame's origin
520  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
521  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
522  * @param firstOutputRow The first output row to be handled, with range [0, outputHeight)
523  * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
524  * @tparam tChannels Number of frame channels
525  */
526  template <unsigned int tChannels>
527  static inline void homographyMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* input_H_output, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
528 
529  /**
530  * Transforms a subset of a given input frame (with zipped pixel format) into an output frame by application of an interpolation lookup table.
531  * The output frame must have the same pixel format and pixel origin as the input frame.<br>
532  * @param input The input frame which will be transformed, must be valid
533  * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
534  * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
535  * @param lookupTable The lookup table which defines the transformation, must be valid
536  * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
537  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
538  * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
539  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
540  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
541  * @param firstRow First row to be handled, with range [0, lookupTable->sizeY())
542  * @param numberRows Number of rows to be handled, with range [1, lookupTable->sizeY() - firstRow]
543  * @tparam tChannels Number of channels of the frame
544  */
545  template <unsigned int tChannels>
546  static void transform8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* lookupTable, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
547 
548  /**
549  * Transforms a subset of a given input frame (with zipped pixel format) into an output frame by application of an interpolation lookup table.
550  * The output frame must have the same pixel format and pixel origin as the input frame.<br>
551  * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further, these pixels are untouched in the output frame.<br>
552  * @param input The input frame which will be transformed, must be valid
553  * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
554  * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
555  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
556  * @param lookupTable The lookup table which defines the transformation, must be valid
557  * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
558  * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
559  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
560  * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
561  * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
562  * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
563  * @param firstRow First row to be handled, with range [0, lookupTable->sizeY())
564  * @param numberRows Number of rows to be handled, with range [1, lookupTable->sizeY() - firstRow]
565  * @tparam tChannels Number of channels of the frame
566  */
567  template <unsigned int tChannels>
568  static void transformMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable* lookupTable, const bool offset, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows);
569 };
570 
571 inline bool FrameInterpolatorNearestPixel::Comfort::resize(Frame& frame, const unsigned int targetWidth, const unsigned int targetHeight, Worker* worker)
572 {
573  ocean_assert(frame && targetWidth >= 1u && targetHeight >= 1u);
574 
575  Frame resizedFrame(FrameType(frame, targetWidth, targetHeight)); // same frame type as 'frame', but with the target resolution
576 
577  const bool succeeded = resize(frame, resizedFrame, worker);
578 
579  if (succeeded)
580  {
581  resizedFrame.setTimestamp(frame.timestamp()); // both timestamps of the original frame are carried over
582  resizedFrame.setRelativeTimestamp(frame.relativeTimestamp());
583 
584  frame = std::move(resizedFrame); // 'frame' is replaced on success only, it stays untouched on failure
585  }
586  return succeeded;
587 }
588 
589 inline bool FrameInterpolatorNearestPixel::Comfort::rotate90(const Frame& input, Frame& output, const bool clockwise, Worker* worker)
590 {
591  return FrameTransposer::Comfort::rotate90(input, output, clockwise, worker); // pure forwarder: all arguments are passed on unchanged, the result is returned as is
592 }
593 
594 inline bool FrameInterpolatorNearestPixel::Comfort::rotate180(const Frame& input, Frame& output, Worker* worker)
595 {
596  return FrameTransposer::Comfort::rotate180(input, output, worker); // pure forwarder: all arguments are passed on unchanged, the result is returned as is
597 }
598 
599 inline bool FrameInterpolatorNearestPixel::Comfort::rotate(const Frame& input, Frame& output, const int angle, Worker* worker)
600 {
601  return FrameTransposer::Comfort::rotate(input, output, angle, worker); // pure forwarder: all arguments are passed on unchanged, the result is returned as is
602 }
603 
604 template <typename T, unsigned int tChannels>
605 inline void FrameInterpolatorNearestPixel::resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
606 {
607  ocean_assert(source && target);
608 
609  if (worker == nullptr) // no worker provided: all target rows are handled with one direct call
610  {
611  resizeSubset<T, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
612  }
613  else // NOTE(review): unlike the other dispatchers in this file, no explicit parameter indices are passed to the worker here - presumably it falls back to the two trailing parameters; confirm
614  {
615  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::resizeSubset<T, tChannels>, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
616  }
617 }
618 
619 template <unsigned int tChannels>
620 inline void FrameInterpolatorNearestPixel::affine8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& affineTransform, const uint8_t* borderColor, uint8_t* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
621 {
622  // Merge the additional translation given by 'outputOrigin' into the affine transformation, the subset functions receive the merged transformation only
623  const SquareMatrix3 adjustedAffineTransform = affineTransform * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
624 
625  if (worker) // worker provided: 10u and 11u below are the zero-based positions of firstOutputRow and numberOutputRows (the 0u, 0u placeholders) in the subset functions
626  {
627  if (outputWidth >= 4u) // the SSE implementation is documented for output widths in [4, infinity), narrower outputs use the generic implementation
628  {
629 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
630  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::affine8BitPerChannelSSESubset<tChannels>, input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
631  return;
632 #elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
633  if (inputWidth <= 65535u && inputHeight <= 65535u && outputWidth <= 65535u && outputHeight <= 65535u) // the integer NEON implementation is documented for dimensions in [1, 65536) only
634  {
635  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::affine8BitPerChannelIntegerNEONSubset<tChannels>, input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 32u); // NOTE(review): last argument is 32u here but 20u in every other worker call of this function - confirm this is intended
636  return;
637  }
638 #endif
639  }
640 
641  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::affine8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u); // fallback: generic implementation
642  }
643  else // no worker provided: the same selection as above, but all output rows are handled with one direct call
644  {
645  if (outputWidth >= 4u)
646  {
647 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
648  affine8BitPerChannelSSESubset<tChannels>(input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, outputHeight);
649  return;
650 #elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
651  if (inputWidth <= 65535u && inputHeight <= 65535u && outputWidth <= 65535u && outputHeight <= 65535u)
652  {
653  affine8BitPerChannelIntegerNEONSubset<tChannels>(input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, outputHeight);
654  return;
655  }
656 #endif
657  }
658 
659  affine8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, outputHeight); // fallback: generic implementation
660  }
661 }
662 
663 template <typename T, unsigned int tChannels>
664 inline void FrameInterpolatorNearestPixel::homography(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const T* borderColor, T* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
665 {
666  static_assert(tChannels >= 1u, "Invalid channel number!");
667 
668  // Merge the additional translation given by 'outputOrigin' into the homography, the subset functions receive the merged homography only
669  const SquareMatrix3 input_H_adjustedOutput = input_H_output * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
670 
671  typedef typename TypeMapper<T>::Type MappedTypeT; // the subset functions are instantiated with the mapped type; input, borderColor and output are cast to it below
672 
673  if (worker) // worker provided: 10u and 11u below are the zero-based positions of firstOutputRow and numberOutputRows (the 0u, 0u placeholders) in the subset functions
674  {
675  if (outputWidth >= 4u) // the NEON implementation is documented for output widths >= 4, narrower outputs use the generic implementation
676  {
677 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
678  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::homographySSESubset<MappedTypeT, tChannels>, (const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
679  return;
680 #elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
681  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::homographyNEONSubset<MappedTypeT, tChannels>, (const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
682  return;
683 #endif
684  }
685 
686  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::homographySubset<MappedTypeT, tChannels>, (const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u); // fallback: generic implementation
687  }
688  else // no worker provided: the same selection as above, but all output rows are handled with one direct call
689  {
690  if (outputWidth >= 4u)
691  {
692 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
693  homographySSESubset<MappedTypeT, tChannels>((const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
694  return;
695 #elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
696  homographyNEONSubset<MappedTypeT, tChannels>((const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
697  return;
698 #endif
699  }
700 
701  homographySubset<MappedTypeT, tChannels>((const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight); // fallback: generic implementation
702  }
703 }
704 
705 template <unsigned int tChannels>
706 inline void FrameInterpolatorNearestPixel::homographyMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3& input_H_output, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, Worker* worker, const uint8_t maskValue)
707 {
708  if (worker == nullptr) // no worker provided: all output rows are handled with one direct call
709  {
710  homographyMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, inputPaddingElements, &input_H_output, output, outputPaddingElements, outputMask, outputMaskPaddingElements, maskValue, outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, 0u, outputHeight);
711  }
712  else // worker provided: 14u and 15u are the zero-based positions of firstOutputRow and numberOutputRows (the 0u, 0u placeholders) in the subset function
713  {
714  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::homographyMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, inputPaddingElements, &input_H_output, output, outputPaddingElements, outputMask, outputMaskPaddingElements, maskValue, outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, 0u, 0u), 0u, outputHeight, 14u, 15u, 20u);
715  }
716 }
717 
718 template <unsigned int tChannels>
719 inline void FrameInterpolatorNearestPixel::transform8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& lookupTable, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
720 {
721  if (worker == nullptr) // no worker provided: all rows of the lookup table are handled with one direct call
722  {
723  transform8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &lookupTable, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (unsigned int)(lookupTable.sizeY()));
724  }
725  else // worker provided: 9u and 10u are the zero-based positions of firstRow and numberRows (the 0u, 0u placeholders) in the subset function
726  {
727  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::transform8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &lookupTable, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (unsigned int)(lookupTable.sizeY()), 9u, 10u, 20u);
728  }
729 }
730 
731 template <unsigned int tChannels>
732 inline void FrameInterpolatorNearestPixel::transformMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable& lookupTable, const bool offset, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, Worker* worker, const uint8_t maskValue)
733 {
734  if (worker == nullptr) // no worker provided: all rows of the lookup table are handled with one direct call
735  {
736  transformMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, inputPaddingElements, &lookupTable, offset, output, outputPaddingElements, outputMask, outputMaskPaddingElements, maskValue, 0u, (unsigned int)(lookupTable.sizeY()));
737  }
738  else // worker provided: 11u and 12u are the zero-based positions of firstRow and numberRows (the 0u, 0u placeholders) in the subset function
739  {
740  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::transformMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, inputPaddingElements, &lookupTable, offset, output, outputPaddingElements, outputMask, outputMaskPaddingElements, maskValue, 0u, 0u), 0u, (unsigned int)(lookupTable.sizeY()), 11u, 12u, 20u);
741  }
742 }
743 
744 template <typename TElementType, unsigned int tChannels>
745 inline void FrameInterpolatorNearestPixel::rotate90(const TElementType* source, TElementType* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const bool clockwise, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
746 {
747  static_assert(tChannels >= 1u, "Invalid channel number!");
748 
749  ocean_assert(source != nullptr && target != nullptr);
750  ocean_assert(source != target);
751  ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
752 
753  FrameTransposer::rotate90<TElementType, tChannels>(source, target, sourceWidth, sourceHeight, clockwise, sourcePaddingElements, targetPaddingElements, worker);
754 }
755 
756 template <typename T, unsigned int tChannels>
757 void FrameInterpolatorNearestPixel::resizeSubset(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
758 {
759  static_assert(tChannels > 0u, "Invalid channel number!");
760  static_assert(sizeof(T) != 0, "Invalid data type!");
761 
762  ocean_assert(source != nullptr && target != nullptr);
763  ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
764  ocean_assert(targetWidth != 0u && targetHeight != 0u);
765 
766  ocean_assert(firstTargetRow + numberTargetRows <= targetHeight);
767 
768  const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
769  const unsigned int targetStrideElements = targetWidth * tChannels + targetPaddingElements;
770 
771  Memory memoryHorizontalLookups = Memory::create<unsigned int>(targetWidth);
772  unsigned int* horizontalLookups = memoryHorizontalLookups.data<unsigned int>();
773 
774  for (unsigned int tx = 0u; tx < targetWidth; ++tx)
775  {
776  const unsigned int sx = tx * sourceWidth / targetWidth;
777  ocean_assert(sx < sourceWidth);
778 
779  horizontalLookups[tx] = sx * tChannels;
780  }
781 
782  target += firstTargetRow * targetStrideElements;
783 
784  for (unsigned int ty = firstTargetRow; ty < firstTargetRow + numberTargetRows; ++ty)
785  {
786  const unsigned int sy = ty * sourceHeight / targetHeight;
787  ocean_assert(sy < sourceHeight);
788 
789  const T* const sourceRow = source + sy * sourceStrideElements;
790 
791  for (unsigned int tx = 0; tx < targetWidth; ++tx)
792  {
793  const T* const sourcePointer = sourceRow + horizontalLookups[tx];
794 
795  for (unsigned int n = 0u; n < tChannels; ++n)
796  {
797  *target++ = sourcePointer[n];
798  }
799  }
800 
801  target += targetPaddingElements;
802  }
803 }
804 
805 template <unsigned int tChannels>
806 void FrameInterpolatorNearestPixel::affine8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
807 {
808  static_assert(tChannels >= 1u, "Invalid channel number!");
809 
810  ocean_assert(input != nullptr && output != nullptr);
811  ocean_assert(inputWidth > 0u && inputHeight > 0u);
812  ocean_assert_and_suppress_unused(outputWidth > 0u && outputHeight > 0u, outputHeight);
813  ocean_assert(affineTransform);
814  ocean_assert(!affineTransform->isNull() && Numeric::isEqualEps((*affineTransform)[2]) && Numeric::isEqualEps((*affineTransform)[5]));
815 
816  ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
817 
818  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
819 
820  const uint8_t zeroColor[tChannels] = {uint8_t(0)};
821  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
822 
823  PixelType* outputData = (PixelType*)(output + firstOutputRow * (outputWidth * tChannels + outputPaddingElements));
824 
825  for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
826  {
827  //
828  // We can slightly optimize the 3x3 matrix multiplication:
829  //
830  // | X0 Y0 Z0 | | x |
831  // | X1 Y1 Z1 | * | y |
832  // | 0 0 1 | | 1 |
833  //
834  // | xx | | X0 * x | | Y0 * y + Z0 |
835  // | yy | = | X1 * x | + | Y1 * y + Z1 |
836  //
837  // As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
838  //
839  // C0 = Y0 * y + Z0
840  // C1 = Y1 * y + Z1
841  //
842  // So the computation becomes:
843  //
844  // | x' | | X0 * x | | C0 |
845  // | y' | = | X1 * x | + | C1 |
846  //
847 
848  const Vector2 X(affineTransform->data() + 0);
849  const Vector2 c(Vector2(affineTransform->data() + 3) * Scalar(y) + Vector2(affineTransform->data() + 6));
850 
851  for (unsigned int x = 0u; x < outputWidth; ++x)
852  {
853  const Vector2 inputPosition = X * Scalar(x) + c;
854 
855 #ifdef OCEAN_DEBUG
856  const Scalar debugX = (*affineTransform)[0] * Scalar(x) + (*affineTransform)[3] * Scalar(y) + (*affineTransform)[6];
857  const Scalar debugY = (*affineTransform)[1] * Scalar(x) + (*affineTransform)[4] * Scalar(y) + (*affineTransform)[7];
858  ocean_assert(inputPosition.isEqual(Vector2(debugX, debugY), Scalar(0.01)));
859 #endif
860 
861  const unsigned int inputX = Numeric::round32(inputPosition.x());
862  const unsigned int inputY = Numeric::round32(inputPosition.y());
863 
864  if (inputX < inputWidth && inputY < inputHeight)
865  *outputData = *(PixelType*)(input + inputY * (inputWidth * tChannels + inputPaddingElements) + inputX * tChannels);
866  else
867  *outputData = *bColor;
868 
869  outputData++;
870  }
871 
872  outputData = (PixelType*)((uint8_t*)outputData + outputPaddingElements);
873  }
874 }
875 
876 template <typename T, unsigned int tChannels>
877 void FrameInterpolatorNearestPixel::homographySubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
878 {
879  static_assert(tChannels > 0u, "Invalid channel number!");
880 
881  ocean_assert(input != nullptr && output != nullptr);
882  ocean_assert(inputWidth > 0u && inputHeight > 0u);
883  ocean_assert(outputWidth > 0u && outputHeight > 0u);
884  ocean_assert(input_H_output != nullptr && !input_H_output->isSingular());
885 
886  ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
887 
888  const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
889  const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
890 
891  typedef typename DataType<T, tChannels>::Type PixelType;
892 
893  const T zeroColor[tChannels] = {T(0)};
894  const PixelType bColor = borderColor ? *(const PixelType*)(borderColor) : *(const PixelType*)(zeroColor);
895 
896  for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
897  {
898  PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
899 
900  for (unsigned int x = 0u; x < outputWidth; ++x)
901  {
902  const Vector2 outputPosition = Vector2(Scalar(x), Scalar(y));
903  const Vector2 inputPosition(*input_H_output * outputPosition);
904 
905  const unsigned int inputX = Numeric::round32(inputPosition.x());
906  const unsigned int inputY = Numeric::round32(inputPosition.y());
907 
908  if (inputX < inputWidth && inputY < inputHeight)
909  {
910  *outputData = *((const PixelType*)(input + inputY * inputStrideElements) + inputX);
911  }
912  else
913  {
914  *outputData = bColor;
915  }
916 
917  outputData++;
918  }
919  }
920 }
921 
922 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
923 
924 template <unsigned int tChannels>
925 inline void FrameInterpolatorNearestPixel::affine8BitPerChannelSSESubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
926 {
927  static_assert(tChannels >= 1u, "Invalid channel number!");
928 
929  ocean_assert(input && output);
930  ocean_assert(inputWidth > 0u && inputHeight > 0u);
931  ocean_assert(outputWidth >= 4u && outputHeight > 0u);
932  ocean_assert(affineTransform);
933  ocean_assert(!affineTransform->isNull() && Numeric::isEqualEps((*affineTransform)[2]) && Numeric::isEqualEps((*affineTransform)[5]));
934 
935  ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
936 
937  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
938 
939  const uint8_t zeroColor[tChannels] = {uint8_t(0)};
940  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
941 
942  PixelType* outputPixelData = (PixelType*)(output + firstOutputRow * (outputWidth * tChannels + outputPaddingElements));
943 
944  OCEAN_ALIGN_DATA(16)
945  unsigned int nearestNeighbours[4];
946 
947  // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
948  const __m128 m128_f_X0 = _mm_set_ps1(float((*affineTransform)(0, 0)));
949  const __m128 m128_f_X1 = _mm_set_ps1(float((*affineTransform)(1, 0)));
950 
951  // m128_u_inputStrideElements = [rowStride, rowStride, rowStride, rowStride], rowStride = inputWidth * tChannels + inputPaddingElements
952  const __m128i m128_i_inputStrideElements = _mm_set1_epi32(inputWidth * tChannels + inputPaddingElements);
953 
954  // m128_u_channels = [tChannels, tChannels, tChannels, tChannels]
955  const __m128i m128_i_channels = _mm_set1_epi32(tChannels);
956 
957  // m128_i_inputWidth_1 = [inputWidth - 1u, inputWidth - 1u, inputWidth - 1u, inputWidth - 1u]
958  const __m128i m128_i_inputWidth_1 = _mm_set1_epi32(inputWidth - 1u);
959 
960  // m128_i_inputHeight_1 = [inputHeight - 1u, inputHeight - 1u, inputHeight - 1u, inputHeight - 1u]
961  const __m128i m128_i_inputHeight_1 = _mm_set1_epi32(inputHeight - 1u);
962 
963  // m128_i_zero = [0, 0, 0, 0]
964  const __m128i m128_i_zero = _mm_setzero_si128();
965 
966  // Indices (of elements) above this value in the input image are considered as outside of the image (intentionally not counting the last padding elements)
967  const unsigned int inputElementsEnd = inputHeight * inputWidth * tChannels + (inputHeight - 1u) * inputPaddingElements;
968 
969  for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
970  {
971  // We can slightly optimize the 3x3 matrix multiplication:
972  //
973  // | X0 Y0 Z0 | | x |
974  // | X1 Y1 Z1 | * | y |
975  // | 0 0 1 | | 1 |
976  //
977  // | xx | | X0 * x | | Y0 * y + Z0 |
978  // | yy | = | X1 * x | + | Y1 * y + Z1 |
979  //
980  // As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
981  //
982  // C0 = Y0 * y + Z0
983  // C1 = Y1 * y + Z1
984  //
985  // So the computation becomes:
986  //
987  // | x' | | X0 * x | | C0 |
988  // | y' | = | X1 * x | + | C1 |
989 
990  // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
991  const __m128 m128_f_C0 = _mm_set_ps1(float((*affineTransform)(0, 1) * Scalar(y) + (*affineTransform)(0, 2)));
992  const __m128 m128_f_C1 = _mm_set_ps1(float((*affineTransform)(1, 1) * Scalar(y) + (*affineTransform)(1, 2)));
993 
994  for (unsigned int x = 0u; x < outputWidth; x += 4u)
995  {
996  if (x + 4u > outputWidth)
997  {
998  // the last iteration will not fit into the output frame,
999  // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
1000 
1001  ocean_assert(x >= 4u && outputWidth > 4u);
1002  const unsigned int newX = outputWidth - 4u;
1003 
1004  ocean_assert(x > newX);
1005  outputPixelData -= x - newX;
1006 
1007  x = newX;
1008 
1009  // the for loop will stop after this iteration
1010  ocean_assert(!(x + 4u < outputWidth));
1011  }
1012 
1013  // we need four successive x coordinate floats:
1014  // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
1015  const __m128 m128_f_x_0123 = _mm_set_ps(float(x + 3u), float(x + 2u), float(x + 1u), float(x + 0u));
1016 
1017  // we calculate xx and yy for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
1018  const __m128 m128_f_inputX = _mm_add_ps(_mm_mul_ps(m128_f_X0, m128_f_x_0123), m128_f_C0);
1019  const __m128 m128_f_inputY = _mm_add_ps(_mm_mul_ps(m128_f_X1, m128_f_x_0123), m128_f_C1);
1020 
1021  // Compute the coordinates of the nearest neighbors
1022  const __m128i m128_i_inputX = _mm_cvtps_epi32(_mm_round_ps(m128_f_inputX, _MM_FROUND_TO_NEAREST_INT)); // x' = (int)round(x)
1023  const __m128i m128_i_inputY = _mm_cvtps_epi32(_mm_round_ps(m128_f_inputY, _MM_FROUND_TO_NEAREST_INT)); // y' = (int)round(y)
1024 
1025  // Note: Detection of input position outside the input image
1026  //
1027  // If the input point is outside the input image, then set the index
1028  // of its nearest neighbor to a value that is above the number of
1029  // available pixels in the image. When writing to the output, a
1030  // check will make sure to use the background color for those
1031  // pixels:
1032  //
1033  // nearestNeighbour = isOutsideImage ? 0xFFFFFFFF : y * w + x
1034  // output = nearestNeighbour < inputPixelIndexEnd ? foregroundColor : backgroundColor.
1035  //
1036  // This approach keeps the amount of data that has to be transferred
1037  // between SSE and CPU registers to a minimum.
1038 
1039  // isOutsideImage = (inputX < 0 || inputX > (width - 1u) || inputY < 0 || inputY > (height - 1u) ? 0xFFFFFFFF : 0x00000000;
1040  const __m128i m128_i_isOutsideImage = _mm_or_si128(
1041  _mm_or_si128(_mm_cmplt_epi32(m128_i_inputX, m128_i_zero), _mm_cmplt_epi32(m128_i_inputY, m128_i_zero)),
1042  _mm_or_si128(_mm_cmpgt_epi32(m128_i_inputX, m128_i_inputWidth_1), _mm_cmpgt_epi32(m128_i_inputY, m128_i_inputHeight_1)));
1043 
1044  // Compute pixel index of the nearest neighbors of the valid pixels and store their pixel values
1045  // nearestNeighborsElement = (isOutsideImage ? 0xFFFFFFFF : (inputY * inputStrideElements) + (inputX * channels))
1046  const __m128i m_128_i_nearestNeighborElements = _mm_or_si128(m128_i_isOutsideImage, _mm_add_epi32(_mm_mullo_epi32(m128_i_inputY, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_inputX, m128_i_channels)));
1047  _mm_store_si128((__m128i*)nearestNeighbours, m_128_i_nearestNeighborElements);
1048 
1049  // Update the output pixels
1050  outputPixelData[0] = nearestNeighbours[0] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighbours[0]) : *bColor;
1051  outputPixelData[1] = nearestNeighbours[1] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighbours[1]) : *bColor;
1052  outputPixelData[2] = nearestNeighbours[2] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighbours[2]) : *bColor;
1053  outputPixelData[3] = nearestNeighbours[3] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighbours[3]) : *bColor;
1054 
1055  outputPixelData += 4u;
1056  }
1057 
1058  outputPixelData = (PixelType*)((uint8_t*)outputPixelData + outputPaddingElements);
1059  }
1060 }
1061 
1062 template <typename T, unsigned int tChannels>
1063 void FrameInterpolatorNearestPixel::homographySSESubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
1064 {
1065  static_assert(tChannels > 0u, "Invalid channel number!");
1066 
1067  ocean_assert(input != nullptr && output != nullptr);
1068  ocean_assert(inputWidth > 0u && inputHeight > 0u);
1069  ocean_assert(outputWidth >= 4u && outputHeight > 0u);
1070  ocean_assert(input_H_output != nullptr && !input_H_output->isSingular());
1071 
1072  ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
1073 
1074  const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
1075  const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
1076 
1077  typedef typename DataType<T, tChannels>::Type PixelType;
1078 
1079  const T zeroColor[tChannels] = {T(0)};
1080  const PixelType bColor = borderColor ? *(const PixelType*)(borderColor) : *(const PixelType*)(zeroColor);
1081 
1082  OCEAN_ALIGN_DATA(16) unsigned int nearestNeighbourElementOffsets[4];
1083 
1084  // | X0 Y0 Z0 | | x |
1085  // Homography H = | X1 Y1 Z1 |, point p = | y |
1086  // | X2 Y2 Z2 | | 1 |
1087  //
1088  // | xx |
1089  // pp = H * p = | yy |
1090  // | zz |
1091  //
1092  // | xx | | X0 Y0 Z0 | | x |
1093  // <=> | yy | = | X1 Y1 Z1 | * | y |
1094  // | zz | | X2 Y2 Z2 | | 1 |
1095  //
1096  // | xx | | X0 * x | | Y0 * y + Z0 |
1097  // <=> | yy | = | X1 * x | + | Y1 * y + Z1 |
1098  // | zz | | X2 * x | | Y2 * y + Z2 |
1099  //
1100  // | xx | | X0 * x | | C0 | | Y0 * y + Z0 |
1101  // <=> | yy | = | X1 * x | + | C1 |, C = | Y1 * y + Z1 |
1102  // | zz | | X2 * x | | C2 | | Y2 * y + Z2 |
1103  //
1104  // Where C is a constant term that can be pre-computed (per image row)
1105  //
1106  // | x' | | xx / zz | | (X0 * x + C0) / (X2 * x + C2) |
1107  // p' = | y' | = | yy / zz | = | (X1 * x + C1) / (X2 * x + C2) |
1108 
1109  // [Xi, Xi, Xi, Xi], i = {0, 1, 2}
1110  const __m128 m128_f_X0 = _mm_set_ps1((float)(*input_H_output)(0, 0));
1111  const __m128 m128_f_X1 = _mm_set_ps1((float)(*input_H_output)(1, 0));
1112  const __m128 m128_f_X2 = _mm_set_ps1((float)(*input_H_output)(2, 0));
1113 
1114  // Store 4 integers: [inputStrideElements, inputStrideElements, inputStrideElements, inputStrideElements]
1115  const __m128i m128_i_inputStrideElements = _mm_set1_epi32(inputStrideElements);
1116 
1117  const unsigned int inputPixelElementIndexEnd = inputHeight * inputStrideElements;
1118 
1119  // m128_i_inputWidth_1 = [inputWidth - 1u, inputWidth - 1u, inputWidth - 1u, inputWidth - 1u]
1120  const __m128i m128_i_inputWidth_1 = _mm_set1_epi32(inputWidth - 1u);
1121 
1122  // m128_i_inputHeight_1 = [inputHeight - 1u, inputHeight - 1u, inputHeight - 1u, inputHeight - 1u]
1123  const __m128i m128_i_inputHeight_1 = _mm_set1_epi32(inputHeight - 1u);
1124 
1125  // [tChannels, tChannels, tChannels tChannels]
1126  const __m128i m128_i_channels = _mm_set1_epi32(tChannels);
1127 
1128  // m128_i_zero = [0, 0, 0, 0]
1129  const __m128i m128_i_zero = _mm_setzero_si128();
1130 
1131  for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
1132  {
1133  PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
1134 
1135  // Pre-compute the constant terms [Ci, Ci, Ci, Ci], i={0, 1, 2}
1136  const __m128 m128_f_C0 = _mm_set_ps1((float)((*input_H_output)(0, 1) * Scalar(y) + ((*input_H_output)(0, 2))));
1137  const __m128 m128_f_C1 = _mm_set_ps1((float)((*input_H_output)(1, 1) * Scalar(y) + ((*input_H_output)(1, 2))));
1138  const __m128 m128_f_C2 = _mm_set_ps1((float)((*input_H_output)(2, 1) * Scalar(y) + ((*input_H_output)(2, 2))));
1139 
1140  for (unsigned int x = 0u; x < outputWidth; x += 4u)
1141  {
1142  if (x + 4u > outputWidth)
1143  {
1144  // the last iteration will not fit into the output frame,
1145  // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
1146 
1147  ocean_assert(x >= 4u && outputWidth > 4u);
1148  const unsigned int newX = outputWidth - 4u;
1149 
1150  ocean_assert(x > newX);
1151  outputPixelData -= x - newX;
1152 
1153  x = newX;
1154 
1155  // the for loop will stop after this iteration
1156  ocean_assert(!(x + 4u < outputWidth));
1157  }
1158 
1159  // we need four successive x coordinate floats:
1160  // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
1161  const __m128 m128_f_x_0123 = _mm_set_ps(float(x + 3u), float(x + 2u), float(x + 1u), float(x + 0u));
1162 
1163  // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
1164  const __m128 m128_f_xx = _mm_add_ps(_mm_mul_ps(m128_f_X0, m128_f_x_0123), m128_f_C0);
1165  const __m128 m128_f_yy = _mm_add_ps(_mm_mul_ps(m128_f_X1, m128_f_x_0123), m128_f_C1);
1166  const __m128 m128_f_zz = _mm_add_ps(_mm_mul_ps(m128_f_X2, m128_f_x_0123), m128_f_C2);
1167 
1168 #ifdef USE_APPROXIMATED_INVERSE_OF_ZZ // (not defined by default)
1169 
1170  // we calculate the (approximated) inverse of zz,
1171  // the overall performance will be approx. 5% better while the accuracy will be slightly worse:
1172  // [1/zz3, 1/zz2, 1/zz1, 1/zz0]
1173  const __m128 inv_zz_128 = _mm_rcp_ps(m128_f_zz);
1174 
1175  // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
1176  const __m128 m128_f_inputX = _mm_mul_ps(m128_f_xx, inv_zz_128);
1177  const __m128 m128_f_inputY = _mm_mul_ps(m128_f_yy, inv_zz_128);
1178 
1179 #else
1180 
1181  // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
1182  const __m128 m128_f_inputX = _mm_div_ps(m128_f_xx, m128_f_zz);
1183  const __m128 m128_f_inputY = _mm_div_ps(m128_f_yy, m128_f_zz);
1184 
1185 #endif // USE_APPROXIMATED_INVERSE_OF_ZZ
1186 
1187  // Compute the coordinates of the nearest neighbors
1188  const __m128i m128_i_inputX = _mm_cvtps_epi32(_mm_round_ps(m128_f_inputX, _MM_FROUND_TO_NEAREST_INT)); // x' = (int)round(x)
1189  const __m128i m128_i_inputY = _mm_cvtps_epi32(_mm_round_ps(m128_f_inputY, _MM_FROUND_TO_NEAREST_INT)); // y' = (int)round(y)
1190 
1191  // Note: Detection of input position outside the input image
1192  //
1193  // If the input point is outside the input image, then set the index
1194  // of its nearest neighbor to a value that is above the number of
1195  // available pixels in the image. When writing to the output, a
1196  // check will make sure to use the background color for those
1197  // pixels:
1198  //
1199  // nearestNeighbour = isOutsideImage ? 0xFFFFFFFF : y * w + x
1200  // output = nearestNeighbour < inputPixelIndexEnd ? foregroundColor : backgroundColor.
1201  //
1202  // This approach keeps the amount of data that has to be transferred
1203  // between SSE and CPU registers to a minimum.
1204 
1205  // isOutsideImage = (inputX < 0 || inputX > (width - 1u) || inputY < 0 || inputY > (height - 1u) ? 0xFFFFFFFF : 0x00000000;
1206  const __m128i m128_i_isOutsideImage = _mm_or_si128(
1207  _mm_or_si128(_mm_cmplt_epi32(m128_i_inputX, m128_i_zero), _mm_cmplt_epi32(m128_i_inputY, m128_i_zero)),
1208  _mm_or_si128(_mm_cmpgt_epi32(m128_i_inputX, m128_i_inputWidth_1), _mm_cmpgt_epi32(m128_i_inputY, m128_i_inputHeight_1)));
1209 
1210  // Compute pixel index of the nearest neighbors of the valid pixels and store their pixel values
1211  // m_128_i_nearestNeighbors = (isOutsideImage ? 0xFFFFFFFF : inputY * inputWidth + inputX)
1212  const __m128i m_128_i_nearestNeighbors = _mm_or_si128(m128_i_isOutsideImage, _mm_add_epi32(_mm_mullo_epi32(m128_i_inputY, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_inputX, m128_i_channels))); // nn = y' * inputWidth + x'
1213  _mm_store_si128((__m128i*)nearestNeighbourElementOffsets, m_128_i_nearestNeighbors);
1214 
1215  // Update the output pixels
1216  outputPixelData[0] = nearestNeighbourElementOffsets[0] < inputPixelElementIndexEnd ? *(const PixelType*)(input + nearestNeighbourElementOffsets[0]) : bColor;
1217  outputPixelData[1] = nearestNeighbourElementOffsets[1] < inputPixelElementIndexEnd ? *(const PixelType*)(input + nearestNeighbourElementOffsets[1]) : bColor;
1218  outputPixelData[2] = nearestNeighbourElementOffsets[2] < inputPixelElementIndexEnd ? *(const PixelType*)(input + nearestNeighbourElementOffsets[2]) : bColor;
1219  outputPixelData[3] = nearestNeighbourElementOffsets[3] < inputPixelElementIndexEnd ? *(const PixelType*)(input + nearestNeighbourElementOffsets[3]) : bColor;
1220 
1221  outputPixelData += 4u;
1222  }
1223  }
1224 }
1225 
1226 #endif // OCEAN_HARDWARE_SSE_VERSION >= 41
1227 
1228 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1229 
1230 template <unsigned int tChannels>
1231 void FrameInterpolatorNearestPixel::affine8BitPerChannelIntegerNEONSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
1232 {
1233  // The following optimizations have been applied:
1234  //
1235  // - Matrix-vector multiplication for affine transformations:
1236  //
1237  // | x' | | X0 Y0 Z0 | | x |
1238  // | y' | = | X1 Y1 Z1 | * | y |
1239  // | 1 | | 0 0 1 | | 1 |
1240  //
1241  // which is
1242  //
1243  // x' = X0 * x + Y0 * y + Z0
1244  // y' = X1 * x + Y1 * y + Z1
1245  //
1246  // We can slightly optimize this operation, since y is constant within the inner
1247  // loop. The two terms on the right side in the above equations can be
1248  // pre-calculated:
1249  //
1250  // C0 = Y0 * y + Z0
1251  // C1 = Y1 * y + Z1
1252  //
1253  // So the computation becomes:
1254  //
1255  // | x' | | X0 * x | | C0 |
1256  // | y' | = | X1 * x | + | C1 |
1257  //
1258  // - For better utilization of cache coherence, the (output) image is processed
1259  // in blocks (64 x 64 pixels, if possible)
1260  //
1261  // - Integer fixed-point arithmetic.
1262  //
1263  // - Update products from floating point numbers with the beginning of blocks,
1264  // because the rounding error of fixed-point operations increases for larger
1265  // values:
1266  //
1267  // f - float number
1268  // i - fixed-point representation of f
1269  // v - coordinate value
1270  // eps = (f - i) - loss of precision (eps > 0)
1271  //
1272  // Rounding error:
1273  //
1274  // e = |(v * f) - (v * i)| = |v * (f - i)| = |v * eps|
1275  // (increases linearly for larger coordinate values v, i.e., with image size)
1276  //
1277  // The rounding error can be kept at bay by replacing the product (v * i) with
1278  // (v * f) at the beginning of each block followed by adding an offset for all
1279  // other pixels in the block, (N * f) where N is the number of pixels which
1280  // are processed concurrently by SIMD instructions.
1281  //
1282 
1283  static_assert(tChannels >= 1u, "Invalid channel number!");
1284 
1285  constexpr unsigned int fractionalBits = 15u;
1286  constexpr unsigned int totalBits = (unsigned int)(CHAR_BIT * sizeof(int));
1287 
1288  static_assert((fractionalBits + 1u /* sign bit */) < totalBits, "Number of fractional bits exceeds number of total bits");
1289 
1290  constexpr unsigned int maxImageEdgeLength = 1u << (totalBits - fractionalBits - 1u /* sign bit */);
1291 
1292  // Scale to convert float value, v, to fixed-point value, v_q = int(round(fixedPointScale * v))
1293  constexpr Scalar fixedPointScale = Scalar(1u << fractionalBits);
1294 
1295  // Number of pixels processed by NEON in each iteration
1296  constexpr unsigned int pixelsPerIteration = 4u;
1297 
1298  ocean_assert(input && output);
1299  ocean_assert_and_suppress_unused(inputWidth > 0u && inputHeight > 0u && inputWidth <= maxImageEdgeLength && inputHeight <= maxImageEdgeLength, maxImageEdgeLength);
1300  ocean_assert_and_suppress_unused(outputWidth >= pixelsPerIteration && outputHeight > 0u && outputWidth <= maxImageEdgeLength && outputHeight <= maxImageEdgeLength, maxImageEdgeLength);
1301  ocean_assert(affineTransform);
1302  ocean_assert(!affineTransform->isNull() && Numeric::isEqualEps((*affineTransform)[2]) && Numeric::isEqualEps((*affineTransform)[5]));
1303 
1304  ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
1305 
1306  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
1307 
1308  const uint8_t zeroColor[tChannels] = {uint8_t(0)};
1309  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
1310 
1311  PixelType* outputPixelData = (PixelType*)(output + firstOutputRow * (outputWidth * tChannels + outputPaddingElements));
1312 
1313  const unsigned int outputRowEnd = firstOutputRow + numberOutputRows;
1314 
1315  // Indices of the final nearest neighbor pixels, which are used for the interpolation
1316  unsigned int nearestNeighboursElements[4];
1317 
1318  // Indices (of elements) above this value in the input image are considered as outside of the image (intentionally not counting the last padding elements)
1319  const unsigned int inputElementsEnd = inputHeight * inputWidth * tChannels + (inputHeight - 1u) * inputPaddingElements;
1320 
1321  // m128_u_inputWidth = [inputWidth, inputWidth, inputWidth, inputWidth], and the same for inputHeight
1322  const uint32x4_t m128_u_inputWidth = vdupq_n_u32(inputWidth);
1323  const uint32x4_t m128_u_inputHeight = vdupq_n_u32(inputHeight);
1324 
1325  // m128_u_inputStrideElements = [rowStride, rowStride, rowStride, rowStride], rowStride = inputWidth * tChannels + inputPaddingElements
1326  const uint32x4_t m128_u_inputStrideElements = vdupq_n_u32(inputWidth * tChannels + inputPaddingElements);
1327 
1328  // m128_u_channels = [tChannels, tChannels, tChannels, tChannels]
1329  const uint32x4_t m128_u_channels = vdupq_n_u32(tChannels);
1330 
1331  // m128_s_offsets_0123 = [0, 1, 2, 3]
1332  const int offsets_0123[4] = { 0, 1, 2, 3 };
1333  const int32x4_t m128_s_offsets_0123 = vld1q_s32(offsets_0123);
1334 
1335  // m128_f_pixelsPerIteration = [4.0f, 4.0f, 4.0f, 4.0f]
1336  const float32x4_t m128_f_pixelsPerIteration = vdupq_n_f32((float)pixelsPerIteration);
1337 
1338  // Float-based transformation value X0 multiplied with scale for fixed-point
1339  // numbers. This is used to update the fixed-point products, X0 * x and X1 * x,
1340  // at the beginning of each block, i.e.
1341  // m128_f_q_X0 = [v, v, v, v], v = fixedPointScale * X0, and the same for X1
1342  const float32x4_t m128_f_X0 = vdupq_n_f32(float(fixedPointScale * (*affineTransform)(0, 0)));
1343  const float32x4_t m128_f_X1 = vdupq_n_f32(float(fixedPointScale * (*affineTransform)(1, 0)));
1344 
1345  // Increment that is added to fixed-point product computed at the beginning of
1346  // each block, X0 * x and X1 * x, in each iteration inside the block
1347  const int32x4_t m128_s_q_X0x_increment = vcvtq_s32_f32(vmulq_f32(m128_f_X0, m128_f_pixelsPerIteration));
1348  const int32x4_t m128_s_q_X1x_increment = vcvtq_s32_f32(vmulq_f32(m128_f_X1, m128_f_pixelsPerIteration));
1349 
1350  // Determine the optimal block size
1351  constexpr unsigned int blockSize = 64u;
1352  constexpr unsigned int blockElements = blockSize * blockSize;
1353  const unsigned int blockWidth = std::min(blockElements / std::min(numberOutputRows, blockSize), outputWidth);
1354  const unsigned int blockHeight = std::min(blockElements / blockWidth, numberOutputRows);
1355  ocean_assert(blockWidth > 0u && blockWidth <= outputWidth);
1356  ocean_assert(blockHeight > 0u && blockHeight <= numberOutputRows);
1357 
1358  // Index of pixel that is the last in a block of #pixelsPerIterations pixels, i.e. number of remaining pixels after
1359  // this point are less than #pixelsPerIterations. When this pixel index is reached all pointers will be moved left
1360  // so that we can process one last block of #pixelsPerIterations pixels. That also means that depending on the width
1361  // of the output image between [1, pixelsPerIterations) pixels will be computed a second time.
1362  const unsigned int lastMultipleNeonPixelBlockStart = outputWidth - pixelsPerIteration;
1363 
1364  // m128_f_lastMultipleNeonPixelBlockStart = [(float)(lastMultipleNeonPixelBlockStart + 0), (float)(lastMultipleNeonPixelBlockStart + 1), (float)(lastMultipleNeonPixelBlockStart + 2), (float)(lastMultipleNeonPixelBlockStart + 3)]
1365  const float32x4_t m128_f_lastMultipleNeonPixelBlockStart = vcvtq_f32_s32(vaddq_s32(vdupq_n_s32((int)lastMultipleNeonPixelBlockStart), m128_s_offsets_0123));
1366 
1367  // m128_s_q_X0x_lastMultipleNeonPixelBlockStart = [v0, v1, v2, v3], vi = int(round(fixedPointScale * X0 * (lastMultipleNeonPixelBlockStart + i))), i = 0...3, and similarly for X1
1368  const int32x4_t m128_s_q_X0x_lastMultipleNeonPixelBlockStart = vcvtq_s32_f32(vmulq_f32(m128_f_X0, m128_f_lastMultipleNeonPixelBlockStart));
1369  const int32x4_t m128_s_q_X1x_lastMultipleNeonPixelBlockStart = vcvtq_s32_f32(vmulq_f32(m128_f_X1, m128_f_lastMultipleNeonPixelBlockStart));
1370 
1371  for (unsigned int blockYStart = firstOutputRow; blockYStart < outputRowEnd; blockYStart += blockHeight)
1372  {
1373  const unsigned int blockYEnd = std::min(blockYStart + blockHeight, outputRowEnd);
1374 
1375  for (unsigned int blockXStart = 0u; blockXStart < outputWidth; blockXStart += blockWidth)
1376  {
1377  const unsigned int blockXEnd = std::min(blockXStart + blockWidth, outputWidth);
1378 
1379  for (unsigned int y = blockYStart; y < blockYEnd; ++y)
1380  {
1381  outputPixelData = (PixelType*)(output + y * (outputWidth * tChannels + outputPaddingElements) + blockXStart * tChannels);
1382 
1383  // Constant parts, cf. optimization of matrix-vector multiplication above
1384  // m128_s_C0 = [C0, C0, C0, C0], C0 = int(round(leftShiftFactor * (Y0 * y + Z0))), and similarly for C1
1385  const int32x4_t m128_s_q_C0 = vdupq_n_s32(Numeric::round32(fixedPointScale * ((*affineTransform)(0, 1) * Scalar(y) + (*affineTransform)(0, 2))));
1386  const int32x4_t m128_s_q_C1 = vdupq_n_s32(Numeric::round32(fixedPointScale * ((*affineTransform)(1, 1) * Scalar(y) + (*affineTransform)(1, 2))));
1387 
1388  // Update products, X0 * x and X1 * x, from floating point numbers with the
1389  // beginning of this block, since the rounding error of fixed-point operations
1390  // increases for larger coordinate values, cf. list of optimizations above.
1391  //
1392  // m128_s_x_0123 = [blockXStart + 0, blockXStart + 1, blockXStart + 2, blockXStart + 3]
1393  const int32x4_t m128_s_x_0123 = vaddq_s32(vdupq_n_s32(int(blockXStart)), m128_s_offsets_0123);
1394 
1395  // m128_f_x_0123 = [(float)(x + 0), (float)(x + 1), (float)(x + 2), (float)(x + 3)]
1396  const float32x4_t m128_f_x_0123 = vcvtq_f32_s32(m128_s_x_0123);
1397 
1398  // m128_s_q_X0x = [v0, v1, v2, v3], vi = int(round(fixedPointScale * X0 * (x + i))), i = 0...3, and similarly for X1
1399  int32x4_t m128_s_q_X0x = vcvtq_s32_f32(vmulq_f32(m128_f_X0, m128_f_x_0123));
1400  int32x4_t m128_s_q_X1x = vcvtq_s32_f32(vmulq_f32(m128_f_X1, m128_f_x_0123));
1401 
1402  for (unsigned int x = blockXStart; x < blockXEnd; x += pixelsPerIteration)
1403  {
1404  if (x + pixelsPerIteration > outputWidth)
1405  {
1406  ocean_assert(x + pixelsPerIteration > outputWidth);
1407  ocean_assert(x >= pixelsPerIteration && outputWidth > pixelsPerIteration);
1408  ocean_assert(lastMultipleNeonPixelBlockStart == (outputWidth - pixelsPerIteration));
1409 
1410  outputPixelData -= (x - lastMultipleNeonPixelBlockStart);
1411 
1412  x = lastMultipleNeonPixelBlockStart;
1413 
1414  m128_s_q_X0x = m128_s_q_X0x_lastMultipleNeonPixelBlockStart;
1415  m128_s_q_X1x = m128_s_q_X1x_lastMultipleNeonPixelBlockStart;
1416 
1417  // the for loop will stop after this iteration
1418  ocean_assert(!(x + pixelsPerIteration < outputWidth));
1419  }
1420 
1421  // Compute pixel location in the input image
1422  // m128_s_q_inputX = x' = C0 + X0 * x
1423  // m128_s_q_inputY = y' = C1 + X1 * y
1424  const int32x4_t m128_s_q_inputX = vaddq_s32(m128_s_q_C0, m128_s_q_X0x);
1425  const int32x4_t m128_s_q_inputY = vaddq_s32(m128_s_q_C1, m128_s_q_X1x);
1426 
1427  // Convert (signed) fixed-point location to unsigned int, i.e., negative values
1428  // will be larger than image dimensions (width, height), cf. note below
1429  //
1430  // m128_u_inputX = (unsigned int) round(inputX >> N)
1431  // m128_u_inputY = (unsigned int) round(inputY >> N)
1432  const uint32x4_t m128_u_inputX = vreinterpretq_u32_s32(vrshrq_n_s32(m128_s_q_inputX, fractionalBits));
1433  const uint32x4_t m128_u_inputY = vreinterpretq_u32_s32(vrshrq_n_s32(m128_s_q_inputY, fractionalBits));
1434 
1435  // Note: Detection of input position outside the input image
1436  //
1437  // If the input point is outside the input image, then set the index
1438  // of its nearest neighbor to a value that is above the number of
1439  // available pixels in the image. When writing to the output, a
1440  // check will make sure to use the background color for those
1441  // pixels:
1442  //
1443  // nearestNeighbour = isOutsideImage ? 0xFFFFFFFF : y * w + x
1444  // output = nearestNeighbour < inputPixelIndexEnd ? foregroundColor : backgroundColor.
1445  //
1446  // This approach keeps the amount of data that has to be transferred
1447  // between NEON and CPU registers to a minimum.
1448 
1449  // Casting negative signed values to unsigned value results in very large values, e.g., ((unsigned int) -1) > inputWidth.
1450  // We'll exploit that below to check is pixel coordinates are outside the image.
1451  // m128_u_isOutsideImage = (x >= inputWidth || y >= inputHeight) ? 0xFFFFFFFF : 0x00000000;
1452  const uint32x4_t m128_u_isOutsideImage = vorrq_u32(vcgeq_u32(m128_u_inputX, m128_u_inputWidth), vcgeq_u32(m128_u_inputY, m128_u_inputHeight));
1453 
1454  // Determine the pixel indices of the nearest neighbors and store the result
1455  // If the pixel is outside the image then set the index of the nearest neighbor to the largest possible value
1456  // m_128_u_nearestNeighbors = m128_u_isOutsideImage | (inputY * inputStrideElements) + (inputX * channels);
1457  // which is equivalent to
1458  // m_128_u_nearestNeighborElements = (m128_u_isOutsideImage ? 0xFFFFFFFF : (inputY * inputStrideElements) + (inputX * channels))
1459  const uint32x4_t m_128_u_nearestNeighborsElements = vorrq_u32(m128_u_isOutsideImage, vaddq_u32(vmulq_u32(m128_u_inputY, m128_u_inputStrideElements), vmulq_u32(m128_u_inputX, m128_u_channels)));
1460  vst1q_u32(nearestNeighboursElements, m_128_u_nearestNeighborsElements);
1461 
1462  outputPixelData[0] = nearestNeighboursElements[0] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighboursElements[0]) : *bColor;
1463  outputPixelData[1] = nearestNeighboursElements[1] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighboursElements[1]) : *bColor;
1464  outputPixelData[2] = nearestNeighboursElements[2] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighboursElements[2]) : *bColor;
1465  outputPixelData[3] = nearestNeighboursElements[3] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighboursElements[3]) : *bColor;
1466 
1467  outputPixelData += pixelsPerIteration;
1468 
1469  // m128_s_q_X0x += m128_s_q_X0x_increment, and similarly for X1
1470  m128_s_q_X0x = vaddq_s32(m128_s_q_X0x, m128_s_q_X0x_increment);
1471  m128_s_q_X1x = vaddq_s32(m128_s_q_X1x, m128_s_q_X1x_increment);
1472  }
1473  }
1474  }
1475 
1476  outputPixelData = (PixelType*)((uint8_t*)outputPixelData + outputPaddingElements);
1477  }
1478 }
1479 
1480 template <typename T, unsigned int tChannels>
1481 void FrameInterpolatorNearestPixel::homographyNEONSubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
1482 {
1483  static_assert(tChannels >= 1u, "Invalid channel number!");
1484 
1485  ocean_assert(input != nullptr && output != nullptr);
1486  ocean_assert(inputWidth > 0u && inputHeight > 0u);
1487  ocean_assert(outputWidth >= 4u && outputHeight > 0u);
1488  ocean_assert(input_H_output != nullptr && !input_H_output->isSingular());
1489 
1490  ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
1491 
1492  const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
1493  const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
1494 
1495  typedef typename DataType<T, tChannels>::Type PixelType;
1496 
1497  const T zeroColor[tChannels] = {T(0)};
1498  const PixelType bColor = borderColor ? *(const PixelType*)(borderColor) : *(const PixelType*)(zeroColor);
1499 
1500  unsigned int validPixels[4];
1501  unsigned int nearestNeighbourElementOffsets[4];
1502 
1503  // | X0 Y0 Z0 | | x |
1504  // Homography H = | X1 Y1 Z1 |, point p = | y |
1505  // | X2 Y2 Z2 | | 1 |
1506  //
1507  // | xx |
1508  // pp = H * p = | yy |
1509  // | zz |
1510  //
1511  // | xx | | X0 Y0 Z0 | | x |
1512  // <=> | yy | = | X1 Y1 Z1 | * | y |
1513  // | zz | | X2 Y2 Z2 | | 1 |
1514  //
1515  // | xx | | X0 * x | | Y0 * y + Z0 |
1516  // <=> | yy | = | X1 * x | + | Y1 * y + Z1 |
1517  // | zz | | X2 * x | | Y2 * y + Z2 |
1518  //
1519  // | xx | | X0 * x | | C0 | | Y0 * y + Z0 |
1520  // <=> | yy | = | X1 * x | + | C1 |, C = | Y1 * y + Z1 |
1521  // | zz | | X2 * x | | C2 | | Y2 * y + Z2 |
1522  //
1523  // Where C is a constant term that can be pre-computed (per image row)
1524  //
1525  // | x' | | xx / zz | | (X0 * x + C0) / (X2 * x + C2) |
1526  // p' = | y' | = | yy / zz | = | (X1 * x + C1) / (X2 * x + C2) |
1527 
1528  // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
1529  const float32x4_t m128_f_X0 = vdupq_n_f32(float((*input_H_output)(0, 0)));
1530  const float32x4_t m128_f_X1 = vdupq_n_f32(float((*input_H_output)(1, 0)));
1531  const float32x4_t m128_f_X2 = vdupq_n_f32(float((*input_H_output)(2, 0)));
1532 
1533  // we store 4 floats: [0.5f, 0.5f, 0.5f, 0.5f]
1534  const float32x4_t m128_f_pointFive = vdupq_n_f32(0.5f);
1535  const float32x4_t m128_f_negPointFive = vdupq_n_f32(-0.5f);
1536 
1537  // we store 4 integers: [inputStrideElements, inputStrideElements, inputStrideElements, inputStrideElements]
1538  const uint32x4_t m128_u_inputStrideElements = vdupq_n_u32(inputStrideElements);
1539 
1540  const uint32x4_t m128_u_channels = vdupq_n_u32(tChannels);
1541 
1542  // we store 4 floats: [inputWidth - 0.5f, inputWidth - 0.5f, inputWidth - 0.5f, inputWidth - 0.5f], and same with inputHeight
1543  const float32x4_t m128_f_inputWidth_pointFive = vdupq_n_f32(float(inputWidth) - 0.5f);
1544  const float32x4_t m128_f_inputHeight_pointFive = vdupq_n_f32(float(inputHeight) - 0.5f);
1545 
1546  for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
1547  {
1548  PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
1549 
1550  // Pre-compute the constant terms [Ci, Ci, Ci, Ci], i={0, 1, 2}
1551  const float32x4_t m128_f_C0 = vdupq_n_f32(float((*input_H_output)(0, 1) * Scalar(y) + (*input_H_output)(0, 2)));
1552  const float32x4_t m128_f_C1 = vdupq_n_f32(float((*input_H_output)(1, 1) * Scalar(y) + (*input_H_output)(1, 2)));
1553  const float32x4_t m128_f_C2 = vdupq_n_f32(float((*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2)));
1554 
1555  for (unsigned int x = 0u; x < outputWidth; x += 4u)
1556  {
1557  if (x + 4u > outputWidth)
1558  {
1559  // Since the last iteration will not fit into the output frame, we'll shift N pixel left so that it fits again (at most 3 pixels).
1560 
1561  ocean_assert(x >= 4u && outputWidth > 4u);
1562  const unsigned int newX = outputWidth - 4u;
1563 
1564  ocean_assert(x > newX);
1565  outputPixelData -= x - newX;
1566 
1567  x = newX;
1568 
1569  // the for loop will stop after this iteration
1570  ocean_assert(!(x + 4u < outputWidth));
1571  }
1572 
1573  // we need four successive x coordinate floats:
1574  // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
1575  float x_0123[4] = { float(x + 0u), float(x + 1u), float(x + 2u), float(x + 3u) };
1576  const float32x4_t m128_f_x_0123 = vld1q_f32(x_0123);
1577 
1578  // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
1579  const float32x4_t m128_f_xx = vmlaq_f32(m128_f_C0, m128_f_X0, m128_f_x_0123);
1580  const float32x4_t m128_f_yy = vmlaq_f32(m128_f_C1, m128_f_X1, m128_f_x_0123);
1581  const float32x4_t m128_f_zz = vmlaq_f32(m128_f_C2, m128_f_X2, m128_f_x_0123);
1582 
1583 #ifdef USE_DIVISION_ARM64_ARCHITECTURE
1584 
1585  // using the division available from ARM64 is more precise
1586  const float32x4_t m128_f_inputX = vdivq_f32(m128_f_xx, m128_f_zz);
1587  const float32x4_t m128_f_inputY = vdivq_f32(m128_f_yy, m128_f_zz);
1588 
1589 #else
1590 
1591  // we calculate the (approximated) inverse of zz
1592  // [1/zz3, 1/zz2, 1/zz1, 1/zz0]
1593  float32x4_t inv_zz_128 = vrecpeq_f32(m128_f_zz);
1594  inv_zz_128 = vmulq_f32(vrecpsq_f32(m128_f_zz, inv_zz_128), inv_zz_128); // improving the accuracy of the approx. inverse by Newton/Raphson
1595 
1596  // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
1597  const float32x4_t m128_f_inputX = vmulq_f32(m128_f_xx, inv_zz_128);
1598  const float32x4_t m128_f_inputY = vmulq_f32(m128_f_yy, inv_zz_128);
1599 
1600 #endif // USE_DIVISION_ARM64_ARCHITECTURE
1601 
1602  // Mark pixels inside the input image as valid, all others as invalid
1603  const uint32x4_t m128_u_validPixelX = vandq_u32(vcltq_f32(m128_f_inputX, m128_f_inputWidth_pointFive), vcgtq_f32(m128_f_inputX, m128_f_negPointFive)); // inputX < (inputWidth - 0.5) && inputX >= -0.5 ? 0xFFFFFFFF : 0x00000000
1604  const uint32x4_t m128_u_validPixelY = vandq_u32(vcltq_f32(m128_f_inputY, m128_f_inputHeight_pointFive), vcgtq_f32(m128_f_inputY, m128_f_negPointFive)); // inputY < (inputHeight - 0.5) && inputY > -0.5 ? 0xFFFFFFFF : 0x00000000
1605 
1606  const uint32x4_t m128_u_validPixel = vandq_u32(m128_u_validPixelX, m128_u_validPixelY); // is_inside_input_frame(inputPosition) ? 0xFFFFFFFF : 0x00000000
1607 
1608  // Stop here if all pixels are invalid
1609  const uint32x2_t m64_u_validPixel = vorr_u32(vget_low_u32(m128_u_validPixel), vget_high_u32(m128_u_validPixel));
1610  if ((vget_lane_u32(m64_u_validPixel, 0) | vget_lane_u32(m64_u_validPixel, 1)) == 0x00000000u)
1611  {
1612 #ifdef OCEAN_DEBUG
1613  // clang-format off
1614  OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
1615  // clang-format on
1616  vst1q_u32(debugValidPixels, m128_u_validPixel);
1617  ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
1618 #endif
1619 
1620  outputPixelData[0] = bColor;
1621  outputPixelData[1] = bColor;
1622  outputPixelData[2] = bColor;
1623  outputPixelData[3] = bColor;
1624 
1625  outputPixelData += 4;
1626 
1627  continue;
1628  }
1629 
1630  // Determine the pixel indices of the nearest neighbors and store the result
1631  vst1q_u32(validPixels, m128_u_validPixel);
1632  ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
1633 
1634  const uint32x4_t m128_u_inputX = vcvtq_u32_f32(vaddq_f32(m128_f_inputX, m128_f_pointFive)); // Round to nearest integer: x' = (int) (x + 0.5f)
1635  const uint32x4_t m128_u_inputY = vcvtq_u32_f32(vaddq_f32(m128_f_inputY, m128_f_pointFive)); // Round to nearest integer: y' = (int) (y + 0.5f)
1636  const uint32x4_t m_128_u_nearestNeighbourElementOffsets = vmlaq_u32(vmulq_u32(m128_u_inputY, m128_u_inputStrideElements), m128_u_inputX, m128_u_channels); // nn = y' * inputStrideElements + x' * channels
1637  vst1q_u32(nearestNeighbourElementOffsets, m_128_u_nearestNeighbourElementOffsets);
1638 
1639 #ifdef OCEAN_DEBUG
1640  unsigned int debugInputX[4];
1641  unsigned int debugInputY[4];
1642  vst1q_u32(debugInputX, m128_u_inputX);
1643  vst1q_u32(debugInputY, m128_u_inputY);
1644  ocean_assert(!validPixels[0] || (debugInputX[0] < inputWidth && debugInputY[0] < inputHeight));
1645  ocean_assert(!validPixels[1] || (debugInputX[1] < inputWidth && debugInputY[1] < inputHeight));
1646  ocean_assert(!validPixels[2] || (debugInputX[2] < inputWidth && debugInputY[2] < inputHeight));
1647  ocean_assert(!validPixels[3] || (debugInputX[3] < inputWidth && debugInputY[3] < inputHeight));
1648 #endif
1649 
1650  outputPixelData[0] = validPixels[0] ? *(const PixelType*)(input + nearestNeighbourElementOffsets[0]) : bColor;
1651  outputPixelData[1] = validPixels[1] ? *(const PixelType*)(input + nearestNeighbourElementOffsets[1]) : bColor;
1652  outputPixelData[2] = validPixels[2] ? *(const PixelType*)(input + nearestNeighbourElementOffsets[2]) : bColor;
1653  outputPixelData[3] = validPixels[3] ? *(const PixelType*)(input + nearestNeighbourElementOffsets[3]) : bColor;
1654 
1655  outputPixelData += 4;
1656  }
1657  }
1658 }
1659 
1660 #endif // OCEAN_HARDWARE_NEON_VERSION >= 10
1661 
1662 template <unsigned int tChannels>
1663 void FrameInterpolatorNearestPixel::homographyMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* input_H_output, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
1664 {
1665  static_assert(tChannels > 0u, "Invalid channel number!");
1666 
1667  ocean_assert(input != nullptr && output != nullptr && outputMask != nullptr);
1668  ocean_assert(inputWidth > 0u && inputHeight > 0u);
1669  ocean_assert(outputWidth > 0u && outputHeight > 0u);
1670  ocean_assert(input_H_output != nullptr && !input_H_output->isSingular());
1671 
1672  ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
1673 
1674  const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
1675  const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
1676  const unsigned int outputMaskStrideElements = outputWidth + outputMaskPaddingElements;
1677 
1678  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
1679 
1680  output += firstOutputRow * outputStrideElements;
1681  outputMask += firstOutputRow * outputMaskStrideElements;
1682 
1683  for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
1684  {
1685  PixelType* outputPixel = (PixelType*)(output);
1686 
1687  for (unsigned int x = 0; x < outputWidth; ++x)
1688  {
1689  const Vector2 outputPosition = Vector2(Scalar(int(x) + outputOriginX), Scalar(int(y) + outputOriginY));
1690  const Vector2 inputPosition(*input_H_output * outputPosition);
1691 
1692  const unsigned int inputX = Numeric::round32(inputPosition.x());
1693  const unsigned int inputY = Numeric::round32(inputPosition.y());
1694 
1695  if (inputX < inputWidth && inputY < inputHeight)
1696  {
1697  *outputPixel = *((PixelType*)(input + inputY * inputStrideElements + inputX * tChannels));
1698  *outputMask = maskValue;
1699  }
1700  else
1701  {
1702  *outputMask = 0xFFu - maskValue;
1703  }
1704 
1705  ++outputPixel;
1706  ++outputMask;
1707  }
1708 
1709  output += outputStrideElements;
1710  outputMask += outputMaskPaddingElements;
1711  }
1712 }
1713 
1714 template <unsigned int tChannels>
1715 void FrameInterpolatorNearestPixel::transform8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* lookupTable, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
1716 {
1717  static_assert(tChannels > 0u, "Invalid channel number!");
1718 
1719  ocean_assert(lookupTable != nullptr);
1720  ocean_assert(input != nullptr && output != nullptr);
1721 
1722  ocean_assert(inputWidth != 0u && inputHeight != 0u);
1723  ocean_assert(firstRow + numberRows <= lookupTable->sizeY());
1724 
1725  const unsigned int outputWidth = (unsigned int)(lookupTable->sizeX());
1726 
1727  const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
1728  const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
1729 
1730  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
1731 
1732  const uint8_t zeroColor[tChannels] = {uint8_t(0)};
1733  const PixelType* const bColor = borderColor ? (PixelType*)(borderColor) : (PixelType*)(zeroColor);
1734 
1735  if (offset)
1736  {
1737  for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
1738  {
1739  PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
1740 
1741  for (unsigned int x = 0u; x < outputWidth; ++x)
1742  {
1743  const Vector2 inputOffset(lookupTable->bilinearValue(Scalar(x), Scalar(y)));
1744  const Vector2 inputPosition(Scalar(x) + inputOffset.x(), Scalar(y) + inputOffset.y());
1745 
1746  const unsigned int inputX = Numeric::round32(inputPosition.x());
1747  const unsigned int inputY = Numeric::round32(inputPosition.y());
1748 
1749  if (inputX < inputWidth && inputY < inputHeight)
1750  {
1751  *outputData = *((const PixelType*)(input + inputY * inputStrideElements) + inputX);
1752  }
1753  else
1754  {
1755  *outputData = *bColor;
1756  }
1757 
1758  ++outputData;
1759  }
1760  }
1761  }
1762  else
1763  {
1764  for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
1765  {
1766  PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
1767 
1768  for (unsigned int x = 0u; x < outputWidth; ++x)
1769  {
1770  const Vector2 inputPosition(lookupTable->bilinearValue(Scalar(x), Scalar(y)));
1771 
1772  const unsigned int inputX = Numeric::round32(inputPosition.x());
1773  const unsigned int inputY = Numeric::round32(inputPosition.y());
1774 
1775  if (inputX < inputWidth && inputY < inputHeight)
1776  {
1777  *outputData = *((const PixelType*)(input + inputY * inputStrideElements) + inputX);
1778  }
1779  else
1780  {
1781  *outputData = *bColor;
1782  }
1783 
1784  ++outputData;
1785  }
1786  }
1787  }
1788 }
1789 
1790 template <unsigned int tChannels>
1791 void FrameInterpolatorNearestPixel::transformMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable* lookupTable, const bool offset, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows)
1792 {
1793  static_assert(tChannels > 0u, "Invalid channel number!");
1794 
1795  ocean_assert(lookupTable != nullptr);
1796  ocean_assert(input != nullptr && output != nullptr);
1797 
1798  ocean_assert(inputWidth != 0u && inputHeight != 0u);
1799  ocean_assert(firstRow + numberRows <= lookupTable->sizeY());
1800 
1801  ocean_assert(NumericT<unsigned int>::isInsideValueRange(lookupTable->sizeX()));
1802  const unsigned int outputWidth = (unsigned int)(lookupTable->sizeX());
1803 
1804  const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
1805  const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
1806  const unsigned int outputMaskStrideElements = outputWidth + outputMaskPaddingElements;
1807 
1808  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
1809 
1810  output += firstRow * outputStrideElements;
1811  outputMask += firstRow * outputMaskStrideElements;
1812 
1813  if (offset)
1814  {
1815  for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
1816  {
1817  PixelType* outputPixel = (PixelType*)(output);
1818 
1819  for (unsigned int x = 0u; x < lookupTable->sizeX(); ++x)
1820  {
1821  const Vector2 inputOffset(lookupTable->bilinearValue(Scalar(x), Scalar(y)));
1822  const Vector2 inputPosition(Scalar(x) + inputOffset.x(), Scalar(y) + inputOffset.y());
1823 
1824  const unsigned int inputX = Numeric::round32(inputPosition.x());
1825  const unsigned int inputY = Numeric::round32(inputPosition.y());
1826 
1827  if (inputX < inputWidth && inputY < inputHeight)
1828  {
1829  *outputPixel = *((PixelType*)(input + inputY * inputStrideElements + inputX * tChannels));
1830  *outputMask = maskValue;
1831  }
1832  else
1833  {
1834  *outputMask = 0xFF - maskValue;
1835  }
1836 
1837  ++outputPixel;
1838  ++outputMask;
1839  }
1840 
1841  output += outputStrideElements;
1842  outputMask += outputMaskPaddingElements;
1843  }
1844  }
1845  else
1846  {
1847  for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
1848  {
1849  PixelType* outputPixel = (PixelType*)(output);
1850 
1851  for (unsigned int x = 0u; x < lookupTable->sizeX(); ++x)
1852  {
1853  const Vector2 inputPosition(lookupTable->bilinearValue(Scalar(x), Scalar(y)));
1854 
1855  const unsigned int inputX = Numeric::round32(inputPosition.x());
1856  const unsigned int inputY = Numeric::round32(inputPosition.y());
1857 
1858  if (inputX < inputWidth && inputY < inputHeight)
1859  {
1860  *outputPixel = *((PixelType*)(input + inputY * inputStrideElements + inputX * tChannels));
1861  *outputMask = maskValue;
1862  }
1863  else
1864  {
1865  *outputMask = 0xFF - maskValue;
1866  }
1867 
1868  ++outputPixel;
1869  ++outputMask;
1870  }
1871 
1872  output += outputStrideElements;
1873  outputMask += outputMaskPaddingElements;
1874  }
1875  }
1876 }
1877 
1878 } // namespace CV
1879 
1880 } // namespace Ocean
1881 
1882 #endif // META_OCEAN_CV_FRAME_INTERPOLATOR_NEAREST_PIXEL_H
The following comfort class provides comfortable functions simplifying prototyping applications but a...
Definition: FrameInterpolatorNearestPixel.h:49
static bool rotate90(const Frame &input, Frame &output, const bool clockwise, Worker *worker=nullptr)
Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
Definition: FrameInterpolatorNearestPixel.h:589
static bool affine(const Frame &input, Frame &output, const SquareMatrix3 &input_A_output, const uint8_t *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Applies an affine image transformation to a frame (with zipped pixel format) and renders using neares...
static bool resize(const Frame &source, Frame &target, Worker *worker=nullptr)
Resizes a given frame by a nearest pixel search.
static bool transform(const Frame &input, Frame &output, const LookupTable &lookupTable, const bool offset, const uint8_t *borderColor, Worker *worker=nullptr)
Transforms a given input frame (with 1 plane) into an output frame by application of an interpolation...
static bool transformMask(const Frame &input, Frame &output, Frame &outputMask, const LookupTable &lookupTable, const bool offset, Worker *worker=nullptr, const uint8_t maskValue=0xFFu)
Transforms a given input frame (with zipped pixel format) into an output frame by application of an i...
static bool homographyMask(const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 &input_H_output, Worker *worker=nullptr, const uint8_t maskValue=0xFFu, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame (with zipped pixel format) into an output frame (with arbitrary frame ...
static bool rotate180(const Frame &input, Frame &output, Worker *worker=nullptr)
Rotates a given frame by 180 degrees.
Definition: FrameInterpolatorNearestPixel.h:594
static bool rotate(const Frame &input, Frame &output, const int angle, Worker *worker=nullptr)
Rotates a given frame with 90 degree steps.
Definition: FrameInterpolatorNearestPixel.h:599
static bool homography(const Frame &input, Frame &output, const SquareMatrix3 &input_H_output, const void *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame (with zipped pixel format) into an output frame by application of a ho...
This class implements highly optimized interpolation functions with fixed properties.
Definition: FrameInterpolatorNearestPixel.h:189
static void resize400x400To224x224_8BitPerChannel(const uint8_t *const source, uint8_t *const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 224x224 ...
This class implements a nearest pixel frame interpolator.
Definition: FrameInterpolatorNearestPixel.h:35
static void resize(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Resizes a given frame by a nearest pixel search and uses several CPU cores to speed update the proces...
Definition: FrameInterpolatorNearestPixel.h:605
static void homography(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, const T *borderColor, T *output, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of a homography.
Definition: FrameInterpolatorNearestPixel.h:664
static void affine8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_A_output, const uint8_t *borderColor, uint8_t *output, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Applies an affine image transformation to an 8 bit per channel input frame and renders the output.
Definition: FrameInterpolatorNearestPixel.h:620
static void transform8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable &lookupTable, const bool offset, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given input frame (with zipped pixel format) into an output frame by application of an i...
Definition: FrameInterpolatorNearestPixel.h:719
LookupCorner2< Vector2 > LookupTable
Definition of a lookup table for 2D vectors.
Definition: FrameInterpolatorNearestPixel.h:39
static void homographySubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const T *borderColor, T *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Copies the image content of an input image to a subset of an output image by application of a given h...
Definition: FrameInterpolatorNearestPixel.h:877
static void resizeSubset(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Resizes a given frame by a nearest pixel search.
Definition: FrameInterpolatorNearestPixel.h:757
static void rotate90(const TElementType *source, TElementType *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const bool clockwise, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
Definition: FrameInterpolatorNearestPixel.h:745
static void affine8BitPerChannelSSESubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3 *affineTransform, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Applies an affine image transformation to an 8 bit per channel frame using nearest neighbor interpolati...
Definition: FrameInterpolatorNearestPixel.h:925
static void transformMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable *lookupTable, const bool offset, uint8_t *output, const unsigned int outputPaddingElements, uint8_t *outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows)
Transforms a given input frame (with zipped pixel format) into an output frame by application of an i...
Definition: FrameInterpolatorNearestPixel.h:1791
static void homographyMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3 *input_H_output, uint8_t *output, const unsigned int outputPaddingElements, uint8_t *outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition: FrameInterpolatorNearestPixel.h:1663
static void affine8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3 *affineTransform, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Applies an affine image transformation to an 8 bit per channel frame using nearest neighbor interpolati...
Definition: FrameInterpolatorNearestPixel.h:806
static void homographySSESubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const T *borderColor, T *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Copies the image content of an input image to a subset of an output image by application of a given h...
Definition: FrameInterpolatorNearestPixel.h:1063
static void homographyMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3 &input_H_output, uint8_t *output, const unsigned int outputPaddingElements, uint8_t *outputMask, const unsigned int outputMaskPaddingElements, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition: FrameInterpolatorNearestPixel.h:706
static void affine8BitPerChannelIntegerNEONSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3 *affineTransform, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Affine image transformation for 8-bit per channel frames using nearest neighbor interpolation (using ...
Definition: FrameInterpolatorNearestPixel.h:1231
static void transform8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *lookupTable, const bool offset, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame (with zipped pixel format) into an output frame by applica...
Definition: FrameInterpolatorNearestPixel.h:1715
static void transformMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable &lookupTable, const bool offset, uint8_t *output, const unsigned int outputPaddingElements, uint8_t *outputMask, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given input frame (with zipped pixel format) into an output frame by application of an i...
Definition: FrameInterpolatorNearestPixel.h:732
static void homographyNEONSubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const T *borderColor, T *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Copies the image content of an input image to a subset of an output image by application of a given h...
Definition: FrameInterpolatorNearestPixel.h:1481
static bool coversHomographyInputFrame(const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int outputWidth, const unsigned int outputHeight, const SquareMatrix3 &input_H_output, const int outputOriginX=0, const int outputOriginY=0)
Checks whether the application of a given homography for a specified input frame and output frame cov...
static bool rotate(const Frame &input, Frame &output, const int angle, Worker *worker=nullptr)
Rotates a given frame in 90-degree steps.
static bool rotate180(const Frame &input, Frame &output, Worker *worker=nullptr)
Rotates a given frame by 180 degrees.
static bool rotate90(const Frame &input, Frame &output, const bool clockwise, Worker *worker=nullptr)
Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
This class implements a 2D pixel position with pixel precision.
Definition: PixelPosition.h:65
T y() const
Returns the vertical coordinate position of this object.
Definition: PixelPosition.h:470
T x() const
Returns the horizontal coordinate position of this object.
Definition: PixelPosition.h:458
static Caller< void > createStatic(typename StaticFunctionPointerMaker< void, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass >::Type function)
Creates a new caller container for a static function with no function parameter.
Definition: Caller.h:2876
Template class that allows defining an array of data types.
Definition: DataType.h:27
This class implements Ocean's image class.
Definition: Frame.h:1792
void setRelativeTimestamp(const Timestamp &relative)
Sets the relative timestamp of this frame.
Definition: Frame.h:4153
void setTimestamp(const Timestamp &timestamp)
Sets the timestamp of this frame.
Definition: Frame.h:4148
const Timestamp & timestamp() const
Returns the timestamp of this frame.
Definition: Frame.h:4138
const Timestamp & relativeTimestamp() const
Returns the relative timestamp of this frame.
Definition: Frame.h:4143
Definition of a frame type composed by the frame dimension, pixel format and pixel origin.
Definition: Frame.h:30
size_t sizeY() const
Returns the vertical dimension of this lookup object.
Definition: Lookup2.h:947
size_t sizeX() const
Returns the horizontal dimension of this lookup object.
Definition: Lookup2.h:941
This class implements a 2D lookup object with values at the bins' corners defining the individual loo...
Definition: Lookup2.h:636
T bilinearValue(const TScalar x, const TScalar y) const
Applies a lookup for a specific position in this lookup object.
Definition: Lookup2.h:1815
This class implements an object able to allocate memory.
Definition: base/Memory.h:22
void * data()
Returns the pointer to the writable memory which is allocated by this object.
Definition: base/Memory.h:303
This class provides basic numeric functionalities.
Definition: Numeric.h:57
static constexpr int32_t round32(const T value)
Returns the rounded 32 bit integer value of a given value.
Definition: Numeric.h:2064
static constexpr bool isEqualEps(const T value)
Returns whether a value is smaller than or equal to a small epsilon.
Definition: Numeric.h:2087
bool isNull() const
Returns whether this matrix is a zero matrix.
Definition: SquareMatrix3.h:1333
const T * data() const
Returns a pointer to the internal values.
Definition: SquareMatrix3.h:1046
bool isSingular() const
Returns whether this matrix is singular (and thus cannot be inverted).
Definition: SquareMatrix3.h:1341
TypeMapperBySize< sizeof(T)>::Type Type
Definition of an invalid mapped data type.
Definition: DataType.h:501
const T & x() const noexcept
Returns the x value.
Definition: Vector2.h:698
const T & y() const noexcept
Returns the y value.
Definition: Vector2.h:710
bool isEqual(const VectorT2< T > &vector, const T eps) const
Returns whether two vectors are equal up to a specified epsilon.
Definition: Vector2.h:746
This class implements a worker able to distribute function calls over different threads.
Definition: Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
PixelPositionT< int > PixelPositionI
Definition of a PixelPosition object with a data type allowing positive and negative coordinate value...
Definition: PixelPosition.h:41
SquareMatrixT3< Scalar > SquareMatrix3
Definition of the SquareMatrix3 object, depending on the OCEAN_MATH_USE_SINGLE_PRECISION either with ...
Definition: SquareMatrix3.h:35
float Scalar
Definition of a scalar type.
Definition: Math.h:128
VectorT3< Scalar > Vector3
Definition of a 3D vector.
Definition: Vector3.h:22
VectorT2< Scalar > Vector2
Definition of a 2D vector.
Definition: Vector2.h:21
The namespace covering the entire Ocean framework.
Definition: Accessor.h:15
Default definition of a type with tBytes bytes.
Definition: DataType.h:32