Ocean
Loading...
Searching...
No Matches
FrameInterpolatorNearestPixel.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_CV_FRAME_INTERPOLATOR_NEAREST_PIXEL_H
9#define META_OCEAN_CV_FRAME_INTERPOLATOR_NEAREST_PIXEL_H
10
11#include "ocean/cv/CV.h"
14
15#include "ocean/base/DataType.h"
16#include "ocean/base/Frame.h"
17#include "ocean/base/Memory.h"
18#include "ocean/base/Worker.h"
19
20#include "ocean/math/Lookup2.h"
22
23namespace Ocean
24{
25
26namespace CV
27{
28
29/**
30 * This class implements a nearest pixel frame interpolator.
31 * Actually, no pixels are interpolated, but the color intensities from the nearest pixels (e.g., based on rounding) are used.<br>
32 * @ingroup cv
33 */
34class OCEAN_CV_EXPORT FrameInterpolatorNearestPixel
35{
36 public:
37
38 /// Definition of a lookup table for 2D vectors.
40
41 public:
42
43 /**
44 * The following comfort class provides comfortable functions simplifying prototyping applications but also increasing binary size of the resulting applications.
45 * Best practice is to avoid using these functions if binary size matters,<br>
46 * as for every comfort function a corresponding function exists with specialized functionality not increasing binary size significantly.<br>
47 */
48 class OCEAN_CV_EXPORT Comfort
49 {
50 public:
51
52 /**
53 * Resizes a given frame by a nearest pixel search.
54 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
55 * @param source The source frame that will be resized, must have a zipped pixel format, must be valid
56 * @param target The target frame that receives the image information of the source frame, the pixel format and pixel origin must match with the source frame
57 * @param worker Optional worker object to distribute the computational load
58 * @return True, if succeeded
59 */
60 static bool resize(const Frame& source, Frame& target, Worker* worker = nullptr);
61
62 /**
63 * Resizes a given frame in place by a nearest pixel search.
64 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
65 * @param frame The frame that will be resized, must have a zipped pixel format, must be valid
66 * @param targetWidth Width of the new target frame in pixel, with range [1, infinity)
67 * @param targetHeight Height of the new target frame in pixel, with range [1, infinity)
68 * @param worker Optional worker object to distribute the computational load
69 * @return True, if succeeded
70 */
71 static inline bool resize(Frame& frame, const unsigned int targetWidth, const unsigned int targetHeight, Worker* worker = nullptr);
72
73 /**
74 * Applies an affine image transformation to a frame (with zipped pixel format) and renders the output using nearest-neighbor interpolation.
75 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.
76 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame cover the affine transformation).
77 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
78 * @param input The input frame that will be transformed, must have a zipped pixel format, must be valid
79 * @param output The output frame resulting by application of the given affine transformation, with same pixel format and pixel origin as the input frame, must have a valid dimension
80 * @param input_A_output Affine transformation used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
81 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
82 * @param worker Optional worker object to distribute the computational load
83 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
84 * @return True, if succeeded
85 */
86 static bool affine(const Frame& input, Frame& output, const SquareMatrix3& input_A_output, const uint8_t* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
87
88 /**
89 * Transforms a given input frame (with zipped pixel format) into an output frame by application of a homography.
90 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
91 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame cover the transformation of the homography).<br>
92 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
93 * @param input The input frame that will be transformed, must have a zipped pixel format, must be valid
94 * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must be valid
95 * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
96 * @param borderColor Color of undefined pixel positions (passed as an untyped pointer), the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
97 * @param worker Optional worker object to distribute the computational load
98 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
99 * @return True, if succeeded
100 */
101 static bool homography(const Frame& input, Frame& output, const SquareMatrix3& input_H_output, const void* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
102
103 /**
104 * Transforms a given input frame (with zipped pixel format) into an output frame (with arbitrary frame dimension) by application of a homography.
105 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
106 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame cover the transformation of the homography).<br>
107 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
108 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
109 * @param input The input frame that will be transformed, must have a zipped pixel format, must be valid
110 * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must have a valid dimension
111 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
112 * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
113 * @param worker Optional worker object to distribute the computational load
114 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
115 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
116 * @return True, if succeeded
117 * @see Geometry::Homography::coversHomographyInputFrame().
118 */
119 static bool homographyMask(const Frame& input, Frame& output, Frame& outputMask, const SquareMatrix3& input_H_output, Worker* worker = nullptr, const uint8_t maskValue = 0xFFu, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
120
121 /**
122 * Transforms a given input frame (with 1 plane) into an output frame by application of an interpolation lookup table.
123 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
124 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
125 * @param input The input frame which will be transformed, must have a zipped pixel format, must be valid
126 * @param output Resulting output frame, the frame dimension will be set to match the size of the lookup table, pixel format and pixel origin will be set to match the given input frame
127 * @param lookupTable The lookup table which defines the transformation, must be valid
128 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
129 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
130 * @param worker Optional worker object to distribute the computation
131 * @return True, if succeeded
132 */
133 static bool transform(const Frame& input, Frame& output, const LookupTable& lookupTable, const bool offset, const uint8_t* borderColor, Worker* worker = nullptr);
134
135 /**
136 * Transforms a given input frame (with zipped pixel format) into an output frame by application of an interpolation lookup table.
137 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
138 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
139 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
140 * @param input The input frame which will be transformed
141 * @param output Resulting output frame, the frame dimension will be set to match the size of the lookup table, pixel format and pixel origin will be set to match the given input frame
142 * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
143 * @param lookupTable The lookup table which defines the transformation, must be valid
144 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
145 * @param worker Optional worker object to distribute the computation
146 * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
147 * @return True, if succeeded
148 */
149 static bool transformMask(const Frame& input, Frame& output, Frame& outputMask, const LookupTable& lookupTable, const bool offset, Worker* worker = nullptr, const uint8_t maskValue = 0xFFu);
150
151 /**
152 * Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
153 * This function is just a wrapper for CV::FrameTransposer::Comfort::rotate90().
154 * @param input The input frame which will be rotated, must be valid
155 * @param output The resulting rotated output frame, the frame type will be set automatically
156 * @param clockwise True, to rotate the frame clockwise; False, to rotate the frame counter-clockwise
157 * @param worker Optional worker object to distribute the computation
158 * @return True, if succeeded
159 */
160 static inline bool rotate90(const Frame& input, Frame& output, const bool clockwise, Worker* worker = nullptr);
161
162 /**
163 * Rotates a given frame by 180 degrees.
164 * This function is just a wrapper for CV::FrameTransposer::Comfort::rotate180().
165 * @param input The input frame which will be rotated, must be valid
166 * @param output The resulting rotated output frame, the frame type will be set automatically
167 * @param worker Optional worker object to distribute the computation
168 * @return True, if succeeded
169 */
170 static inline bool rotate180(const Frame& input, Frame& output, Worker* worker = nullptr);
171
172 /**
173 * Rotates a given frame with 90 degree steps.
174 * This function is just a wrapper for CV::FrameTransposer::Comfort::rotate().
175 * @param input The input frame which will be rotated, must be valid
176 * @param output The resulting rotated output frame, the frame type will be set automatically
177 * @param angle The clockwise rotation angle to be used, must be a multiple of +/- 90, with range (-infinity, infinity)
178 * @param worker Optional worker object to distribute the computation
179 * @return True, if succeeded
180 */
181 static bool rotate(const Frame& input, Frame& output, const int angle, Worker* worker = nullptr);
182 };
183
184 /**
185 * This class implements highly optimized interpolation functions with fixed properties.
186 * The functions can be significantly faster as these functions are tailored to the specific properties.
187 */
188 class OCEAN_CV_EXPORT SpecialCases
189 {
190 public:
191
192 /**
193 * Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 224x224 by using a bilinear interpolation (NOTE(review): this class belongs to the nearest pixel interpolator, so 'bilinear' may be a copy-paste leftover - confirm against the implementation).
194 * This function exploits the fact that lookup locations and interpolation factors repeat after 25 pixels (16 pixels in the target resolution).
195 * @param source The source frame buffer with resolution 400x400, must be valid
196 * @param target The target frame buffer receiving the resized image information, with resolution 224x224, must be valid
197 * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
198 * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
199 * @see FrameInterpolatorBilinear::resize<T, tChannels>() (NOTE(review): presumably FrameInterpolatorNearestPixel::resize<T, tChannels>() is the closer counterpart - confirm).
200 */
201 static void resize400x400To224x224_8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
202 };
203
204 /**
205 * Resizes a given frame by a nearest pixel search and uses several CPU cores to speed up the process.
206 * @param source The source frame buffer, must be valid
207 * @param target The target frame buffer, must be valid
208 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
209 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
210 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
211 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
212 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
213 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
214 * @param worker Optional worker to distribute the computation
215 * @tparam T Data type of the pixel channel values
216 * @tparam tChannels Number of data channels, with range [1, infinity)
217 */
218 template <typename T, unsigned int tChannels>
219 static inline void resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
220
221 /**
222 * Applies an affine image transformation to an 8 bit per channel input frame and renders the output.
223 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.
224 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame cover the affine transformation).
225 * @param input The input frame that will be transformed, must be valid
226 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
227 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
228 * @param input_A_output The affine transformation used to transform the given input frame, transforming output points to input points, must be valid
229 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
230 * @param output The output frame using the given affine transform, must be valid
231 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
232 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
233 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
234 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
235 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
236 * @param worker Optional worker object to distribute the computational load
237 * @tparam tChannels The number of channels of the frame, with range [1, infinity)
238 */
239 template <unsigned int tChannels>
240 static inline void affine8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_A_output, const uint8_t* borderColor, uint8_t* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
241
242 /**
243 * Transforms a given input frame into an output frame by application of a homography.
244 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
245 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame cover the transformation of the homography).<br>
246 * @param input The input frame that will be transformed, must be valid
247 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
248 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
249 * @param input_H_output The homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
250 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0 to each channel
251 * @param output The output frame using the given homography, must be valid
252 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
253 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
254 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
255 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
256 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
257 * @param worker Optional worker object to distribute the computational load
258 * @tparam T Data type of each pixel channel, e.g., float, double, int
259 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
260 * @see homographyMask8BitPerChannel().
261 */
262 template <typename T, unsigned int tChannels>
263 static inline void homography(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const T* borderColor, T* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
264
265 /**
266 * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
267 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
268 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame cover the transformation of the homography).<br>
269 * @param input The input frame that will be transformed, must be valid
270 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
271 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
272 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
273 * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
274 * @param output The output frame using the given homography, must be valid
275 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
276 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
277 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
278 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
279 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
280 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
281 * @param worker Optional worker object to distribute the computational load
282 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
283 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
284 * @see homography().
285 */
286 template <unsigned int tChannels>
287 static inline void homographyMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3& input_H_output, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
288
289 /**
290 * Transforms a given input frame (with zipped pixel format) into an output frame by application of an interpolation lookup table.
291 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
292 * @param input The input frame which will be transformed, must be valid
293 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
294 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
295 * @param lookupTable The lookup table which defines the transformation, must be valid
296 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
297 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
298 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
299 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
300 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
301 * @param worker Optional worker object to distribute the computation
302 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
303 */
304 template <unsigned int tChannels>
305 static inline void transform8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& lookupTable, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
306
307 /**
308 * Transforms a given input frame (with zipped pixel format) into an output frame by application of an interpolation lookup table.
309 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
310 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
311 * @param input The input frame which will be transformed, must be valid
312 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
313 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
314 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
315 * @param lookupTable The lookup table which defines the transformation, must be valid
316 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
317 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
318 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
319 * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
320 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
321 * @param worker Optional worker object to distribute the computation
322 * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
323 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
324 */
325 template <unsigned int tChannels>
326 static inline void transformMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable& lookupTable, const bool offset, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
327
328 /**
329 * Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
330 * @param source The source frame which will be rotated, must be valid
331 * @param target The resulting rotated target frame, must be valid and must have the same buffer size as the source frame
332 * @param sourceWidth The width of the source frame in pixel, with range [1, infinity)
333 * @param sourceHeight The height of the source frame in pixel, with range [1, infinity)
334 * @param clockwise True, to rotate the frame clockwise; False, to rotate the frame counter-clockwise
335 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
336 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
337 * @param worker Optional worker object to distribute the computation
338 * @tparam TElementType Data type of the elements of the image pixels
339 * @tparam tChannels Number of data channels, with range [1, infinity)
340 */
341 template <typename TElementType, unsigned int tChannels>
342 static inline void rotate90(const TElementType* source, TElementType* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const bool clockwise, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
343
344 /**
345 * Checks whether the application of a given homography for a specified input frame and output frame covers the entire image information (which is necessary for the nearest pixel 'interpolation') or whether the homography relies on missing image information.
346 * @param inputWidth The width of the input frame in pixel, with range [1, infinity)
347 * @param inputHeight The height of the input frame in pixel, with range [1, infinity)
348 * @param outputWidth The width of the output frame in pixel, with range [1, infinity)
349 * @param outputHeight The height of the output frame in pixel, with range [1, infinity)
350 * @param input_H_output The homography to check which transforms points by the following equation: inputPoint = input_H_output * outputPoint, must be valid
351 * @param outputOriginX The horizontal origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)
352 * @param outputOriginY The vertical origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)
353 * @return True, if the homography covers the entire input image information (if all output pixels will receive valid data from the input frame); False, otherwise
354 */
355 static bool coversHomographyInputFrame(const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int outputWidth, const unsigned int outputHeight, const SquareMatrix3& input_H_output, const int outputOriginX = 0, const int outputOriginY = 0);
356
357 private:
358
359 /**
360 * Resizes a given frame by a nearest pixel search.
361 * @param source The source frame buffer, must be valid
362 * @param target The target frame buffer, must be valid
363 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
364 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
365 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
366 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
367 * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
368 * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
369 * @param firstTargetRow First (including) row to convert, with range [0, targetHeight)
370 * @param numberTargetRows Number of rows to convert, with range [1, targetHeight - firstTargetRow]
371 * @tparam T Data type of the pixel channel values
372 * @tparam tChannels Number of data channels, range: [1, infinity)
373 */
374 template <typename T, unsigned int tChannels>
375 static void resizeSubset(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
376
377 /**
378 * Apply an affine image transformation to an 8 bit per channel frame using nearest neighbor interpolation
379 * @param input The input frame that will be transformed, must be valid
380 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
381 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
382 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
383 * @param affineTransform Affine transformation used to transform the given input frame, must be valid
384 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
385 * @param output The output frame using the given affine transform, must be valid
386 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
387 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
388 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
389 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight)
390 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
391 * @tparam tChannels Number of frame channels, range: [1, infinity)
392 */
393 template <unsigned int tChannels>
394 static void affine8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
395
396 /**
397 * Copies the image content of an input image to a subset of an output image by application of a given homography transformation.
398 * @param input The input frame that will be transformed, must be valid
399 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
400 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
401 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
402 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0 to each channel
403 * @param output The output frame using the given homography, must be valid
404 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
405 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
406 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
407 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
408 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight)
409 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
410 * @tparam T Data type of each pixel channel, e.g., float, double, int
411 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
412 */
413 template <typename T, unsigned int tChannels>
414 static void homographySubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
415
416 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
417
418 /**
419 * Apply an affine image transformation to an 8 bit per channel frame using nearest neighbor interpolation (using SSE)
420 * Note: this function disregards the last row of the affine transformation matrix completely and only uses the top two rows, i.e., the elements a through f.
421 * @param input The input frame that will be transformed
422 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
423 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
424 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
425 * @param affineTransform Affine transformation which is applied to input frame.
426 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
427 * @param output The output frame where the result of the transformation will be stored
428 * @param outputWidth The width of the output image in pixel, with range [4, infinity)
429 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
430 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
431 * @param firstOutputRow The first output row to be handled
432 * @param numberOutputRows Number of output rows to be handled
433 * @tparam tChannels Number of frame channels
434 * @see affine8BitPerChannelSubset(), affine8BitPerChannelIntegerNEONSubset().
435 */
436 template <unsigned int tChannels>
437 static inline void affine8BitPerChannelSSESubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
438
439 /**
440 * Copies the image content of an input image to a subset of an output image by application of a given homography transformation (using SSE).
441 * @param input The input frame that will be transformed, must be valid
442 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
443 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
444 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
445 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0 to each channel
446 * @param output The output frame using the given homography, must be valid
447 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
448 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
449 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
450 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
451 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight)
452 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
453 * @tparam T Data type of each pixel channel, e.g., float, double, int
454 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
455 */
456 template <typename T, unsigned int tChannels>
457 static void homographySSESubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
458
459 #endif // OCEAN_HARDWARE_SSE_VERSION >= 41
460
461 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
462
463 /**
464 * Affine image transformation for 8-bit per channel frames using nearest neighbor interpolation (using NEON and integer fixed-point arithmetic)
465 * @param input The input frame that will be transformed, must be valid
466 * @param inputWidth Width of the input frame in pixel, with range [1, 65536)
467 * @param inputHeight Height of the input frame in pixel, with range [1, 65536)
468 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
469 * @param affineTransform Affine transformation used to transform the given input frame, must be valid
470 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
471 * @param output The output frame using the given affine transform, must be valid
472 * @param outputWidth The width of the output image in pixel, with range [1, 65536)
473 * @param outputHeight The height of the output image in pixel, with range [1, 65536)
474 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
475 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight)
476 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
477 * @tparam tChannels Number of frame channels
478 */
479 template <unsigned int tChannels>
480 static inline void affine8BitPerChannelIntegerNEONSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
481
482 /**
483 * Copies the image content of an input image to a subset of an output image by application of a given homography transformation (using NEON).
484 * Beware: The output width 'outputWidth' must be >= 4, use homographySubset for small output frames
485 * @param input The input frame that will be transformed
486 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
487 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
488 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
489 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, @c nullptr to assign 0 to each channel
490 * @param output The output frame using the given homography
491 * @param outputWidth The width of the output image in pixel, with range [4, infinity)
492 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
493 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
494 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
495 * @param firstOutputRow The first output row to be handled
496 * @param numberOutputRows Number of output rows to be handled
497 * @tparam T Data type of each pixel channel, e.g., float, double, int
498 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
499 * @see homographySubset().
500 */
501 template <typename T, unsigned int tChannels>
502 static inline void homographyNEONSubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
503
504 #endif // OCEAN_HARDWARE_NEON_VERSION
505
506 /**
507 * Transforms an 8 bit per channel frame using the given homography.
508 * @param input The input frame that will be transformed, must be valid
509 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
510 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
511 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
512 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
513 * @param output The output frame using the given homography, must be valid
514 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
515 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
516 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
517 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
518 * @param outputOriginX The horizontal coordinate of the output frame's origin
519 * @param outputOriginY The vertical coordinate of the output frame's origin
520 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
521 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
522 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight)
523 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
524 * @tparam tChannels Number of frame channels
525 */
526 template <unsigned int tChannels>
527 static inline void homographyMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* input_H_output, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
528
529 /**
530 * Transforms a subset of a given input frame (with zipped pixel format) into an output frame by application of an interpolation lookup table.
531 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
532 * @param input the input frame which will be transformed, must be valid
533 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
534 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
535 * @param lookupTable The lookup table which defines the transformation, must be valid
536 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
537 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
538 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
539 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
540 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
541 * @param firstRow First row to be handled, with range [0, lookupTable->sizeY())
542 * @param numberRows Number of rows to be handled, with range [1, lookupTable->sizeY() - firstRow]
543 * @tparam tChannels Number of channels of the frame
544 */
545 template <unsigned int tChannels>
546 static void transform8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* lookupTable, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
547
548 /**
549 * Transforms a given input frame (with zipped pixel format) into an output frame by application of an interpolation lookup table.
550 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
551 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, furthermore, these pixels are left untouched in the output frame.<br>
552 * @param input The input frame which will be transformed, must be valid
553 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
554 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
555 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
556 * @param lookupTable The lookup table which defines the transformation, must be valid
557 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
558 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
559 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
560 * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
561 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
562 * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
563 * @param firstRow First row to be handled, with range [0, lookupTable->sizeY())
564 * @param numberRows Number of rows to be handled, with range [1, lookupTable->sizeY() - firstRow]
565 * @tparam tChannels Number of channels of the frame
566 */
567 template <unsigned int tChannels>
568 static void transformMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable* lookupTable, const bool offset, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows);
569};
570
571inline bool FrameInterpolatorNearestPixel::Comfort::resize(Frame& frame, const unsigned int targetWidth, const unsigned int targetHeight, Worker* worker)
572{
573 ocean_assert(frame && targetWidth >= 1u && targetHeight >= 1u);
574
575 Frame tmpFrame(FrameType(frame, targetWidth, targetHeight));
576
577 if (!resize(frame, tmpFrame, worker))
578 {
579 return false;
580 }
581
582 tmpFrame.setTimestamp(frame.timestamp());
583 tmpFrame.setRelativeTimestamp(frame.relativeTimestamp());
584
585 frame = std::move(tmpFrame);
586 return true;
587}
588
589inline bool FrameInterpolatorNearestPixel::Comfort::rotate90(const Frame& input, Frame& output, const bool clockwise, Worker* worker)
590{
591 return FrameTransposer::Comfort::rotate90(input, output, clockwise, worker);
592}
593
594inline bool FrameInterpolatorNearestPixel::Comfort::rotate180(const Frame& input, Frame& output, Worker* worker)
595{
596 return FrameTransposer::Comfort::rotate180(input, output, worker);
597}
598
599inline bool FrameInterpolatorNearestPixel::Comfort::rotate(const Frame& input, Frame& output, const int angle, Worker* worker)
600{
601 return FrameTransposer::Comfort::rotate(input, output, angle, worker);
602}
603
604template <typename T, unsigned int tChannels>
605inline void FrameInterpolatorNearestPixel::resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
606{
607 ocean_assert(source && target);
608
609 if (worker)
610 {
611 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::resizeSubset<T, tChannels>, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
612 }
613 else
614 {
615 resizeSubset<T, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
616 }
617}
618
619template <unsigned int tChannels>
620inline void FrameInterpolatorNearestPixel::affine8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& affineTransform, const uint8_t* borderColor, uint8_t* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
621{
622 // Merge the additional translation into the affine transformation
623 const SquareMatrix3 adjustedAffineTransform = affineTransform * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
624
625 if (worker)
626 {
627 if (outputWidth >= 4u)
628 {
629#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
630 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::affine8BitPerChannelSSESubset<tChannels>, input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
631 return;
632#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
633 if (inputWidth <= 65535u && inputHeight <= 65535u && outputWidth <= 65535u && outputHeight <= 65535u)
634 {
635 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::affine8BitPerChannelIntegerNEONSubset<tChannels>, input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 32u);
636 return;
637 }
638#endif
639 }
640
641 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::affine8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
642 }
643 else
644 {
645 if (outputWidth >= 4u)
646 {
647#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
648 affine8BitPerChannelSSESubset<tChannels>(input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, outputHeight);
649 return;
650#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
651 if (inputWidth <= 65535u && inputHeight <= 65535u && outputWidth <= 65535u && outputHeight <= 65535u)
652 {
653 affine8BitPerChannelIntegerNEONSubset<tChannels>(input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, outputHeight);
654 return;
655 }
656#endif
657 }
658
659 affine8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, outputHeight);
660 }
661}
662
663template <typename T, unsigned int tChannels>
664inline void FrameInterpolatorNearestPixel::homography(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const T* borderColor, T* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
665{
666 static_assert(tChannels >= 1u, "Invalid channel number!");
667
668 // Merge the additional translation into the homography
669 const SquareMatrix3 input_H_adjustedOutput = input_H_output * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
670
671 typedef typename TypeMapper<T>::Type MappedTypeT;
672
673 if (worker)
674 {
675 if (outputWidth >= 4u)
676 {
677#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
678 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::homographySSESubset<MappedTypeT, tChannels>, (const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
679 return;
680#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
681 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::homographyNEONSubset<MappedTypeT, tChannels>, (const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
682 return;
683#endif
684 }
685
686 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::homographySubset<MappedTypeT, tChannels>, (const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
687 }
688 else
689 {
690 if (outputWidth >= 4u)
691 {
692#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
693 homographySSESubset<MappedTypeT, tChannels>((const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
694 return;
695#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
696 homographyNEONSubset<MappedTypeT, tChannels>((const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
697 return;
698#endif
699 }
700
701 homographySubset<MappedTypeT, tChannels>((const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
702 }
703}
704
705template <unsigned int tChannels>
706inline void FrameInterpolatorNearestPixel::homographyMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3& input_H_output, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, Worker* worker, const uint8_t maskValue)
707{
708 if (worker)
709 {
710 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::homographyMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, inputPaddingElements, &input_H_output, output, outputPaddingElements, outputMask, outputMaskPaddingElements, maskValue, outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, 0u, 0u), 0u, outputHeight, 14u, 15u, 20u);
711 }
712 else
713 {
714 homographyMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, inputPaddingElements, &input_H_output, output, outputPaddingElements, outputMask, outputMaskPaddingElements, maskValue, outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, 0u, outputHeight);
715 }
716}
717
718template <unsigned int tChannels>
719inline void FrameInterpolatorNearestPixel::transform8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& lookupTable, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
720{
721 if (worker)
722 {
723 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::transform8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &lookupTable, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (unsigned int)(lookupTable.sizeY()), 9u, 10u, 20u);
724 }
725 else
726 {
727 transform8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &lookupTable, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (unsigned int)(lookupTable.sizeY()));
728 }
729}
730
731template <unsigned int tChannels>
732inline void FrameInterpolatorNearestPixel::transformMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable& lookupTable, const bool offset, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, Worker* worker, const uint8_t maskValue)
733{
734 if (worker)
735 {
736 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::transformMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, inputPaddingElements, &lookupTable, offset, output, outputPaddingElements, outputMask, outputMaskPaddingElements, maskValue, 0u, 0u), 0u, (unsigned int)(lookupTable.sizeY()), 11u, 12u, 20u);
737 }
738 else
739 {
740 transformMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, inputPaddingElements, &lookupTable, offset, output, outputPaddingElements, outputMask, outputMaskPaddingElements, maskValue, 0u, (unsigned int)lookupTable.sizeY());
741 }
742}
743
744template <typename TElementType, unsigned int tChannels>
745inline void FrameInterpolatorNearestPixel::rotate90(const TElementType* source, TElementType* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const bool clockwise, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
746{
747 static_assert(tChannels >= 1u, "Invalid channel number!");
748
749 ocean_assert(source != nullptr && target != nullptr);
750 ocean_assert(source != target);
751 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
752
753 FrameTransposer::rotate90<TElementType, tChannels>(source, target, sourceWidth, sourceHeight, clockwise, sourcePaddingElements, targetPaddingElements, worker);
754}
755
756template <typename T, unsigned int tChannels>
757void FrameInterpolatorNearestPixel::resizeSubset(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
758{
759 static_assert(tChannels > 0u, "Invalid channel number!");
760 static_assert(sizeof(T) != 0, "Invalid data type!");
761
762 ocean_assert(source != nullptr && target != nullptr);
763 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
764 ocean_assert(targetWidth != 0u && targetHeight != 0u);
765
766 ocean_assert(firstTargetRow + numberTargetRows <= targetHeight);
767
768 const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
769 const unsigned int targetStrideElements = targetWidth * tChannels + targetPaddingElements;
770
771 Memory memoryHorizontalLookups = Memory::create<unsigned int>(targetWidth);
772 unsigned int* horizontalLookups = memoryHorizontalLookups.data<unsigned int>();
773
774 for (unsigned int tx = 0u; tx < targetWidth; ++tx)
775 {
776 const unsigned int sx = tx * sourceWidth / targetWidth;
777 ocean_assert(sx < sourceWidth);
778
779 horizontalLookups[tx] = sx * tChannels;
780 }
781
782 target += firstTargetRow * targetStrideElements;
783
784 for (unsigned int ty = firstTargetRow; ty < firstTargetRow + numberTargetRows; ++ty)
785 {
786 const unsigned int sy = ty * sourceHeight / targetHeight;
787 ocean_assert(sy < sourceHeight);
788
789 const T* const sourceRow = source + sy * sourceStrideElements;
790
791 for (unsigned int tx = 0; tx < targetWidth; ++tx)
792 {
793 const T* const sourcePointer = sourceRow + horizontalLookups[tx];
794
795 for (unsigned int n = 0u; n < tChannels; ++n)
796 {
797 *target++ = sourcePointer[n];
798 }
799 }
800
801 target += targetPaddingElements;
802 }
803}
804
805template <unsigned int tChannels>
806void FrameInterpolatorNearestPixel::affine8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
807{
808 static_assert(tChannels >= 1u, "Invalid channel number!");
809
810 ocean_assert(input != nullptr && output != nullptr);
811 ocean_assert(inputWidth > 0u && inputHeight > 0u);
812 ocean_assert_and_suppress_unused(outputWidth > 0u && outputHeight > 0u, outputHeight);
813 ocean_assert(affineTransform);
814 ocean_assert(!affineTransform->isNull() && Numeric::isEqualEps((*affineTransform)[2]) && Numeric::isEqualEps((*affineTransform)[5]));
815
816 ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
817
818 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
819
820 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
821 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
822
823 PixelType* outputData = (PixelType*)(output + firstOutputRow * (outputWidth * tChannels + outputPaddingElements));
824
825 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
826 {
827 //
828 // We can slightly optimize the 3x3 matrix multiplication:
829 //
830 // | X0 Y0 Z0 | | x |
831 // | X1 Y1 Z1 | * | y |
832 // | 0 0 1 | | 1 |
833 //
834 // | xx | | X0 * x | | Y0 * y + Z0 |
835 // | yy | = | X1 * x | + | Y1 * y + Z1 |
836 //
837 // As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
838 //
839 // C0 = Y0 * y + Z0
840 // C1 = Y1 * y + Z1
841 //
842 // So the computation becomes:
843 //
844 // | x' | | X0 * x | | C0 |
845 // | y' | = | X1 * x | + | C1 |
846 //
847
848 const Vector2 X(affineTransform->data() + 0);
849 const Vector2 c(Vector2(affineTransform->data() + 3) * Scalar(y) + Vector2(affineTransform->data() + 6));
850
851 for (unsigned int x = 0u; x < outputWidth; ++x)
852 {
853 const Vector2 inputPosition = X * Scalar(x) + c;
854
855#ifdef OCEAN_DEBUG
856 const Scalar debugX = (*affineTransform)[0] * Scalar(x) + (*affineTransform)[3] * Scalar(y) + (*affineTransform)[6];
857 const Scalar debugY = (*affineTransform)[1] * Scalar(x) + (*affineTransform)[4] * Scalar(y) + (*affineTransform)[7];
858 ocean_assert(inputPosition.isEqual(Vector2(debugX, debugY), Scalar(0.01)));
859#endif
860
861 const unsigned int inputX = Numeric::round32(inputPosition.x());
862 const unsigned int inputY = Numeric::round32(inputPosition.y());
863
864 if (inputX < inputWidth && inputY < inputHeight) {
865 *outputData = *(PixelType*)(input + inputY * (inputWidth * tChannels + inputPaddingElements) + inputX * tChannels);
866 } else {
867 *outputData = *bColor;
868}
869
870 outputData++;
871 }
872
873 outputData = (PixelType*)((uint8_t*)outputData + outputPaddingElements);
874 }
875}
876
877template <typename T, unsigned int tChannels>
878void FrameInterpolatorNearestPixel::homographySubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
879{
880 static_assert(tChannels > 0u, "Invalid channel number!");
881
882 ocean_assert(input != nullptr && output != nullptr);
883 ocean_assert(inputWidth > 0u && inputHeight > 0u);
884 ocean_assert(outputWidth > 0u && outputHeight > 0u);
885 ocean_assert(input_H_output != nullptr && !input_H_output->isSingular());
886
887 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
888
889 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
890 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
891
892 typedef typename DataType<T, tChannels>::Type PixelType;
893
894 const T zeroColor[tChannels] = {T(0)};
895 const PixelType bColor = borderColor ? *(const PixelType*)(borderColor) : *(const PixelType*)(zeroColor);
896
897 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
898 {
899 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
900
901 for (unsigned int x = 0u; x < outputWidth; ++x)
902 {
903 const Vector2 outputPosition = Vector2(Scalar(x), Scalar(y));
904 const Vector2 inputPosition(*input_H_output * outputPosition);
905
906 const unsigned int inputX = Numeric::round32(inputPosition.x());
907 const unsigned int inputY = Numeric::round32(inputPosition.y());
908
909 if (inputX < inputWidth && inputY < inputHeight)
910 {
911 *outputData = *((const PixelType*)(input + inputY * inputStrideElements) + inputX);
912 }
913 else
914 {
915 *outputData = bColor;
916 }
917
918 outputData++;
919 }
920 }
921}
922
923#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
924
925template <unsigned int tChannels>
926inline void FrameInterpolatorNearestPixel::affine8BitPerChannelSSESubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
927{
928 static_assert(tChannels >= 1u, "Invalid channel number!");
929
930 ocean_assert(input && output);
931 ocean_assert(inputWidth > 0u && inputHeight > 0u);
932 ocean_assert(outputWidth >= 4u && outputHeight > 0u);
933 ocean_assert(affineTransform);
934 ocean_assert(!affineTransform->isNull() && Numeric::isEqualEps((*affineTransform)[2]) && Numeric::isEqualEps((*affineTransform)[5]));
935
936 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
937
938 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
939
940 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
941 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
942
943 PixelType* outputPixelData = (PixelType*)(output + firstOutputRow * (outputWidth * tChannels + outputPaddingElements));
944
945 OCEAN_ALIGN_DATA(16)
946 unsigned int nearestNeighbours[4];
947
948 // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
949 const __m128 m128_f_X0 = _mm_set_ps1(float((*affineTransform)(0, 0)));
950 const __m128 m128_f_X1 = _mm_set_ps1(float((*affineTransform)(1, 0)));
951
952 // m128_u_inputStrideElements = [rowStride, rowStride, rowStride, rowStride], rowStride = inputWidth * tChannels + inputPaddingElements
953 const __m128i m128_i_inputStrideElements = _mm_set1_epi32(inputWidth * tChannels + inputPaddingElements);
954
955 // m128_u_channels = [tChannels, tChannels, tChannels, tChannels]
956 const __m128i m128_i_channels = _mm_set1_epi32(tChannels);
957
958 // m128_i_inputWidth_1 = [inputWidth - 1u, inputWidth - 1u, inputWidth - 1u, inputWidth - 1u]
959 const __m128i m128_i_inputWidth_1 = _mm_set1_epi32(inputWidth - 1u);
960
961 // m128_i_inputHeight_1 = [inputHeight - 1u, inputHeight - 1u, inputHeight - 1u, inputHeight - 1u]
962 const __m128i m128_i_inputHeight_1 = _mm_set1_epi32(inputHeight - 1u);
963
964 // m128_i_zero = [0, 0, 0, 0]
965 const __m128i m128_i_zero = _mm_setzero_si128();
966
967 // Indices (of elements) above this value in the input image are considered as outside of the image (intentionally not counting the last padding elements)
968 const unsigned int inputElementsEnd = inputHeight * inputWidth * tChannels + (inputHeight - 1u) * inputPaddingElements;
969
970 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
971 {
972 // We can slightly optimize the 3x3 matrix multiplication:
973 //
974 // | X0 Y0 Z0 | | x |
975 // | X1 Y1 Z1 | * | y |
976 // | 0 0 1 | | 1 |
977 //
978 // | xx | | X0 * x | | Y0 * y + Z0 |
979 // | yy | = | X1 * x | + | Y1 * y + Z1 |
980 //
981 // As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
982 //
983 // C0 = Y0 * y + Z0
984 // C1 = Y1 * y + Z1
985 //
986 // So the computation becomes:
987 //
988 // | x' | | X0 * x | | C0 |
989 // | y' | = | X1 * x | + | C1 |
990
991 // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
992 const __m128 m128_f_C0 = _mm_set_ps1(float((*affineTransform)(0, 1) * Scalar(y) + (*affineTransform)(0, 2)));
993 const __m128 m128_f_C1 = _mm_set_ps1(float((*affineTransform)(1, 1) * Scalar(y) + (*affineTransform)(1, 2)));
994
995 for (unsigned int x = 0u; x < outputWidth; x += 4u)
996 {
997 if (x + 4u > outputWidth)
998 {
999 // the last iteration will not fit into the output frame,
1000 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
1001
1002 ocean_assert(x >= 4u && outputWidth > 4u);
1003 const unsigned int newX = outputWidth - 4u;
1004
1005 ocean_assert(x > newX);
1006 outputPixelData -= x - newX;
1007
1008 x = newX;
1009
1010 // the for loop will stop after this iteration
1011 ocean_assert(!(x + 4u < outputWidth));
1012 }
1013
1014 // we need four successive x coordinate floats:
1015 // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
1016 const __m128 m128_f_x_0123 = _mm_set_ps(float(x + 3u), float(x + 2u), float(x + 1u), float(x + 0u));
1017
1018 // we calculate xx and yy for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
1019 const __m128 m128_f_inputX = _mm_add_ps(_mm_mul_ps(m128_f_X0, m128_f_x_0123), m128_f_C0);
1020 const __m128 m128_f_inputY = _mm_add_ps(_mm_mul_ps(m128_f_X1, m128_f_x_0123), m128_f_C1);
1021
1022 // Compute the coordinates of the nearest neighbors
1023 const __m128i m128_i_inputX = _mm_cvtps_epi32(_mm_round_ps(m128_f_inputX, _MM_FROUND_TO_NEAREST_INT)); // x' = (int)round(x)
1024 const __m128i m128_i_inputY = _mm_cvtps_epi32(_mm_round_ps(m128_f_inputY, _MM_FROUND_TO_NEAREST_INT)); // y' = (int)round(y)
1025
1026 // Note: Detection of input position outside the input image
1027 //
1028 // If the input point is outside the input image, then set the index
1029 // of its nearest neighbor to a value that is above the number of
1030 // available pixels in the image. When writing to the output, a
1031 // check will make sure to use the background color for those
1032 // pixels:
1033 //
1034 // nearestNeighbour = isOutsideImage ? 0xFFFFFFFF : y * w + x
1035 // output = nearestNeighbour < inputPixelIndexEnd ? foregroundColor : backgroundColor.
1036 //
1037 // This approach keeps the amount of data that has to be transferred
1038 // between SSE and CPU registers to a minimum.
1039
1040 // isOutsideImage = (inputX < 0 || inputX > (width - 1u) || inputY < 0 || inputY > (height - 1u) ? 0xFFFFFFFF : 0x00000000;
1041 const __m128i m128_i_isOutsideImage = _mm_or_si128(
1042 _mm_or_si128(_mm_cmplt_epi32(m128_i_inputX, m128_i_zero), _mm_cmplt_epi32(m128_i_inputY, m128_i_zero)),
1043 _mm_or_si128(_mm_cmpgt_epi32(m128_i_inputX, m128_i_inputWidth_1), _mm_cmpgt_epi32(m128_i_inputY, m128_i_inputHeight_1)));
1044
1045 // Compute pixel index of the nearest neighbors of the valid pixels and store their pixel values
1046 // nearestNeighborsElement = (isOutsideImage ? 0xFFFFFFFF : (inputY * inputStrideElements) + (inputX * channels))
1047 const __m128i m_128_i_nearestNeighborElements = _mm_or_si128(m128_i_isOutsideImage, _mm_add_epi32(_mm_mullo_epi32(m128_i_inputY, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_inputX, m128_i_channels)));
1048 _mm_store_si128((__m128i*)nearestNeighbours, m_128_i_nearestNeighborElements);
1049
1050 // Update the output pixels
1051 outputPixelData[0] = nearestNeighbours[0] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighbours[0]) : *bColor;
1052 outputPixelData[1] = nearestNeighbours[1] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighbours[1]) : *bColor;
1053 outputPixelData[2] = nearestNeighbours[2] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighbours[2]) : *bColor;
1054 outputPixelData[3] = nearestNeighbours[3] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighbours[3]) : *bColor;
1055
1056 outputPixelData += 4u;
1057 }
1058
1059 outputPixelData = (PixelType*)((uint8_t*)outputPixelData + outputPaddingElements);
1060 }
1061}
1062
1063template <typename T, unsigned int tChannels>
1064void FrameInterpolatorNearestPixel::homographySSESubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
1065{
1066 static_assert(tChannels > 0u, "Invalid channel number!");
1067
1068 ocean_assert(input != nullptr && output != nullptr);
1069 ocean_assert(inputWidth > 0u && inputHeight > 0u);
1070 ocean_assert(outputWidth >= 4u && outputHeight > 0u);
1071 ocean_assert(input_H_output != nullptr && !input_H_output->isSingular());
1072
1073 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
1074
1075 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
1076 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
1077
1078 typedef typename DataType<T, tChannels>::Type PixelType;
1079
1080 const T zeroColor[tChannels] = {T(0)};
1081 const PixelType bColor = borderColor ? *(const PixelType*)(borderColor) : *(const PixelType*)(zeroColor);
1082
1083 OCEAN_ALIGN_DATA(16) unsigned int nearestNeighbourElementOffsets[4];
1084
1085 // | X0 Y0 Z0 | | x |
1086 // Homography H = | X1 Y1 Z1 |, point p = | y |
1087 // | X2 Y2 Z2 | | 1 |
1088 //
1089 // | xx |
1090 // pp = H * p = | yy |
1091 // | zz |
1092 //
1093 // | xx | | X0 Y0 Z0 | | x |
1094 // <=> | yy | = | X1 Y1 Z1 | * | y |
1095 // | zz | | X2 Y2 Z2 | | 1 |
1096 //
1097 // | xx | | X0 * x | | Y0 * y + Z0 |
1098 // <=> | yy | = | X1 * x | + | Y1 * y + Z1 |
1099 // | zz | | X2 * x | | Y2 * y + Z2 |
1100 //
1101 // | xx | | X0 * x | | C0 | | Y0 * y + Z0 |
1102 // <=> | yy | = | X1 * x | + | C1 |, C = | Y1 * y + Z1 |
1103 // | zz | | X2 * x | | C2 | | Y2 * y + Z2 |
1104 //
1105 // Where C is a constant term that can be pre-computed (per image row)
1106 //
1107 // | x' | | xx / zz | | (X0 * x + C0) / (X2 * x + C2) |
1108 // p' = | y' | = | yy / zz | = | (X1 * x + C1) / (X2 * x + C2) |
1109
1110 // [Xi, Xi, Xi, Xi], i = {0, 1, 2}
1111 const __m128 m128_f_X0 = _mm_set_ps1((float)(*input_H_output)(0, 0));
1112 const __m128 m128_f_X1 = _mm_set_ps1((float)(*input_H_output)(1, 0));
1113 const __m128 m128_f_X2 = _mm_set_ps1((float)(*input_H_output)(2, 0));
1114
1115 // Store 4 integers: [inputStrideElements, inputStrideElements, inputStrideElements, inputStrideElements]
1116 const __m128i m128_i_inputStrideElements = _mm_set1_epi32(inputStrideElements);
1117
1118 const unsigned int inputPixelElementIndexEnd = inputHeight * inputStrideElements;
1119
1120 // m128_i_inputWidth_1 = [inputWidth - 1u, inputWidth - 1u, inputWidth - 1u, inputWidth - 1u]
1121 const __m128i m128_i_inputWidth_1 = _mm_set1_epi32(inputWidth - 1u);
1122
1123 // m128_i_inputHeight_1 = [inputHeight - 1u, inputHeight - 1u, inputHeight - 1u, inputHeight - 1u]
1124 const __m128i m128_i_inputHeight_1 = _mm_set1_epi32(inputHeight - 1u);
1125
1126 // [tChannels, tChannels, tChannels tChannels]
1127 const __m128i m128_i_channels = _mm_set1_epi32(tChannels);
1128
1129 // m128_i_zero = [0, 0, 0, 0]
1130 const __m128i m128_i_zero = _mm_setzero_si128();
1131
1132 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
1133 {
1134 PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
1135
1136 // Pre-compute the constant terms [Ci, Ci, Ci, Ci], i={0, 1, 2}
1137 const __m128 m128_f_C0 = _mm_set_ps1((float)((*input_H_output)(0, 1) * Scalar(y) + ((*input_H_output)(0, 2))));
1138 const __m128 m128_f_C1 = _mm_set_ps1((float)((*input_H_output)(1, 1) * Scalar(y) + ((*input_H_output)(1, 2))));
1139 const __m128 m128_f_C2 = _mm_set_ps1((float)((*input_H_output)(2, 1) * Scalar(y) + ((*input_H_output)(2, 2))));
1140
1141 for (unsigned int x = 0u; x < outputWidth; x += 4u)
1142 {
1143 if (x + 4u > outputWidth)
1144 {
1145 // the last iteration will not fit into the output frame,
1146 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
1147
1148 ocean_assert(x >= 4u && outputWidth > 4u);
1149 const unsigned int newX = outputWidth - 4u;
1150
1151 ocean_assert(x > newX);
1152 outputPixelData -= x - newX;
1153
1154 x = newX;
1155
1156 // the for loop will stop after this iteration
1157 ocean_assert(!(x + 4u < outputWidth));
1158 }
1159
1160 // we need four successive x coordinate floats:
1161 // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
1162 const __m128 m128_f_x_0123 = _mm_set_ps(float(x + 3u), float(x + 2u), float(x + 1u), float(x + 0u));
1163
1164 // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
1165 const __m128 m128_f_xx = _mm_add_ps(_mm_mul_ps(m128_f_X0, m128_f_x_0123), m128_f_C0);
1166 const __m128 m128_f_yy = _mm_add_ps(_mm_mul_ps(m128_f_X1, m128_f_x_0123), m128_f_C1);
1167 const __m128 m128_f_zz = _mm_add_ps(_mm_mul_ps(m128_f_X2, m128_f_x_0123), m128_f_C2);
1168
1169#ifdef USE_APPROXIMATED_INVERSE_OF_ZZ // (not defined by default)
1170
1171 // we calculate the (approximated) inverse of zz,
1172 // the overall performance will be approx. 5% better while the accuracy will be slightly worse:
1173 // [1/zz3, 1/zz2, 1/zz1, 1/zz0]
1174 const __m128 inv_zz_128 = _mm_rcp_ps(m128_f_zz);
1175
1176 // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
1177 const __m128 m128_f_inputX = _mm_mul_ps(m128_f_xx, inv_zz_128);
1178 const __m128 m128_f_inputY = _mm_mul_ps(m128_f_yy, inv_zz_128);
1179
1180#else
1181
1182 // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
1183 const __m128 m128_f_inputX = _mm_div_ps(m128_f_xx, m128_f_zz);
1184 const __m128 m128_f_inputY = _mm_div_ps(m128_f_yy, m128_f_zz);
1185
1186#endif // USE_APPROXIMATED_INVERSE_OF_ZZ
1187
1188 // Compute the coordinates of the nearest neighbors
1189 const __m128i m128_i_inputX = _mm_cvtps_epi32(_mm_round_ps(m128_f_inputX, _MM_FROUND_TO_NEAREST_INT)); // x' = (int)round(x)
1190 const __m128i m128_i_inputY = _mm_cvtps_epi32(_mm_round_ps(m128_f_inputY, _MM_FROUND_TO_NEAREST_INT)); // y' = (int)round(y)
1191
1192 // Note: Detection of input position outside the input image
1193 //
1194 // If the input point is outside the input image, then set the index
1195 // of its nearest neighbor to a value that is above the number of
1196 // available pixels in the image. When writing to the output, a
1197 // check will make sure to use the background color for those
1198 // pixels:
1199 //
1200 // nearestNeighbour = isOutsideImage ? 0xFFFFFFFF : y * w + x
1201 // output = nearestNeighbour < inputPixelIndexEnd ? foregroundColor : backgroundColor.
1202 //
1203 // This approach keeps the amount of data that has to be transferred
1204 // between SSE and CPU registers to a minimum.
1205
1206 // isOutsideImage = (inputX < 0 || inputX > (width - 1u) || inputY < 0 || inputY > (height - 1u) ? 0xFFFFFFFF : 0x00000000;
1207 const __m128i m128_i_isOutsideImage = _mm_or_si128(
1208 _mm_or_si128(_mm_cmplt_epi32(m128_i_inputX, m128_i_zero), _mm_cmplt_epi32(m128_i_inputY, m128_i_zero)),
1209 _mm_or_si128(_mm_cmpgt_epi32(m128_i_inputX, m128_i_inputWidth_1), _mm_cmpgt_epi32(m128_i_inputY, m128_i_inputHeight_1)));
1210
1211 // Compute pixel index of the nearest neighbors of the valid pixels and store their pixel values
1212 // m_128_i_nearestNeighbors = (isOutsideImage ? 0xFFFFFFFF : inputY * inputWidth + inputX)
1213 const __m128i m_128_i_nearestNeighbors = _mm_or_si128(m128_i_isOutsideImage, _mm_add_epi32(_mm_mullo_epi32(m128_i_inputY, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_inputX, m128_i_channels))); // nn = y' * inputWidth + x'
1214 _mm_store_si128((__m128i*)nearestNeighbourElementOffsets, m_128_i_nearestNeighbors);
1215
1216 // Update the output pixels
1217 outputPixelData[0] = nearestNeighbourElementOffsets[0] < inputPixelElementIndexEnd ? *(const PixelType*)(input + nearestNeighbourElementOffsets[0]) : bColor;
1218 outputPixelData[1] = nearestNeighbourElementOffsets[1] < inputPixelElementIndexEnd ? *(const PixelType*)(input + nearestNeighbourElementOffsets[1]) : bColor;
1219 outputPixelData[2] = nearestNeighbourElementOffsets[2] < inputPixelElementIndexEnd ? *(const PixelType*)(input + nearestNeighbourElementOffsets[2]) : bColor;
1220 outputPixelData[3] = nearestNeighbourElementOffsets[3] < inputPixelElementIndexEnd ? *(const PixelType*)(input + nearestNeighbourElementOffsets[3]) : bColor;
1221
1222 outputPixelData += 4u;
1223 }
1224 }
1225}
1226
1227#endif // OCEAN_HARDWARE_SSE_VERSION >= 41
1228
1229#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1230
1231template <unsigned int tChannels>
1232void FrameInterpolatorNearestPixel::affine8BitPerChannelIntegerNEONSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
1233{
1234 // The following optimizations have been applied:
1235 //
1236 // - Matrix-vector multiplication for affine transformations:
1237 //
1238 // | x' | | X0 Y0 Z0 | | x |
1239 // | y' | = | X1 Y1 Z1 | * | y |
1240 // | 1 | | 0 0 1 | | 1 |
1241 //
1242 // which is
1243 //
1244 // x' = X0 * x + Y0 * y + Z0
1245 // y' = X1 * x + Y1 * y + Z1
1246 //
1247 // We can slightly optimize this operation, since y is constant within the inner
1248 // loop. The two terms on the right side in the above equations can be
1249 // pre-calculated:
1250 //
1251 // C0 = Y0 * y + Z0
1252 // C1 = Y1 * y + Z1
1253 //
1254 // So the computation becomes:
1255 //
1256 // | x' | | X0 * x | | C0 |
1257 // | y' | = | X1 * x | + | C1 |
1258 //
1259 // - For better utilization of cache coherence, the (output) image is processed
1260 // in blocks (64 x 64 pixels, if possible)
1261 //
1262 // - Integer fixed-point arithmetic.
1263 //
1264 // - Update products from floating point numbers with the beginning of blocks,
1265 // because the rounding error of fixed-point operations increases for larger
1266 // values:
1267 //
1268 // f - float number
1269 // i - fixed-point representation of f
1270 // v - coordinate value
1271 // eps = (f - i) - loss of precision (eps > 0)
1272 //
1273 // Rounding error:
1274 //
1275 // e = |(v * f) - (v * i)| = |v * (f - i)| = |v * eps|
1276 // (increases linearly for larger coordinate values v, i.e., with image size)
1277 //
1278 // The rounding error can be kept at bay by replacing the product (v * i) with
1279 // (v * f) at the beginning of each block followed by adding an offset for all
1280 // other pixels in the block, (N * f) where N is the number of pixels which
1281 // are processed concurrently by SIMD instructions.
1282 //
1283
1284 static_assert(tChannels >= 1u, "Invalid channel number!");
1285
1286 constexpr unsigned int fractionalBits = 15u;
1287 constexpr unsigned int totalBits = (unsigned int)(CHAR_BIT * sizeof(int));
1288
1289 static_assert((fractionalBits + 1u /* sign bit */) < totalBits, "Number of fractional bits exceeds number of total bits");
1290
1291 constexpr unsigned int maxImageEdgeLength = 1u << (totalBits - fractionalBits - 1u /* sign bit */);
1292
1293 // Scale to convert float value, v, to fixed-point value, v_q = int(round(fixedPointScale * v))
1294 constexpr Scalar fixedPointScale = Scalar(1u << fractionalBits);
1295
1296 // Number of pixels processed by NEON in each iteration
1297 constexpr unsigned int pixelsPerIteration = 4u;
1298
1299 ocean_assert(input && output);
1300 ocean_assert_and_suppress_unused(inputWidth > 0u && inputHeight > 0u && inputWidth <= maxImageEdgeLength && inputHeight <= maxImageEdgeLength, maxImageEdgeLength);
1301 ocean_assert_and_suppress_unused(outputWidth >= pixelsPerIteration && outputHeight > 0u && outputWidth <= maxImageEdgeLength && outputHeight <= maxImageEdgeLength, maxImageEdgeLength);
1302 ocean_assert(affineTransform);
1303 ocean_assert(!affineTransform->isNull() && Numeric::isEqualEps((*affineTransform)[2]) && Numeric::isEqualEps((*affineTransform)[5]));
1304
1305 ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
1306
1307 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
1308
1309 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
1310 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
1311
1312 PixelType* outputPixelData = (PixelType*)(output + firstOutputRow * (outputWidth * tChannels + outputPaddingElements));
1313
1314 const unsigned int outputRowEnd = firstOutputRow + numberOutputRows;
1315
1316 // Inidices of the final nearest neighbor pixel, which are used to the interpolation
1317 unsigned int nearestNeighboursElements[4];
1318
1319 // Indices (of elements) above this value in the input image are considered as outside of the image (intentionally not counting the last padding elements)
1320 const unsigned int inputElementsEnd = inputHeight * inputWidth * tChannels + (inputHeight - 1u) * inputPaddingElements;
1321
1322 // m128_u_inputWidth = [inputWidth, intputWidth, intputWidth, intputWidth], and the same for inputHeight
1323 const uint32x4_t m128_u_inputWidth = vdupq_n_u32(inputWidth);
1324 const uint32x4_t m128_u_inputHeight = vdupq_n_u32(inputHeight);
1325
1326 // m128_u_inputStrideElements = [rowStride, rowStride, rowStride, rowStride], rowStride = inputWidth * tChannels + inputPaddingElements
1327 const uint32x4_t m128_u_inputStrideElements = vdupq_n_u32(inputWidth * tChannels + inputPaddingElements);
1328
1329 // m128_u_channels = [tChannels, tChannels, tChannels, tChannels]
1330 const uint32x4_t m128_u_channels = vdupq_n_u32(tChannels);
1331
1332 // m128_s_offsets_0123 = [0, 1, 2, 3]
1333 const int offsets_0123[4] = { 0, 1, 2, 3 };
1334 const int32x4_t m128_s_offsets_0123 = vld1q_s32(offsets_0123);
1335
1336 // m128_f_pixelsPerIteration = [4.0f, 4.0f, 4.0f, 4.0f]
1337 const float32x4_t m128_f_pixelsPerIteration = vdupq_n_f32((float)pixelsPerIteration);
1338
1339 // Float-based transformation value X0 multiplied with scale for fixed-point
1340 // numbers. This is used to update the fixed-point products, X0 * x and X1 * x,
1341 // at the beginning of each block, i.e.
1342 // m128_f_q_X0 = [v, v, v, v], v = fixedPointScale * X0, and the same for X1
1343 const float32x4_t m128_f_X0 = vdupq_n_f32(float(fixedPointScale * (*affineTransform)(0, 0)));
1344 const float32x4_t m128_f_X1 = vdupq_n_f32(float(fixedPointScale * (*affineTransform)(1, 0)));
1345
1346 // Increment that is added to fixed-point product computed at the beginning of
1347 // each block, X0 * x and X1 * x, in each iteration inside the block
1348 const int32x4_t m128_s_q_X0x_increment = vcvtq_s32_f32(vmulq_f32(m128_f_X0, m128_f_pixelsPerIteration));
1349 const int32x4_t m128_s_q_X1x_increment = vcvtq_s32_f32(vmulq_f32(m128_f_X1, m128_f_pixelsPerIteration));
1350
1351 // Determine the optimal block size
1352 constexpr unsigned int blockSize = 64u;
1353 constexpr unsigned int blockElements = blockSize * blockSize;
1354 const unsigned int blockWidth = std::min(blockElements / std::min(numberOutputRows, blockSize), outputWidth);
1355 const unsigned int blockHeight = std::min(blockElements / blockWidth, numberOutputRows);
1356 ocean_assert(blockWidth > 0u && blockWidth <= outputWidth);
1357 ocean_assert(blockHeight > 0u && blockHeight <= numberOutputRows);
1358
1359 // Index of pixel that is the last in a block of #pixelsPerIterations pixels, i.e. number of remaining pixels after
1360 // this point are less than #pixelsPerIterations. When this pixel index is reached all pointers will be moved left
1361 // so that we can process one last block of #pixelsPerIterations pixels. That also means that depending on the width
1362 // of the output image between [1, pixelsPerIterations) pixels will be computed a second time.
1363 const unsigned int lastMultipleNeonPixelBlockStart = outputWidth - pixelsPerIteration;
1364
1365 // m128_f_lastMultipleNeonPixelBlockStart = [(float)(lastMultipleNeonPixelBlockStart + 0), (float)(lastMultipleNeonPixelBlockStart + 1), (float)(lastMultipleNeonPixelBlockStart + 2), (float)(lastMultipleNeonPixelBlockStart + 3)]
1366 const float32x4_t m128_f_lastMultipleNeonPixelBlockStart = vcvtq_f32_s32(vaddq_s32(vdupq_n_s32((int)lastMultipleNeonPixelBlockStart), m128_s_offsets_0123));
1367
1368 // m128_s_q_X0x_lastMultipleNeonPixelBlockStart = [v0, v1, v2, v3], vi = int(round(fixedPointScale * X0 * (lastMultipleNeonPixelBlockStart + i))), i = 0...3, and similarly for X1
1369 const int32x4_t m128_s_q_X0x_lastMultipleNeonPixelBlockStart = vcvtq_s32_f32(vmulq_f32(m128_f_X0, m128_f_lastMultipleNeonPixelBlockStart));
1370 const int32x4_t m128_s_q_X1x_lastMultipleNeonPixelBlockStart = vcvtq_s32_f32(vmulq_f32(m128_f_X1, m128_f_lastMultipleNeonPixelBlockStart));
1371
1372 for (unsigned int blockYStart = firstOutputRow; blockYStart < outputRowEnd; blockYStart += blockHeight)
1373 {
1374 const unsigned int blockYEnd = std::min(blockYStart + blockHeight, outputRowEnd);
1375
1376 for (unsigned int blockXStart = 0u; blockXStart < outputWidth; blockXStart += blockWidth)
1377 {
1378 const unsigned int blockXEnd = std::min(blockXStart + blockWidth, outputWidth);
1379
1380 for (unsigned int y = blockYStart; y < blockYEnd; ++y)
1381 {
1382 outputPixelData = (PixelType*)(output + y * (outputWidth * tChannels + outputPaddingElements) + blockXStart * tChannels);
1383
1384 // Constant parts, cf. optimization of matrix-vector multiplication above
1385 // m128_s_C0 = [C0, C0, C0, C0], C0 = int(round(leftShiftFactor * (Y0 * y + Z0))), and similarly for C1
1386 const int32x4_t m128_s_q_C0 = vdupq_n_s32(Numeric::round32(fixedPointScale * ((*affineTransform)(0, 1) * Scalar(y) + (*affineTransform)(0, 2))));
1387 const int32x4_t m128_s_q_C1 = vdupq_n_s32(Numeric::round32(fixedPointScale * ((*affineTransform)(1, 1) * Scalar(y) + (*affineTransform)(1, 2))));
1388
1389 // Update products, X0 * x and X1 * x, from floating point numbers with the
1390 // beginning of this block, since the rounding error of fixed-point operations
1391 // increases for larger coordinate values, cf. list of optimizations above.
1392 //
1393 // m128_s_x_0123 = [blockXStart + 0, blockXStart + 1, blockXStart + 2, blockXStart + 3]
1394 const int32x4_t m128_s_x_0123 = vaddq_s32(vdupq_n_s32(int(blockXStart)), m128_s_offsets_0123);
1395
1396 // m128_f_x_0123 = [(float)(x + 0), (float)(x + 1), (float)(x + 2), (float)(x + 3)]
1397 const float32x4_t m128_f_x_0123 = vcvtq_f32_s32(m128_s_x_0123);
1398
1399 // m128_s_q_X0x = [v0, v1, v2, v3], vi = int(round(fixedPointScale * X0 * (x + i))), i = 0...3, and similarly for X1
1400 int32x4_t m128_s_q_X0x = vcvtq_s32_f32(vmulq_f32(m128_f_X0, m128_f_x_0123));
1401 int32x4_t m128_s_q_X1x = vcvtq_s32_f32(vmulq_f32(m128_f_X1, m128_f_x_0123));
1402
1403 for (unsigned int x = blockXStart; x < blockXEnd; x += pixelsPerIteration)
1404 {
1405 if (x + pixelsPerIteration > outputWidth)
1406 {
1407 ocean_assert(x + pixelsPerIteration > outputWidth);
1408 ocean_assert(x >= pixelsPerIteration && outputWidth > pixelsPerIteration);
1409 ocean_assert(lastMultipleNeonPixelBlockStart == (outputWidth - pixelsPerIteration));
1410
1411 outputPixelData -= (x - lastMultipleNeonPixelBlockStart);
1412
1413 x = lastMultipleNeonPixelBlockStart;
1414
1415 m128_s_q_X0x = m128_s_q_X0x_lastMultipleNeonPixelBlockStart;
1416 m128_s_q_X1x = m128_s_q_X1x_lastMultipleNeonPixelBlockStart;
1417
1418 // the for loop will stop after this iteration
1419 ocean_assert(!(x + pixelsPerIteration < outputWidth));
1420 }
1421
1422 // Compute pixel location in the input image
1423 // m128_s_q_inputX = x' = C0 + X0 * x
1424 // m128_s_q_inputY = y' = C1 + X1 * y
1425 const int32x4_t m128_s_q_inputX = vaddq_s32(m128_s_q_C0, m128_s_q_X0x);
1426 const int32x4_t m128_s_q_inputY = vaddq_s32(m128_s_q_C1, m128_s_q_X1x);
1427
1428 // Convert (signed) fixed-point location to unsigned int, i.e., negative values
1429 // will be larger than image dimensions (width, height), cf. note below
1430 //
1431 // m128_u_inputX = (unsigned int) round(inputX >> N)
1432 // m128_u_inputY = (unsigned int) round(inputY >> N)
1433 const uint32x4_t m128_u_inputX = vreinterpretq_u32_s32(vrshrq_n_s32(m128_s_q_inputX, fractionalBits));
1434 const uint32x4_t m128_u_inputY = vreinterpretq_u32_s32(vrshrq_n_s32(m128_s_q_inputY, fractionalBits));
1435
1436 // Note: Detection of input position outside the input image
1437 //
1438 // If the input point is outside the input image, then set the index
1439 // of its nearest neighbor to a value that is above the number of
1440 // available pixels in the image. When writing to the output, a
1441 // check will make sure to use the background color for those
1442 // pixels:
1443 //
1444 // nearestNeighbour = isOutsideImage ? 0xFFFFFFFF : y * w + x
1445 // output = nearestNeighbour < inputPixelIndexEnd ? foregroundColor : backgroundColor.
1446 //
1447 // This approach keeps the amount of data that has to be transferred
1448 // between NEON and CPU registers to a minimum.
1449
1450 // Casting negative signed values to unsigned value results in very large values, e.g., ((unsigned int) -1) > inputWidth.
1451 // We'll exploit that below to check is pixel coordinates are outside the image.
1452 // m128_u_isOutsideImage = (x >= inputWidth || y >= inputHeight) ? 0xFFFFFFFF : 0x00000000;
1453 const uint32x4_t m128_u_isOutsideImage = vorrq_u32(vcgeq_u32(m128_u_inputX, m128_u_inputWidth), vcgeq_u32(m128_u_inputY, m128_u_inputHeight));
1454
1455 // Determine the pixel indices of the nearest neighbors and store the result
1456 // If the pixel is outside the image then set the index of the nearest neighbor to the largest possible value
1457 // m_128_u_nearestNeighbors = m128_u_isOutsideImage | (inputY * inputStrideElements) + (inputX * channels);
1458 // which is equivalent to
1459 // m_128_u_nearestNeighborElements = (m128_u_isOutsideImage ? 0xFFFFFFFF : (inputY * inputStrideElements) + (inputX * channels))
1460 const uint32x4_t m_128_u_nearestNeighborsElements = vorrq_u32(m128_u_isOutsideImage, vaddq_u32(vmulq_u32(m128_u_inputY, m128_u_inputStrideElements), vmulq_u32(m128_u_inputX, m128_u_channels)));
1461 vst1q_u32(nearestNeighboursElements, m_128_u_nearestNeighborsElements);
1462
1463 outputPixelData[0] = nearestNeighboursElements[0] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighboursElements[0]) : *bColor;
1464 outputPixelData[1] = nearestNeighboursElements[1] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighboursElements[1]) : *bColor;
1465 outputPixelData[2] = nearestNeighboursElements[2] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighboursElements[2]) : *bColor;
1466 outputPixelData[3] = nearestNeighboursElements[3] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighboursElements[3]) : *bColor;
1467
1468 outputPixelData += pixelsPerIteration;
1469
1470 // m128_s_q_X0x += m128_s_q_X0x_increment, and similarly for X1
1471 m128_s_q_X0x = vaddq_s32(m128_s_q_X0x, m128_s_q_X0x_increment);
1472 m128_s_q_X1x = vaddq_s32(m128_s_q_X1x, m128_s_q_X1x_increment);
1473 }
1474 }
1475 }
1476
1477 outputPixelData = (PixelType*)((uint8_t*)outputPixelData + outputPaddingElements);
1478 }
1479}
1480
1481template <typename T, unsigned int tChannels>
1482void FrameInterpolatorNearestPixel::homographyNEONSubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
1483{
1484 static_assert(tChannels >= 1u, "Invalid channel number!");
1485
1486 ocean_assert(input != nullptr && output != nullptr);
1487 ocean_assert(inputWidth > 0u && inputHeight > 0u);
1488 ocean_assert(outputWidth >= 4u && outputHeight > 0u);
1489 ocean_assert(input_H_output != nullptr && !input_H_output->isSingular());
1490
1491 ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
1492
1493 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
1494 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
1495
1496 typedef typename DataType<T, tChannels>::Type PixelType;
1497
1498 const T zeroColor[tChannels] = {T(0)};
1499 const PixelType bColor = borderColor ? *(const PixelType*)(borderColor) : *(const PixelType*)(zeroColor);
1500
1501 unsigned int validPixels[4];
1502 unsigned int nearestNeighbourElementOffsets[4];
1503
1504 // | X0 Y0 Z0 | | x |
1505 // Homography H = | X1 Y1 Z1 |, point p = | y |
1506 // | X2 Y2 Z2 | | 1 |
1507 //
1508 // | xx |
1509 // pp = H * p = | yy |
1510 // | zz |
1511 //
1512 // | xx | | X0 Y0 Z0 | | x |
1513 // <=> | yy | = | X1 Y1 Z1 | * | y |
1514 // | zz | | X2 Y2 Z2 | | 1 |
1515 //
1516 // | xx | | X0 * x | | Y0 * y + Z0 |
1517 // <=> | yy | = | X1 * x | + | Y1 * y + Z1 |
1518 // | zz | | X2 * x | | Y2 * y + Z2 |
1519 //
1520 // | xx | | X0 * x | | C0 | | Y0 * y + Z0 |
1521 // <=> | yy | = | X1 * x | + | C1 |, C = | Y1 * y + Z1 |
1522 // | zz | | X2 * x | | C2 | | Y2 * y + Z2 |
1523 //
1524 // Where C is a constant term that can be pre-computed (per image row)
1525 //
1526 // | x' | | xx / zz | | (X0 * x + C0) / (X2 * x + C2) |
1527 // p' = | y' | = | yy / zz | = | (X1 * x + C1) / (X2 * x + C2) |
1528
1529 // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
1530 const float32x4_t m128_f_X0 = vdupq_n_f32(float((*input_H_output)(0, 0)));
1531 const float32x4_t m128_f_X1 = vdupq_n_f32(float((*input_H_output)(1, 0)));
1532 const float32x4_t m128_f_X2 = vdupq_n_f32(float((*input_H_output)(2, 0)));
1533
1534 // we store 4 floats: [0.5f, 0.5f, 0.5f, 0.5f]
1535 const float32x4_t m128_f_pointFive = vdupq_n_f32(0.5f);
1536 const float32x4_t m128_f_negPointFive = vdupq_n_f32(-0.5f);
1537
1538 // we store 4 integers: [inputStrideElements, inputStrideElements, inputStrideElements, inputStrideElements]
1539 const uint32x4_t m128_u_inputStrideElements = vdupq_n_u32(inputStrideElements);
1540
1541 const uint32x4_t m128_u_channels = vdupq_n_u32(tChannels);
1542
1543 // we store 4 floats: [inputWidth - 0.5f, inputWidth - 0.5f, inputWidth - 0.5f, inputWidth - 0.5f], and same with inputHeight
1544 const float32x4_t m128_f_inputWidth_pointFive = vdupq_n_f32(float(inputWidth) - 0.5f);
1545 const float32x4_t m128_f_inputHeight_pointFive = vdupq_n_f32(float(inputHeight) - 0.5f);
1546
1547 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
1548 {
1549 PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
1550
1551 // Pre-compute the constant terms [Ci, Ci, Ci, Ci], i={0, 1, 2}
1552 const float32x4_t m128_f_C0 = vdupq_n_f32(float((*input_H_output)(0, 1) * Scalar(y) + (*input_H_output)(0, 2)));
1553 const float32x4_t m128_f_C1 = vdupq_n_f32(float((*input_H_output)(1, 1) * Scalar(y) + (*input_H_output)(1, 2)));
1554 const float32x4_t m128_f_C2 = vdupq_n_f32(float((*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2)));
1555
1556 for (unsigned int x = 0u; x < outputWidth; x += 4u)
1557 {
1558 if (x + 4u > outputWidth)
1559 {
1560 // Since the last iteration will not fit into the output frame, we'll shift N pixel left so that it fits again (at most 3 pixels).
1561
1562 ocean_assert(x >= 4u && outputWidth > 4u);
1563 const unsigned int newX = outputWidth - 4u;
1564
1565 ocean_assert(x > newX);
1566 outputPixelData -= x - newX;
1567
1568 x = newX;
1569
1570 // the for loop will stop after this iteration
1571 ocean_assert(!(x + 4u < outputWidth));
1572 }
1573
1574 // we need four successive x coordinate floats:
1575 // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
1576 float x_0123[4] = { float(x + 0u), float(x + 1u), float(x + 2u), float(x + 3u) };
1577 const float32x4_t m128_f_x_0123 = vld1q_f32(x_0123);
1578
1579 // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
1580 const float32x4_t m128_f_xx = vmlaq_f32(m128_f_C0, m128_f_X0, m128_f_x_0123);
1581 const float32x4_t m128_f_yy = vmlaq_f32(m128_f_C1, m128_f_X1, m128_f_x_0123);
1582 const float32x4_t m128_f_zz = vmlaq_f32(m128_f_C2, m128_f_X2, m128_f_x_0123);
1583
1584#ifdef USE_DIVISION_ARM64_ARCHITECTURE
1585
1586 // using the division available from ARM64 is more precise
1587 const float32x4_t m128_f_inputX = vdivq_f32(m128_f_xx, m128_f_zz);
1588 const float32x4_t m128_f_inputY = vdivq_f32(m128_f_yy, m128_f_zz);
1589
1590#else
1591
1592 // we calculate the (approximated) inverse of zz
1593 // [1/zz3, 1/zz2, 1/zz1, 1/zz0]
1594 float32x4_t inv_zz_128 = vrecpeq_f32(m128_f_zz);
1595 inv_zz_128 = vmulq_f32(vrecpsq_f32(m128_f_zz, inv_zz_128), inv_zz_128); // improving the accuracy of the approx. inverse by Newton/Raphson
1596
1597 // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
1598 const float32x4_t m128_f_inputX = vmulq_f32(m128_f_xx, inv_zz_128);
1599 const float32x4_t m128_f_inputY = vmulq_f32(m128_f_yy, inv_zz_128);
1600
1601#endif // USE_DIVISION_ARM64_ARCHITECTURE
1602
1603 // Mark pixels inside the input image as valid, all others as invalid
1604 const uint32x4_t m128_u_validPixelX = vandq_u32(vcltq_f32(m128_f_inputX, m128_f_inputWidth_pointFive), vcgtq_f32(m128_f_inputX, m128_f_negPointFive)); // inputX < (inputWidth - 0.5) && inputX >= -0.5 ? 0xFFFFFFFF : 0x00000000
1605 const uint32x4_t m128_u_validPixelY = vandq_u32(vcltq_f32(m128_f_inputY, m128_f_inputHeight_pointFive), vcgtq_f32(m128_f_inputY, m128_f_negPointFive)); // inputY < (inputHeight - 0.5) && inputY > -0.5 ? 0xFFFFFFFF : 0x00000000
1606
1607 const uint32x4_t m128_u_validPixel = vandq_u32(m128_u_validPixelX, m128_u_validPixelY); // is_inside_input_frame(inputPosition) ? 0xFFFFFFFF : 0x00000000
1608
1609 // Stop here if all pixels are invalid
1610 const uint32x2_t m64_u_validPixel = vorr_u32(vget_low_u32(m128_u_validPixel), vget_high_u32(m128_u_validPixel));
1611 if ((vget_lane_u32(m64_u_validPixel, 0) | vget_lane_u32(m64_u_validPixel, 1)) == 0x00000000u)
1612 {
1613#ifdef OCEAN_DEBUG
1614 // clang-format off
1615 OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
1616 // clang-format on
1617 vst1q_u32(debugValidPixels, m128_u_validPixel);
1618 ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
1619#endif
1620
1621 outputPixelData[0] = bColor;
1622 outputPixelData[1] = bColor;
1623 outputPixelData[2] = bColor;
1624 outputPixelData[3] = bColor;
1625
1626 outputPixelData += 4;
1627
1628 continue;
1629 }
1630
1631 // Determine the pixel indices of the nearest neighbors and store the result
1632 vst1q_u32(validPixels, m128_u_validPixel);
1633 ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
1634
1635 const uint32x4_t m128_u_inputX = vcvtq_u32_f32(vaddq_f32(m128_f_inputX, m128_f_pointFive)); // Round to nearest integer: x' = (int) (x + 0.5f)
1636 const uint32x4_t m128_u_inputY = vcvtq_u32_f32(vaddq_f32(m128_f_inputY, m128_f_pointFive)); // Round to nearest integer: y' = (int) (y + 0.5f)
1637 const uint32x4_t m_128_u_nearestNeighbourElementOffsets = vmlaq_u32(vmulq_u32(m128_u_inputY, m128_u_inputStrideElements), m128_u_inputX, m128_u_channels); // nn = y' * inputStrideElements + x' * channels
1638 vst1q_u32(nearestNeighbourElementOffsets, m_128_u_nearestNeighbourElementOffsets);
1639
1640#ifdef OCEAN_DEBUG
1641 unsigned int debugInputX[4];
1642 unsigned int debugInputY[4];
1643 vst1q_u32(debugInputX, m128_u_inputX);
1644 vst1q_u32(debugInputY, m128_u_inputY);
1645 ocean_assert(!validPixels[0] || (debugInputX[0] < inputWidth && debugInputY[0] < inputHeight));
1646 ocean_assert(!validPixels[1] || (debugInputX[1] < inputWidth && debugInputY[1] < inputHeight));
1647 ocean_assert(!validPixels[2] || (debugInputX[2] < inputWidth && debugInputY[2] < inputHeight));
1648 ocean_assert(!validPixels[3] || (debugInputX[3] < inputWidth && debugInputY[3] < inputHeight));
1649#endif
1650
1651 outputPixelData[0] = validPixels[0] ? *(const PixelType*)(input + nearestNeighbourElementOffsets[0]) : bColor;
1652 outputPixelData[1] = validPixels[1] ? *(const PixelType*)(input + nearestNeighbourElementOffsets[1]) : bColor;
1653 outputPixelData[2] = validPixels[2] ? *(const PixelType*)(input + nearestNeighbourElementOffsets[2]) : bColor;
1654 outputPixelData[3] = validPixels[3] ? *(const PixelType*)(input + nearestNeighbourElementOffsets[3]) : bColor;
1655
1656 outputPixelData += 4;
1657 }
1658 }
1659}
1660
1661#endif // OCEAN_HARDWARE_NEON_VERSION >= 10
1662
1663template <unsigned int tChannels>
1664void FrameInterpolatorNearestPixel::homographyMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* input_H_output, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
1665{
1666 static_assert(tChannels > 0u, "Invalid channel number!");
1667
1668 ocean_assert(input != nullptr && output != nullptr && outputMask != nullptr);
1669 ocean_assert(inputWidth > 0u && inputHeight > 0u);
1670 ocean_assert(outputWidth > 0u && outputHeight > 0u);
1671 ocean_assert(input_H_output != nullptr && !input_H_output->isSingular());
1672
1673 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
1674
1675 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
1676 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
1677 const unsigned int outputMaskStrideElements = outputWidth + outputMaskPaddingElements;
1678
1679 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
1680
1681 output += firstOutputRow * outputStrideElements;
1682 outputMask += firstOutputRow * outputMaskStrideElements;
1683
1684 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
1685 {
1686 PixelType* outputPixel = (PixelType*)(output);
1687
1688 for (unsigned int x = 0; x < outputWidth; ++x)
1689 {
1690 const Vector2 outputPosition = Vector2(Scalar(int(x) + outputOriginX), Scalar(int(y) + outputOriginY));
1691 const Vector2 inputPosition(*input_H_output * outputPosition);
1692
1693 const unsigned int inputX = Numeric::round32(inputPosition.x());
1694 const unsigned int inputY = Numeric::round32(inputPosition.y());
1695
1696 if (inputX < inputWidth && inputY < inputHeight)
1697 {
1698 *outputPixel = *((PixelType*)(input + inputY * inputStrideElements + inputX * tChannels));
1699 *outputMask = maskValue;
1700 }
1701 else
1702 {
1703 *outputMask = 0xFFu - maskValue;
1704 }
1705
1706 ++outputPixel;
1707 ++outputMask;
1708 }
1709
1710 output += outputStrideElements;
1711 outputMask += outputMaskPaddingElements;
1712 }
1713}
1714
1715template <unsigned int tChannels>
1716void FrameInterpolatorNearestPixel::transform8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* lookupTable, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
1717{
1718 static_assert(tChannels > 0u, "Invalid channel number!");
1719
1720 ocean_assert(lookupTable != nullptr);
1721 ocean_assert(input != nullptr && output != nullptr);
1722
1723 ocean_assert(inputWidth != 0u && inputHeight != 0u);
1724 ocean_assert(firstRow + numberRows <= lookupTable->sizeY());
1725
1726 const unsigned int outputWidth = (unsigned int)(lookupTable->sizeX());
1727
1728 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
1729 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
1730
1731 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
1732
1733 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
1734 const PixelType* const bColor = borderColor ? (PixelType*)(borderColor) : (PixelType*)(zeroColor);
1735
1736 if (offset)
1737 {
1738 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
1739 {
1740 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
1741
1742 for (unsigned int x = 0u; x < outputWidth; ++x)
1743 {
1744 const Vector2 inputOffset(lookupTable->bilinearValue(Scalar(x), Scalar(y)));
1745 const Vector2 inputPosition(Scalar(x) + inputOffset.x(), Scalar(y) + inputOffset.y());
1746
1747 const unsigned int inputX = Numeric::round32(inputPosition.x());
1748 const unsigned int inputY = Numeric::round32(inputPosition.y());
1749
1750 if (inputX < inputWidth && inputY < inputHeight)
1751 {
1752 *outputData = *((const PixelType*)(input + inputY * inputStrideElements) + inputX);
1753 }
1754 else
1755 {
1756 *outputData = *bColor;
1757 }
1758
1759 ++outputData;
1760 }
1761 }
1762 }
1763 else
1764 {
1765 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
1766 {
1767 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
1768
1769 for (unsigned int x = 0u; x < outputWidth; ++x)
1770 {
1771 const Vector2 inputPosition(lookupTable->bilinearValue(Scalar(x), Scalar(y)));
1772
1773 const unsigned int inputX = Numeric::round32(inputPosition.x());
1774 const unsigned int inputY = Numeric::round32(inputPosition.y());
1775
1776 if (inputX < inputWidth && inputY < inputHeight)
1777 {
1778 *outputData = *((const PixelType*)(input + inputY * inputStrideElements) + inputX);
1779 }
1780 else
1781 {
1782 *outputData = *bColor;
1783 }
1784
1785 ++outputData;
1786 }
1787 }
1788 }
1789}
1790
1791template <unsigned int tChannels>
1792void FrameInterpolatorNearestPixel::transformMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable* lookupTable, const bool offset, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows)
1793{
1794 static_assert(tChannels > 0u, "Invalid channel number!");
1795
1796 ocean_assert(lookupTable != nullptr);
1797 ocean_assert(input != nullptr && output != nullptr);
1798
1799 ocean_assert(inputWidth != 0u && inputHeight != 0u);
1800 ocean_assert(firstRow + numberRows <= lookupTable->sizeY());
1801
1802 ocean_assert(NumericT<unsigned int>::isInsideValueRange(lookupTable->sizeX()));
1803 const unsigned int outputWidth = (unsigned int)(lookupTable->sizeX());
1804
1805 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
1806 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
1807 const unsigned int outputMaskStrideElements = outputWidth + outputMaskPaddingElements;
1808
1809 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
1810
1811 output += firstRow * outputStrideElements;
1812 outputMask += firstRow * outputMaskStrideElements;
1813
1814 if (offset)
1815 {
1816 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
1817 {
1818 PixelType* outputPixel = (PixelType*)(output);
1819
1820 for (unsigned int x = 0u; x < lookupTable->sizeX(); ++x)
1821 {
1822 const Vector2 inputOffset(lookupTable->bilinearValue(Scalar(x), Scalar(y)));
1823 const Vector2 inputPosition(Scalar(x) + inputOffset.x(), Scalar(y) + inputOffset.y());
1824
1825 const unsigned int inputX = Numeric::round32(inputPosition.x());
1826 const unsigned int inputY = Numeric::round32(inputPosition.y());
1827
1828 if (inputX < inputWidth && inputY < inputHeight)
1829 {
1830 *outputPixel = *((PixelType*)(input + inputY * inputStrideElements + inputX * tChannels));
1831 *outputMask = maskValue;
1832 }
1833 else
1834 {
1835 *outputMask = 0xFF - maskValue;
1836 }
1837
1838 ++outputPixel;
1839 ++outputMask;
1840 }
1841
1842 output += outputStrideElements;
1843 outputMask += outputMaskPaddingElements;
1844 }
1845 }
1846 else
1847 {
1848 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
1849 {
1850 PixelType* outputPixel = (PixelType*)(output);
1851
1852 for (unsigned int x = 0u; x < lookupTable->sizeX(); ++x)
1853 {
1854 const Vector2 inputPosition(lookupTable->bilinearValue(Scalar(x), Scalar(y)));
1855
1856 const unsigned int inputX = Numeric::round32(inputPosition.x());
1857 const unsigned int inputY = Numeric::round32(inputPosition.y());
1858
1859 if (inputX < inputWidth && inputY < inputHeight)
1860 {
1861 *outputPixel = *((PixelType*)(input + inputY * inputStrideElements + inputX * tChannels));
1862 *outputMask = maskValue;
1863 }
1864 else
1865 {
1866 *outputMask = 0xFF - maskValue;
1867 }
1868
1869 ++outputPixel;
1870 ++outputMask;
1871 }
1872
1873 output += outputStrideElements;
1874 outputMask += outputMaskPaddingElements;
1875 }
1876 }
1877}
1878
1879} // namespace CV
1880
1881} // namespace Ocean
1882
1883#endif // META_OCEAN_CV_FRAME_INTERPOLATOR_NEAREST_PIXEL_H
The following comfort class provides comfortable functions simplifying prototyping applications but a...
Definition FrameInterpolatorNearestPixel.h:49
static bool rotate90(const Frame &input, Frame &output, const bool clockwise, Worker *worker=nullptr)
Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
Definition FrameInterpolatorNearestPixel.h:589
static bool affine(const Frame &input, Frame &output, const SquareMatrix3 &input_A_output, const uint8_t *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Applies an affine image transformation to a frame (with zipped pixel format) and renders using neares...
static bool resize(const Frame &source, Frame &target, Worker *worker=nullptr)
Resizes a given frame by a nearest pixel search.
static bool transform(const Frame &input, Frame &output, const LookupTable &lookupTable, const bool offset, const uint8_t *borderColor, Worker *worker=nullptr)
Transforms a given input frame (with 1 plane) into an output frame by application of an interpolation...
static bool transformMask(const Frame &input, Frame &output, Frame &outputMask, const LookupTable &lookupTable, const bool offset, Worker *worker=nullptr, const uint8_t maskValue=0xFFu)
Transforms a given input frame (with zipped pixel format) into an output frame by application of an i...
static bool homographyMask(const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 &input_H_output, Worker *worker=nullptr, const uint8_t maskValue=0xFFu, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame (with zipped pixel format) into an output frame (with arbitrary frame ...
static bool rotate180(const Frame &input, Frame &output, Worker *worker=nullptr)
Rotates a given frame by 180 degrees.
Definition FrameInterpolatorNearestPixel.h:594
static bool rotate(const Frame &input, Frame &output, const int angle, Worker *worker=nullptr)
Rotates a given frame with 90 degree steps.
Definition FrameInterpolatorNearestPixel.h:599
static bool homography(const Frame &input, Frame &output, const SquareMatrix3 &input_H_output, const void *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame (with zipped pixel format) into an output frame by application of a ho...
This class implements highly optimized interpolation functions with fixed properties.
Definition FrameInterpolatorNearestPixel.h:189
static void resize400x400To224x224_8BitPerChannel(const uint8_t *const source, uint8_t *const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 224x224 ...
This class implements a nearest pixel frame interpolator.
Definition FrameInterpolatorNearestPixel.h:35
static void resize(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Resizes a given frame by a nearest pixel search and uses several CPU cores to speed up the process...
Definition FrameInterpolatorNearestPixel.h:605
static void homography(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, const T *borderColor, T *output, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of a homography.
Definition FrameInterpolatorNearestPixel.h:664
static void affine8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_A_output, const uint8_t *borderColor, uint8_t *output, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Applies an affine image transformation to an 8 bit per channel input frame and renders the output.
Definition FrameInterpolatorNearestPixel.h:620
static void transform8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable &lookupTable, const bool offset, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given input frame (with zipped pixel format) into an output frame by application of an i...
Definition FrameInterpolatorNearestPixel.h:719
LookupCorner2< Vector2 > LookupTable
Definition of a lookup table for 2D vectors.
Definition FrameInterpolatorNearestPixel.h:39
static void homographySubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const T *borderColor, T *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Copies the image content of an input image to a subset of an output image by application of a given h...
Definition FrameInterpolatorNearestPixel.h:878
static void resizeSubset(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Resizes a given frame by a nearest pixel search.
Definition FrameInterpolatorNearestPixel.h:757
static void rotate90(const TElementType *source, TElementType *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const bool clockwise, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
Definition FrameInterpolatorNearestPixel.h:745
static void affine8BitPerChannelSSESubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3 *affineTransform, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Apply an affine image transformation to an 8 bit per channel frame using nearest neighbor interpolati...
Definition FrameInterpolatorNearestPixel.h:926
static void transformMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable *lookupTable, const bool offset, uint8_t *output, const unsigned int outputPaddingElements, uint8_t *outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows)
Transforms a given input frame (with zipped pixel format) into an output frame by application of an i...
Definition FrameInterpolatorNearestPixel.h:1792
static void homographyMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3 *input_H_output, uint8_t *output, const unsigned int outputPaddingElements, uint8_t *outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorNearestPixel.h:1664
static void affine8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3 *affineTransform, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Apply an affine image transformation to an 8 bit per channel frame using nearest neighbor interpolati...
Definition FrameInterpolatorNearestPixel.h:806
static void homographySSESubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const T *borderColor, T *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Copies the image content of an input image to a subset of an output image by application of a given h...
Definition FrameInterpolatorNearestPixel.h:1064
static void homographyMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3 &input_H_output, uint8_t *output, const unsigned int outputPaddingElements, uint8_t *outputMask, const unsigned int outputMaskPaddingElements, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition FrameInterpolatorNearestPixel.h:706
static void affine8BitPerChannelIntegerNEONSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3 *affineTransform, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Affine image transformation for 8-bit per channel frames using nearest neighbor interpolation (using ...
Definition FrameInterpolatorNearestPixel.h:1232
static void transform8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *lookupTable, const bool offset, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame (with zipped pixel format) into an output frame by applica...
Definition FrameInterpolatorNearestPixel.h:1716
static void transformMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable &lookupTable, const bool offset, uint8_t *output, const unsigned int outputPaddingElements, uint8_t *outputMask, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given input frame (with zipped pixel format) into an output frame by application of an i...
Definition FrameInterpolatorNearestPixel.h:732
static void homographyNEONSubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const T *borderColor, T *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Copies the image content of an input image to a subset of an output image by application of a given h...
Definition FrameInterpolatorNearestPixel.h:1482
static bool coversHomographyInputFrame(const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int outputWidth, const unsigned int outputHeight, const SquareMatrix3 &input_H_output, const int outputOriginX=0, const int outputOriginY=0)
Checks whether the application of a given homography for a specified input frame and output frame cov...
static bool rotate(const Frame &input, Frame &output, const int angle, Worker *worker=nullptr)
Rotates a given frame in 90-degree steps.
static bool rotate180(const Frame &input, Frame &output, Worker *worker=nullptr)
Rotates a given frame by 180 degrees.
static bool rotate90(const Frame &input, Frame &output, const bool clockwise, Worker *worker=nullptr)
Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
This class implements a 2D pixel position with pixel precision.
Definition PixelPosition.h:63
T y() const
Returns the vertical coordinate position of this object.
Definition PixelPosition.h:468
T x() const
Returns the horizontal coordinate position of this object.
Definition PixelPosition.h:456
static Caller< void > createStatic(typename StaticFunctionPointerMaker< void, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass >::Type function)
Creates a new caller container for a static function with no function parameter.
Definition Caller.h:2877
This class implements Ocean's image class.
Definition Frame.h:1808
void setRelativeTimestamp(const Timestamp &relative)
Sets the relative timestamp of this frame.
Definition Frame.h:4236
void setTimestamp(const Timestamp &timestamp)
Sets the timestamp of this frame.
Definition Frame.h:4231
const Timestamp & timestamp() const
Returns the timestamp of this frame.
Definition Frame.h:4221
const Timestamp & relativeTimestamp() const
Returns the relative timestamp of this frame.
Definition Frame.h:4226
Definition of a frame type composed by the frame dimension, pixel format and pixel origin.
Definition Frame.h:30
size_t sizeY() const
Returns the vertical dimension of this lookup object.
Definition Lookup2.h:947
size_t sizeX() const
Returns the horizontal dimension of this lookup object.
Definition Lookup2.h:941
This class implements a 2D lookup object with values at the bins' corners defining the individual loo...
Definition Lookup2.h:636
T bilinearValue(const TScalar x, const TScalar y) const
Applies a lookup for a specific position in this lookup object.
Definition Lookup2.h:1815
This class implements an object able to allocate memory.
Definition base/Memory.h:22
void * data()
Returns the pointer to the writable memory which is allocated by this object.
Definition base/Memory.h:303
This class provides basic numeric functionalities.
Definition Numeric.h:57
static constexpr int32_t round32(const T value)
Returns the rounded 32 bit integer value of a given value.
Definition Numeric.h:2067
static constexpr bool isEqualEps(const T value)
Returns whether a value is smaller than or equal to a small epsilon.
Definition Numeric.h:2090
bool isNull() const
Returns whether this matrix is a zero matrix.
Definition SquareMatrix3.h:1333
const T * data() const
Returns a pointer to the internal values.
Definition SquareMatrix3.h:1046
bool isSingular() const
Returns whether this matrix is singular (and thus cannot be inverted).
Definition SquareMatrix3.h:1341
TypeMapperBySize< sizeof(T)>::Type Type
Definition of an invalid mapped data type.
Definition DataType.h:508
const T & x() const noexcept
Returns the x value.
Definition Vector2.h:710
const T & y() const noexcept
Returns the y value.
Definition Vector2.h:722
bool isEqual(const VectorT2< T > &vector, const T eps) const
Returns whether two vectors are equal up to a specified epsilon.
Definition Vector2.h:758
This class implements a worker able to distribute function calls over different threads.
Definition Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
SquareMatrixT3< Scalar > SquareMatrix3
Definition of the SquareMatrix3 object, depending on the OCEAN_MATH_USE_SINGLE_PRECISION either with ...
Definition SquareMatrix3.h:42
float Scalar
Definition of a scalar type.
Definition Math.h:129
VectorT3< Scalar > Vector3
Definition of a 3D vector.
Definition Vector3.h:29
VectorT2< Scalar > Vector2
Definition of a 2D vector.
Definition Vector2.h:28
The namespace covering the entire Ocean framework.
Definition Accessor.h:15
Default definition of a type with tBytes bytes.
Definition DataType.h:32