Ocean
Loading...
Searching...
No Matches
FrameInterpolatorNearestPixel.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_CV_FRAME_INTERPOLATOR_NEAREST_PIXEL_H
9#define META_OCEAN_CV_FRAME_INTERPOLATOR_NEAREST_PIXEL_H
10
11#include "ocean/cv/CV.h"
14
15#include "ocean/base/DataType.h"
16#include "ocean/base/Frame.h"
17#include "ocean/base/Memory.h"
18#include "ocean/base/Worker.h"
19
20#include "ocean/math/Lookup2.h"
22
23namespace Ocean
24{
25
26namespace CV
27{
28
29/**
30 * This class implements a nearest pixel frame interpolator.
31 * Actually, no pixels are interpolated, but the color intensities of the nearest pixels (e.g., based on rounding) are used.<br>
32 * @ingroup cv
33 */
34class OCEAN_CV_EXPORT FrameInterpolatorNearestPixel
35{
36 public:
37
38 /// Definition of a lookup table for 2D vectors.
40
41 public:
42
43 /**
44 * The following comfort class provides comfortable functions simplifying prototyping applications but also increasing the binary size of the resulting applications.
45 * Best practice is to avoid using these functions if binary size matters,<br>
46 * as for every comfort function a corresponding function exists with specialized functionality not increasing binary size significantly.<br>
47 */
48 class OCEAN_CV_EXPORT Comfort
49 {
50 public:
51
52 /**
53 * Resizes a given frame by a nearest pixel search.
54 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
55 * @param source The source frame that will be resized, must have a zipped pixel format, must be valid
56 * @param target The target frame that receives the image information of the source frame, the pixel format and pixel origin must match with the source frame
57 * @param worker Optional worker object to distribute the computational load
58 * @return True, if succeeded
59 */
60 static bool resize(const Frame& source, Frame& target, Worker* worker = nullptr);
61
62 /**
63 * Resizes a given frame in place by a nearest pixel search.
64 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
65 * @param frame The frame that will be resized, must have a zipped pixel format, must be valid
66 * @param targetWidth Width of the new target frame in pixel, with range [1, infinity)
67 * @param targetHeight Height of the new target frame in pixel, with range [1, infinity)
68 * @param worker Optional worker object to distribute the computational load
69 * @return True, if succeeded
70 */
71 static inline bool resize(Frame& frame, const unsigned int targetWidth, const unsigned int targetHeight, Worker* worker = nullptr);
72
73 /**
74 * Applies an affine image transformation to a frame (with zipped pixel format) and renders the result using nearest-neighbor interpolation.
75 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.
76 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the affine transformation).
77 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
78 * @param input The input frame that will be transformed, must have a zipped pixel format, must be valid
79 * @param output The output frame resulting by application of the given affine transformation, with same pixel format and pixel origin as the input frame, must have a valid dimension
80 * @param input_A_output Affine transformation used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
81 * @param borderColor Color of undefined pixel positions, the size of the buffer must match the number of channels, nullptr to assign 0x00 to each channel
82 * @param worker Optional worker object to distribute the computational load
83 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
84 * @return True, if succeeded
85 */
86 static bool affine(const Frame& input, Frame& output, const SquareMatrix3& input_A_output, const uint8_t* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
87
88 /**
89 * Transforms a given input frame (with zipped pixel format) into an output frame by application of a homography.
90 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
91 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
92 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
93 * @param input The input frame that will be transformed, must have a zipped pixel format, must be valid
94 * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must be valid
95 * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
96 * @param borderColor Color of undefined pixel positions, the size of the buffer must match the number of channels, nullptr to assign 0x00 to each channel
97 * @param worker Optional worker object to distribute the computational load
98 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
99 * @return True, if succeeded
100 */
101 static bool homography(const Frame& input, Frame& output, const SquareMatrix3& input_H_output, const void* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
102
103 /**
104 * Transforms a given input frame (with zipped pixel format) into an output frame (with arbitrary frame dimension) by application of a homography.
105 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
106 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
107 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, furthermore these pixels are left untouched in the output frame.<br>
108 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
109 * @param input The input frame that will be transformed, must have a zipped pixel format, must be valid
110 * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must have a valid dimension
111 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
112 * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
113 * @param worker Optional worker object to distribute the computational load
114 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
115 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
116 * @return True, if succeeded
117 * @see Geometry::Homography::coversHomographyInputFrame().
118 */
119 static bool homographyMask(const Frame& input, Frame& output, Frame& outputMask, const SquareMatrix3& input_H_output, Worker* worker = nullptr, const uint8_t maskValue = 0xFFu, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
120
121 /**
122 * Transforms a given input frame (with 1 plane) into an output frame by application of an interpolation lookup table.
123 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
124 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
125 * @param input The input frame which will be transformed, must have a zipped pixel format, must be valid
126 * @param output Resulting output frame, the frame dimension will be set to match the size of the lookup table, pixel format and pixel origin will be set to match the given input frame
127 * @param lookupTable The lookup table which defines the transformation, must be valid
128 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
129 * @param borderColor Color of undefined pixel positions, the size of the buffer must match the number of channels, nullptr to assign 0x00 to each channel
130 * @param worker Optional worker object to distribute the computation
131 * @return True, if succeeded
132 */
133 static bool transform(const Frame& input, Frame& output, const LookupTable& lookupTable, const bool offset, const uint8_t* borderColor, Worker* worker = nullptr);
134
135 /**
136 * Transforms a given input frame (with zipped pixel format) into an output frame by application of an interpolation lookup table.
137 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
138 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, furthermore these pixels are left untouched in the output frame.<br>
139 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
140 * @param input The input frame which will be transformed
141 * @param output Resulting output frame, the frame dimension will be set to match the size of the lookup table, pixel format and pixel origin will be set to match the given input frame
142 * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
143 * @param lookupTable The lookup table which defines the transformation, must be valid
144 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
145 * @param worker Optional worker object to distribute the computation
146 * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
147 * @return True, if succeeded
148 */
149 static bool transformMask(const Frame& input, Frame& output, Frame& outputMask, const LookupTable& lookupTable, const bool offset, Worker* worker = nullptr, const uint8_t maskValue = 0xFFu);
150
151 /**
152 * Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
153 * This function is just a wrapper for CV::FrameTransposer::Comfort::rotate90().
154 * @param input The input frame which will be rotated, must be valid
155 * @param output The resulting rotated output frame, the frame type will be set automatically
156 * @param clockwise True, to rotate the frame clockwise; False, to rotate the frame counter-clockwise
157 * @param worker Optional worker object to distribute the computation
158 * @return True, if succeeded
159 */
160 static inline bool rotate90(const Frame& input, Frame& output, const bool clockwise, Worker* worker = nullptr);
161
162 /**
163 * Rotates a given frame by 180 degrees.
164 * This function is just a wrapper for CV::FrameTransposer::Comfort::rotate180().
165 * @param input The input frame which will be rotated, must be valid
166 * @param output The resulting rotated output frame, the frame type will be set automatically
167 * @param worker Optional worker object to distribute the computation
168 * @return True, if succeeded
169 */
170 static inline bool rotate180(const Frame& input, Frame& output, Worker* worker = nullptr);
171
172 /**
173 * Rotates a given frame in steps of 90 degrees.
174 * This function is just a wrapper for CV::FrameTransposer::Comfort::rotate().
175 * @param input The input frame which will be rotated, must be valid
176 * @param output The resulting rotated output frame, the frame type will be set automatically
177 * @param angle The clockwise rotation angle to be used, must be a multiple of +/- 90, with range (-infinity, infinity)
178 * @param worker Optional worker object to distribute the computation
179 * @return True, if succeeded
180 */
181 static bool rotate(const Frame& input, Frame& output, const int angle, Worker* worker = nullptr);
182 };
183
184 /**
185 * This class implements highly optimized interpolation functions with fixed properties.
186 * The functions can be significantly faster as these functions are tailored to the specific properties.
187 */
188 class OCEAN_CV_EXPORT SpecialCases
189 {
190 public:
191
192 /**
193 * Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 224x224 by using a bilinear interpolation. NOTE(review): the enclosing class is the nearest-pixel interpolator, yet this description and the reference below mention bilinear interpolation; confirm against the implementation.
194 * This function exploits the fact that lookup locations and interpolation factors repeat after 25 source pixels (14 pixels in the target resolution, as 224/400 = 14/25).
195 * @param source The source frame buffer with resolution 400x400, must be valid
196 * @param target The target frame buffer receiving the resized image information, with resolution 224x224, must be valid
197 * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
198 * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
199 * @see FrameInterpolatorBilinear::resize<T, tChannels>().
200 */
201 static void resize400x400To224x224_8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
202 };
203
204 /**
205 * Resizes a given frame by a nearest pixel search and uses several CPU cores to speed up the process.
206 * @param source The source frame buffer, must be valid
207 * @param target The target frame buffer, must be valid
208 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
209 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
210 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
211 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
212 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
213 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
214 * @param worker Optional worker to distribute the computation
215 * @tparam T Data type of the pixel channel values
216 * @tparam tChannels Number of data channels, with range [1, infinity)
217 */
218 template <typename T, unsigned int tChannels>
219 static inline void resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
220
221 /**
222 * Applies an affine image transformation to an 8 bit per channel input frame and renders the output.
223 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.
224 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the affine transformation).
225 * @param input The input frame that will be transformed, must be valid
226 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
227 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
228 * @param input_A_output The affine transformation used to transform the given input frame, transforming output points to input points, must be valid
229 * @param borderColor Color of undefined pixel positions, the size of the buffer must match the number of channels, nullptr to assign 0x00 to each channel
230 * @param output The output frame using the given affine transform, must be valid
231 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
232 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
233 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
234 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
235 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
236 * @param worker Optional worker object to distribute the computational load
237 * @tparam tChannels The number of channels of the frame, with range [1, infinity)
238 */
239 template <unsigned int tChannels>
240 static inline void affine8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_A_output, const uint8_t* borderColor, uint8_t* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
241
242 /**
243 * Transforms a given input frame into an output frame by application of a homography.
244 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
245 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
246 * @param input The input frame that will be transformed, must be valid
247 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
248 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
249 * @param input_H_output The homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
250 * @param borderColor Color of undefined pixel positions, the size of the buffer must match the number of channels, nullptr to assign 0 to each channel
251 * @param output The output frame using the given homography, must be valid
252 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
253 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
254 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
255 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
256 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
257 * @param worker Optional worker object to distribute the computational load
258 * @tparam T Data type of each pixel channel, e.g., float, double, int
259 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
260 * @see homographyMask8BitPerChannel().
261 */
262 template <typename T, unsigned int tChannels>
263 static inline void homography(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const T* borderColor, T* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
264
265 /**
266 * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
267 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
268 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
269 * @param input The input frame that will be transformed, must be valid
270 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
271 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
272 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
273 * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
274 * @param output The output frame using the given homography, must be valid
275 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
276 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
277 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
278 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
279 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
280 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
281 * @param worker Optional worker object to distribute the computational load
282 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
283 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
284 * @see homography().
285 */
286 template <unsigned int tChannels>
287 static inline void homographyMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3& input_H_output, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
288
289 /**
290 * Transforms a given input frame (with zipped pixel format) into an output frame by application of an interpolation lookup table.
291 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
292 * @param input The input frame which will be transformed, must be valid
293 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
294 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
295 * @param lookupTable The lookup table which defines the transformation, must be valid
296 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
297 * @param borderColor Color of undefined pixel positions, the size of the buffer must match the number of channels, nullptr to assign 0x00 to each channel
298 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
299 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
300 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
301 * @param worker Optional worker object to distribute the computation
302 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
303 */
304 template <unsigned int tChannels>
305 static inline void transform8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& lookupTable, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
306
307 /**
308 * Transforms a given input frame (with zipped pixel format) into an output frame by application of an interpolation lookup table.
309 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
310 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, furthermore these pixels are left untouched in the output frame.<br>
311 * @param input The input frame which will be transformed, must be valid
312 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
313 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
314 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
315 * @param lookupTable The lookup table which defines the transformation, must be valid
316 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
317 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
318 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
319 * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
320 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
321 * @param worker Optional worker object to distribute the computation
322 * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
323 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
324 */
325 template <unsigned int tChannels>
326 static inline void transformMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable& lookupTable, const bool offset, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
327
328 /**
329 * Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
330 * @param source The source frame which will be rotated, must be valid
331 * @param target The resulting rotated target frame, must be valid and must have the same buffer size as the source frame
332 * @param sourceWidth The width of the source frame in pixel, with range [1, infinity)
333 * @param sourceHeight The height of the source frame in pixel, with range [1, infinity)
334 * @param clockwise True, to rotate the frame clockwise; False, to rotate the frame counter-clockwise
335 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
336 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
337 * @param worker Optional worker object to distribute the computation
338 * @tparam TElementType Data type of the elements of the image pixels
339 * @tparam tChannels Number of data channels, with range [1, infinity)
340 */
341 template <typename TElementType, unsigned int tChannels>
342 static inline void rotate90(const TElementType* source, TElementType* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const bool clockwise, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
343
344 /**
345 * Checks whether the application of a given homography for a specified input frame and output frame covers the entire image information (which is necessary for the nearest pixel 'interpolation') or whether the homography relies on missing image information.
346 * @param inputWidth The width of the input frame in pixel, with range [1, infinity)
347 * @param inputHeight The height of the input frame in pixel, with range [1, infinity)
348 * @param outputWidth The width of the output frame in pixel, with range [1, infinity)
349 * @param outputHeight The height of the output frame in pixel, with range [1, infinity)
350 * @param input_H_output The homography to check, which transforms points by the following equation: inputPoint = input_H_output * outputPoint, must be valid
351 * @param outputOriginX The horizontal origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)
352 * @param outputOriginY The vertical origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)
353 * @return True, if the homography covers the entire input image information (if all output pixels will receive valid data from the input frame); False, otherwise
354 */
355 static bool coversHomographyInputFrame(const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int outputWidth, const unsigned int outputHeight, const SquareMatrix3& input_H_output, const int outputOriginX = 0, const int outputOriginY = 0);
356
357 private:
358
359 /**
360 * Resizes a subset of the target rows of a given frame by a nearest pixel search.
361 * @param source The source frame buffer, must be valid
362 * @param target The target frame buffer, must be valid
363 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
364 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
365 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
366 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
367 * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
368 * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
369 * @param firstTargetRow First (including) row to convert, with range [0, targetHeight)
370 * @param numberTargetRows Number of rows to convert, with range [1, targetHeight - firstTargetRow]
371 * @tparam T Data type of the pixel channel values
372 * @tparam tChannels Number of data channels, range: [1, infinity)
373 */
374 template <typename T, unsigned int tChannels>
375 static void resizeSubset(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
376
377 /**
378 * Applies an affine image transformation to a subset of the output rows of an 8 bit per channel frame using nearest neighbor interpolation.
379 * @param input The input frame that will be transformed, must be valid
380 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
381 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
382 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
383 * @param affineTransform Affine transformation used to transform the given input frame, must be valid
384 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
385 * @param output The output frame using the given affine transform, must be valid
386 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
387 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
388 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
389 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight)
390 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
391 * @tparam tChannels Number of frame channels, with range [1, infinity)
392 */
393 template <unsigned int tChannels>
394 static void affine8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
395
396 /**
397 * Copies the image content of an input image to a subset of the rows of an output image by application of a given homography transformation.
398 * @param input The input frame that will be transformed, must be valid
399 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
400 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
401 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
402 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0 to each channel
403 * @param output The output frame using the given homography, must be valid
404 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
405 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
406 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
407 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
408 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight)
409 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
410 * @tparam T Data type of each pixel channel, e.g., float, double, int
411 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
412 */
413 template <typename T, unsigned int tChannels>
414 static void homographySubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
415
416 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
417
418 /**
419 * Applies an affine image transformation to a subset of the output rows of an 8 bit per channel frame using nearest neighbor interpolation (using SSE).
420 * Beware: This function disregards the last row of the transformation matrix completely and only uses the top two rows, i.e., the elements a through f.
421 * @param input The input frame that will be transformed
422 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
423 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
424 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
425 * @param affineTransform Affine transformation which is applied to the input frame
426 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
427 * @param output The output frame where the result of the transformation will be stored
428 * @param outputWidth The width of the output image in pixel, with range [4, infinity)
429 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
430 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
431 * @param firstOutputRow The first output row to be handled
432 * @param numberOutputRows Number of output rows to be handled
433 * @tparam tChannels Number of frame channels
434 * @see affine8BitPerChannelSubset(), affine8BitPerChannelIntegerNEONSubset().
435 */
436 template <unsigned int tChannels>
437 static inline void affine8BitPerChannelSSESubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
438
439 /**
440 * Copies the image content of an input image to a subset of the rows of an output image by application of a given homography transformation (using SSE).
441 * @param input The input frame that will be transformed, must be valid
442 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
443 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
444 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
445 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0 to each channel
446 * @param output The output frame using the given homography, must be valid
447 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
448 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
449 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
450 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
451 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight)
452 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
453 * @tparam T Data type of each pixel channel, e.g., float, double, int
454 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
455 */
456 template <typename T, unsigned int tChannels>
457 static void homographySSESubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
458
459 #endif // OCEAN_HARDWARE_SSE_VERSION >= 41
460
461 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
462
463 /**
464 * Applies an affine image transformation to a subset of the output rows of an 8 bit per channel frame using nearest neighbor interpolation (using NEON and integer fixed-point arithmetic).
465 * @param input The input frame that will be transformed, must be valid
466 * @param inputWidth Width of the input frame in pixel, with range [1, 65536)
467 * @param inputHeight Height of the input frame in pixel, with range [1, 65536)
468 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
469 * @param affineTransform Affine transformation used to transform the given input frame, must be valid
470 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
471 * @param output The output frame using the given affine transform, must be valid
472 * @param outputWidth The width of the output image in pixel, with range [1, 65536)
473 * @param outputHeight The height of the output image in pixel, with range [1, 65536)
474 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
475 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight)
476 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
477 * @tparam tChannels Number of frame channels
478 */
479 template <unsigned int tChannels>
480 static inline void affine8BitPerChannelIntegerNEONSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
481
482 /**
483 * Copies the image content of an input image to a subset of the rows of an output image by application of a given homography transformation (using NEON).
484 * Beware: The output width 'outputWidth' must be >= 4, use homographySubset() for narrower output frames.
485 * @param input The input frame that will be transformed
486 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
487 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
488 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
489 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, @c nullptr to assign 0 to each channel
490 * @param output The output frame using the given homography
491 * @param outputWidth The width of the output image in pixel, with range [4, infinity)
492 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
493 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
494 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
495 * @param firstOutputRow The first output row to be handled
496 * @param numberOutputRows Number of output rows to be handled
497 * @tparam T Data type of each pixel channel, e.g., float, double, int
498 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
499 * @see homographySubset().
500 */
501 template <typename T, unsigned int tChannels>
502 static inline void homographyNEONSubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
503
504 #endif // OCEAN_HARDWARE_NEON_VERSION
505
506 /**
507 * Transforms a subset of the output rows of an 8 bit per channel frame using the given homography and writes a corresponding output mask.
508 * @param input The input frame that will be transformed, must be valid
509 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
510 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
511 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
512 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
513 * @param output The output frame using the given homography, must be valid
514 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
515 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
516 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
517 * @param maskValue 8 bit mask value for output pixels with a corresponding pixel inside the input frame, output pixels without a corresponding input pixel will be assigned with (0xFF - maskValue)
518 * @param outputOriginX The horizontal coordinate of the output frame's origin
519 * @param outputOriginY The vertical coordinate of the output frame's origin
520 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
521 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
522 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight)
523 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
524 * @tparam tChannels Number of frame channels
525 */
526 template <unsigned int tChannels>
527 static inline void homographyMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* input_H_output, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
528
529 /**
530 * Transforms a subset of a given input frame (with zipped pixel format) into an output frame by application of an interpolation lookup table.
531 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
532 * @param input The input frame which will be transformed, must be valid
533 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
534 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
535 * @param lookupTable The lookup table which defines the transformation, must be valid
536 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
537 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
538 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
539 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
540 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
541 * @param firstRow First row to be handled, with range [0, lookupTable->sizeY())
542 * @param numberRows Number of rows to be handled, with range [1, lookupTable->sizeY() - firstRow]
543 * @tparam tChannels Number of channels of the frame
544 */
545 template <unsigned int tChannels>
546 static void transform8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* lookupTable, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
547
548 /**
549 * Transforms a subset of a given input frame (with zipped pixel format) into an output frame by application of an interpolation lookup table.
550 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
551 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
552 * @param input The input frame which will be transformed, must be valid
553 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
554 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
555 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
556 * @param lookupTable The lookup table which defines the transformation, must be valid
557 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
558 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
559 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
560 * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
561 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
562 * @param maskValue 8 bit mask value for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
563 * @param firstRow First row to be handled, with range [0, lookupTable->sizeY())
564 * @param numberRows Number of rows to be handled, with range [1, lookupTable->sizeY() - firstRow]
565 * @tparam tChannels Number of channels of the frame
566 */
567 template <unsigned int tChannels>
568 static void transformMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable* lookupTable, const bool offset, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows);
569};
570
571inline bool FrameInterpolatorNearestPixel::Comfort::resize(Frame& frame, const unsigned int targetWidth, const unsigned int targetHeight, Worker* worker)
572{
573 ocean_assert(frame && targetWidth >= 1u && targetHeight >= 1u);
574
575 Frame tmpFrame(FrameType(frame, targetWidth, targetHeight));
576
577 if (!resize(frame, tmpFrame, worker))
578 {
579 return false;
580 }
581
582 tmpFrame.setTimestamp(frame.timestamp());
583 tmpFrame.setRelativeTimestamp(frame.relativeTimestamp());
584
585 frame = std::move(tmpFrame);
586 return true;
587}
588
589inline bool FrameInterpolatorNearestPixel::Comfort::rotate90(const Frame& input, Frame& output, const bool clockwise, Worker* worker)
590{
591 return FrameTransposer::Comfort::rotate90(input, output, clockwise, worker);
592}
593
594inline bool FrameInterpolatorNearestPixel::Comfort::rotate180(const Frame& input, Frame& output, Worker* worker)
595{
596 return FrameTransposer::Comfort::rotate180(input, output, worker);
597}
598
599inline bool FrameInterpolatorNearestPixel::Comfort::rotate(const Frame& input, Frame& output, const int angle, Worker* worker)
600{
601 return FrameTransposer::Comfort::rotate(input, output, angle, worker);
602}
603
604template <typename T, unsigned int tChannels>
605inline void FrameInterpolatorNearestPixel::resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
606{
607 ocean_assert(source && target);
608
609 if (worker)
610 {
611 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::resizeSubset<T, tChannels>, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
612 }
613 else
614 {
615 resizeSubset<T, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
616 }
617}
618
619template <unsigned int tChannels>
620inline void FrameInterpolatorNearestPixel::affine8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& affineTransform, const uint8_t* borderColor, uint8_t* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
621{
622 // Merge the additional translation into the affine transformation
623 const SquareMatrix3 adjustedAffineTransform = affineTransform * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
624
625 if (worker)
626 {
627 if (outputWidth >= 4u)
628 {
629#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
630 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::affine8BitPerChannelSSESubset<tChannels>, input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
631 return;
632#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
633 if (inputWidth <= 65535u && inputHeight <= 65535u && outputWidth <= 65535u && outputHeight <= 65535u)
634 {
635 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::affine8BitPerChannelIntegerNEONSubset<tChannels>, input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 32u);
636 return;
637 }
638#endif
639 }
640
641 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::affine8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
642 }
643 else
644 {
645 if (outputWidth >= 4u)
646 {
647#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
648 affine8BitPerChannelSSESubset<tChannels>(input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, outputHeight);
649 return;
650#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
651 if (inputWidth <= 65535u && inputHeight <= 65535u && outputWidth <= 65535u && outputHeight <= 65535u)
652 {
653 affine8BitPerChannelIntegerNEONSubset<tChannels>(input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, outputHeight);
654 return;
655 }
656#endif
657 }
658
659 affine8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, outputHeight);
660 }
661}
662
663template <typename T, unsigned int tChannels>
664inline void FrameInterpolatorNearestPixel::homography(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const T* borderColor, T* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
665{
666 static_assert(tChannels >= 1u, "Invalid channel number!");
667
668 // Merge the additional translation into the homography
669 const SquareMatrix3 input_H_adjustedOutput = input_H_output * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
670
671 using MappedTypeT = typename TypeMapper<T>::Type;
672
673 if (worker)
674 {
675 if (outputWidth >= 4u)
676 {
677#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
678 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::homographySSESubset<MappedTypeT, tChannels>, (const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
679 return;
680#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
681 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::homographyNEONSubset<MappedTypeT, tChannels>, (const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
682 return;
683#endif
684 }
685
686 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::homographySubset<MappedTypeT, tChannels>, (const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
687 }
688 else
689 {
690 if (outputWidth >= 4u)
691 {
692#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
693 homographySSESubset<MappedTypeT, tChannels>((const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
694 return;
695#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
696 homographyNEONSubset<MappedTypeT, tChannels>((const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
697 return;
698#endif
699 }
700
701 homographySubset<MappedTypeT, tChannels>((const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
702 }
703}
704
705template <unsigned int tChannels>
706inline void FrameInterpolatorNearestPixel::homographyMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3& input_H_output, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, Worker* worker, const uint8_t maskValue)
707{
708 if (worker)
709 {
710 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::homographyMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, inputPaddingElements, &input_H_output, output, outputPaddingElements, outputMask, outputMaskPaddingElements, maskValue, outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, 0u, 0u), 0u, outputHeight, 14u, 15u, 20u);
711 }
712 else
713 {
714 homographyMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, inputPaddingElements, &input_H_output, output, outputPaddingElements, outputMask, outputMaskPaddingElements, maskValue, outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, 0u, outputHeight);
715 }
716}
717
718template <unsigned int tChannels>
719inline void FrameInterpolatorNearestPixel::transform8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& lookupTable, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
720{
721 if (worker)
722 {
723 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::transform8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &lookupTable, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (unsigned int)(lookupTable.sizeY()), 9u, 10u, 20u);
724 }
725 else
726 {
727 transform8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &lookupTable, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (unsigned int)(lookupTable.sizeY()));
728 }
729}
730
731template <unsigned int tChannels>
732inline void FrameInterpolatorNearestPixel::transformMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable& lookupTable, const bool offset, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, Worker* worker, const uint8_t maskValue)
733{
734 if (worker)
735 {
736 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::transformMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, inputPaddingElements, &lookupTable, offset, output, outputPaddingElements, outputMask, outputMaskPaddingElements, maskValue, 0u, 0u), 0u, (unsigned int)(lookupTable.sizeY()), 11u, 12u, 20u);
737 }
738 else
739 {
740 transformMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, inputPaddingElements, &lookupTable, offset, output, outputPaddingElements, outputMask, outputMaskPaddingElements, maskValue, 0u, (unsigned int)lookupTable.sizeY());
741 }
742}
743
744template <typename TElementType, unsigned int tChannels>
745inline void FrameInterpolatorNearestPixel::rotate90(const TElementType* source, TElementType* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const bool clockwise, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
746{
747 static_assert(tChannels >= 1u, "Invalid channel number!");
748
749 ocean_assert(source != nullptr && target != nullptr);
750 ocean_assert(source != target);
751 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
752
753 FrameTransposer::rotate90<TElementType, tChannels>(source, target, sourceWidth, sourceHeight, clockwise, sourcePaddingElements, targetPaddingElements, worker);
754}
755
756template <typename T, unsigned int tChannels>
757void FrameInterpolatorNearestPixel::resizeSubset(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
758{
759 static_assert(tChannels > 0u, "Invalid channel number!");
760 static_assert(sizeof(T) != 0, "Invalid data type!");
761
762 ocean_assert(source != nullptr && target != nullptr);
763 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
764 ocean_assert(targetWidth != 0u && targetHeight != 0u);
765
766 ocean_assert(firstTargetRow + numberTargetRows <= targetHeight);
767
768 const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
769 const unsigned int targetStrideElements = targetWidth * tChannels + targetPaddingElements;
770
771 Memory memoryHorizontalLookups = Memory::create<unsigned int>(targetWidth);
772 unsigned int* horizontalLookups = memoryHorizontalLookups.data<unsigned int>();
773
774 for (unsigned int tx = 0u; tx < targetWidth; ++tx)
775 {
776 const unsigned int sx = tx * sourceWidth / targetWidth;
777 ocean_assert(sx < sourceWidth);
778
779 horizontalLookups[tx] = sx * tChannels;
780 }
781
782 target += firstTargetRow * targetStrideElements;
783
784 for (unsigned int ty = firstTargetRow; ty < firstTargetRow + numberTargetRows; ++ty)
785 {
786 const unsigned int sy = ty * sourceHeight / targetHeight;
787 ocean_assert(sy < sourceHeight);
788
789 const T* const sourceRow = source + sy * sourceStrideElements;
790
791 for (unsigned int tx = 0; tx < targetWidth; ++tx)
792 {
793 const T* const sourcePointer = sourceRow + horizontalLookups[tx];
794
795 for (unsigned int n = 0u; n < tChannels; ++n)
796 {
797 *target++ = sourcePointer[n];
798 }
799 }
800
801 target += targetPaddingElements;
802 }
803}
804
805template <unsigned int tChannels>
806void FrameInterpolatorNearestPixel::affine8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
807{
808 static_assert(tChannels >= 1u, "Invalid channel number!");
809
810 ocean_assert(input != nullptr && output != nullptr);
811 ocean_assert(inputWidth > 0u && inputHeight > 0u);
812 ocean_assert_and_suppress_unused(outputWidth > 0u && outputHeight > 0u, outputHeight);
813 ocean_assert(affineTransform);
814 ocean_assert(!affineTransform->isNull() && Numeric::isEqualEps((*affineTransform)[2]) && Numeric::isEqualEps((*affineTransform)[5]));
815
816 ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
817
818 using PixelType = typename DataType<uint8_t, tChannels>::Type;
819
820 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
821 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
822
823 PixelType* outputData = (PixelType*)(output + firstOutputRow * (outputWidth * tChannels + outputPaddingElements));
824
825 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
826 {
827 //
828 // We can slightly optimize the 3x3 matrix multiplication:
829 //
830 // | X0 Y0 Z0 | | x |
831 // | X1 Y1 Z1 | * | y |
832 // | 0 0 1 | | 1 |
833 //
834 // | xx | | X0 * x | | Y0 * y + Z0 |
835 // | yy | = | X1 * x | + | Y1 * y + Z1 |
836 //
837 // As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
838 //
839 // C0 = Y0 * y + Z0
840 // C1 = Y1 * y + Z1
841 //
842 // So the computation becomes:
843 //
844 // | x' | | X0 * x | | C0 |
845 // | y' | = | X1 * x | + | C1 |
846 //
847
848 const Vector2 X(affineTransform->data() + 0);
849 const Vector2 c(Vector2(affineTransform->data() + 3) * Scalar(y) + Vector2(affineTransform->data() + 6));
850
851 for (unsigned int x = 0u; x < outputWidth; ++x)
852 {
853 const Vector2 inputPosition = X * Scalar(x) + c;
854
855#ifdef OCEAN_DEBUG
856 const Scalar debugX = (*affineTransform)[0] * Scalar(x) + (*affineTransform)[3] * Scalar(y) + (*affineTransform)[6];
857 const Scalar debugY = (*affineTransform)[1] * Scalar(x) + (*affineTransform)[4] * Scalar(y) + (*affineTransform)[7];
858 ocean_assert(inputPosition.isEqual(Vector2(debugX, debugY), Scalar(0.01)));
859#endif
860
861 const unsigned int inputX = Numeric::round32(inputPosition.x());
862 const unsigned int inputY = Numeric::round32(inputPosition.y());
863
864 if (inputX < inputWidth && inputY < inputHeight)
865 {
866 *outputData = *(PixelType*)(input + inputY * (inputWidth * tChannels + inputPaddingElements) + inputX * tChannels);
867 }
868 else
869 {
870 *outputData = *bColor;
871 }
872
873 outputData++;
874 }
875
876 outputData = (PixelType*)((uint8_t*)outputData + outputPaddingElements);
877 }
878}
879
880template <typename T, unsigned int tChannels>
881void FrameInterpolatorNearestPixel::homographySubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
882{
883 static_assert(tChannels > 0u, "Invalid channel number!");
884
885 ocean_assert(input != nullptr && output != nullptr);
886 ocean_assert(inputWidth > 0u && inputHeight > 0u);
887 ocean_assert(outputWidth > 0u && outputHeight > 0u);
888 ocean_assert(input_H_output != nullptr && !input_H_output->isSingular());
889
890 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
891
892 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
893 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
894
895 using PixelType = typename DataType<T, tChannels>::Type;
896
897 const T zeroColor[tChannels] = {T(0)};
898 const PixelType bColor = borderColor ? *(const PixelType*)(borderColor) : *(const PixelType*)(zeroColor);
899
900 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
901 {
902 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
903
904 for (unsigned int x = 0u; x < outputWidth; ++x)
905 {
906 const Vector2 outputPosition = Vector2(Scalar(x), Scalar(y));
907 const Vector2 inputPosition(*input_H_output * outputPosition);
908
909 const unsigned int inputX = Numeric::round32(inputPosition.x());
910 const unsigned int inputY = Numeric::round32(inputPosition.y());
911
912 if (inputX < inputWidth && inputY < inputHeight)
913 {
914 *outputData = *((const PixelType*)(input + inputY * inputStrideElements) + inputX);
915 }
916 else
917 {
918 *outputData = bColor;
919 }
920
921 outputData++;
922 }
923 }
924}
925
926#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
927
928template <unsigned int tChannels>
929inline void FrameInterpolatorNearestPixel::affine8BitPerChannelSSESubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
930{
931 static_assert(tChannels >= 1u, "Invalid channel number!");
932
933 ocean_assert(input && output);
934 ocean_assert(inputWidth > 0u && inputHeight > 0u);
935 ocean_assert(outputWidth >= 4u && outputHeight > 0u);
936 ocean_assert(affineTransform);
937 ocean_assert(!affineTransform->isNull() && Numeric::isEqualEps((*affineTransform)[2]) && Numeric::isEqualEps((*affineTransform)[5]));
938
939 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
940
941 using PixelType = typename DataType<uint8_t, tChannels>::Type;
942
943 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
944 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
945
946 PixelType* outputPixelData = (PixelType*)(output + firstOutputRow * (outputWidth * tChannels + outputPaddingElements));
947
948 OCEAN_ALIGN_DATA(16)
949 unsigned int nearestNeighbours[4];
950
951 // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
952 const __m128 m128_f_X0 = _mm_set_ps1(float((*affineTransform)(0, 0)));
953 const __m128 m128_f_X1 = _mm_set_ps1(float((*affineTransform)(1, 0)));
954
955 // m128_u_inputStrideElements = [rowStride, rowStride, rowStride, rowStride], rowStride = inputWidth * tChannels + inputPaddingElements
956 const __m128i m128_i_inputStrideElements = _mm_set1_epi32(inputWidth * tChannels + inputPaddingElements);
957
958 // m128_u_channels = [tChannels, tChannels, tChannels, tChannels]
959 const __m128i m128_i_channels = _mm_set1_epi32(tChannels);
960
961 // m128_i_inputWidth_1 = [inputWidth - 1u, inputWidth - 1u, inputWidth - 1u, inputWidth - 1u]
962 const __m128i m128_i_inputWidth_1 = _mm_set1_epi32(inputWidth - 1u);
963
964 // m128_i_inputHeight_1 = [inputHeight - 1u, inputHeight - 1u, inputHeight - 1u, inputHeight - 1u]
965 const __m128i m128_i_inputHeight_1 = _mm_set1_epi32(inputHeight - 1u);
966
967 // m128_i_zero = [0, 0, 0, 0]
968 const __m128i m128_i_zero = _mm_setzero_si128();
969
970 // Indices (of elements) above this value in the input image are considered as outside of the image (intentionally not counting the last padding elements)
971 const unsigned int inputElementsEnd = inputHeight * inputWidth * tChannels + (inputHeight - 1u) * inputPaddingElements;
972
973 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
974 {
975 // We can slightly optimize the 3x3 matrix multiplication:
976 //
977 // | X0 Y0 Z0 | | x |
978 // | X1 Y1 Z1 | * | y |
979 // | 0 0 1 | | 1 |
980 //
981 // | xx | | X0 * x | | Y0 * y + Z0 |
982 // | yy | = | X1 * x | + | Y1 * y + Z1 |
983 //
984 // As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
985 //
986 // C0 = Y0 * y + Z0
987 // C1 = Y1 * y + Z1
988 //
989 // So the computation becomes:
990 //
991 // | x' | | X0 * x | | C0 |
992 // | y' | = | X1 * x | + | C1 |
993
994 // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
995 const __m128 m128_f_C0 = _mm_set_ps1(float((*affineTransform)(0, 1) * Scalar(y) + (*affineTransform)(0, 2)));
996 const __m128 m128_f_C1 = _mm_set_ps1(float((*affineTransform)(1, 1) * Scalar(y) + (*affineTransform)(1, 2)));
997
998 for (unsigned int x = 0u; x < outputWidth; x += 4u)
999 {
1000 if (x + 4u > outputWidth)
1001 {
1002 // the last iteration will not fit into the output frame,
1003 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
1004
1005 ocean_assert(x >= 4u && outputWidth > 4u);
1006 const unsigned int newX = outputWidth - 4u;
1007
1008 ocean_assert(x > newX);
1009 outputPixelData -= x - newX;
1010
1011 x = newX;
1012
1013 // the for loop will stop after this iteration
1014 ocean_assert(!(x + 4u < outputWidth));
1015 }
1016
1017 // we need four successive x coordinate floats:
1018 // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
1019 const __m128 m128_f_x_0123 = _mm_set_ps(float(x + 3u), float(x + 2u), float(x + 1u), float(x + 0u));
1020
1021 // we calculate xx and yy for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
1022 const __m128 m128_f_inputX = _mm_add_ps(_mm_mul_ps(m128_f_X0, m128_f_x_0123), m128_f_C0);
1023 const __m128 m128_f_inputY = _mm_add_ps(_mm_mul_ps(m128_f_X1, m128_f_x_0123), m128_f_C1);
1024
1025 // Compute the coordinates of the nearest neighbors
1026 const __m128i m128_i_inputX = _mm_cvtps_epi32(_mm_round_ps(m128_f_inputX, _MM_FROUND_TO_NEAREST_INT)); // x' = (int)round(x)
1027 const __m128i m128_i_inputY = _mm_cvtps_epi32(_mm_round_ps(m128_f_inputY, _MM_FROUND_TO_NEAREST_INT)); // y' = (int)round(y)
1028
1029 // Note: Detection of input position outside the input image
1030 //
1031 // If the input point is outside the input image, then set the index
1032 // of its nearest neighbor to a value that is above the number of
1033 // available pixels in the image. When writing to the output, a
1034 // check will make sure to use the background color for those
1035 // pixels:
1036 //
1037 // nearestNeighbour = isOutsideImage ? 0xFFFFFFFF : y * w + x
1038 // output = nearestNeighbour < inputPixelIndexEnd ? foregroundColor : backgroundColor.
1039 //
1040 // This approach keeps the amount of data that has to be transferred
1041 // between SSE and CPU registers to a minimum.
1042
1043 // isOutsideImage = (inputX < 0 || inputX > (width - 1u) || inputY < 0 || inputY > (height - 1u) ? 0xFFFFFFFF : 0x00000000;
1044 const __m128i m128_i_isOutsideImage = _mm_or_si128(
1045 _mm_or_si128(_mm_cmplt_epi32(m128_i_inputX, m128_i_zero), _mm_cmplt_epi32(m128_i_inputY, m128_i_zero)),
1046 _mm_or_si128(_mm_cmpgt_epi32(m128_i_inputX, m128_i_inputWidth_1), _mm_cmpgt_epi32(m128_i_inputY, m128_i_inputHeight_1)));
1047
1048 // Compute pixel index of the nearest neighbors of the valid pixels and store their pixel values
1049 // nearestNeighborsElement = (isOutsideImage ? 0xFFFFFFFF : (inputY * inputStrideElements) + (inputX * channels))
1050 const __m128i m_128_i_nearestNeighborElements = _mm_or_si128(m128_i_isOutsideImage, _mm_add_epi32(_mm_mullo_epi32(m128_i_inputY, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_inputX, m128_i_channels)));
1051 _mm_store_si128((__m128i*)nearestNeighbours, m_128_i_nearestNeighborElements);
1052
1053 // Update the output pixels
1054 outputPixelData[0] = nearestNeighbours[0] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighbours[0]) : *bColor;
1055 outputPixelData[1] = nearestNeighbours[1] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighbours[1]) : *bColor;
1056 outputPixelData[2] = nearestNeighbours[2] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighbours[2]) : *bColor;
1057 outputPixelData[3] = nearestNeighbours[3] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighbours[3]) : *bColor;
1058
1059 outputPixelData += 4u;
1060 }
1061
1062 outputPixelData = (PixelType*)((uint8_t*)outputPixelData + outputPaddingElements);
1063 }
1064}
1065
1066template <typename T, unsigned int tChannels>
1067void FrameInterpolatorNearestPixel::homographySSESubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
1068{
1069 static_assert(tChannels > 0u, "Invalid channel number!");
1070
1071 ocean_assert(input != nullptr && output != nullptr);
1072 ocean_assert(inputWidth > 0u && inputHeight > 0u);
1073 ocean_assert(outputWidth >= 4u && outputHeight > 0u);
1074 ocean_assert(input_H_output != nullptr && !input_H_output->isSingular());
1075
1076 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
1077
1078 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
1079 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
1080
1081 using PixelType = typename DataType<T, tChannels>::Type;
1082
1083 const T zeroColor[tChannels] = {T(0)};
1084 const PixelType bColor = borderColor ? *(const PixelType*)(borderColor) : *(const PixelType*)(zeroColor);
1085
1086 OCEAN_ALIGN_DATA(16) unsigned int nearestNeighbourElementOffsets[4];
1087
1088 // | X0 Y0 Z0 | | x |
1089 // Homography H = | X1 Y1 Z1 |, point p = | y |
1090 // | X2 Y2 Z2 | | 1 |
1091 //
1092 // | xx |
1093 // pp = H * p = | yy |
1094 // | zz |
1095 //
1096 // | xx | | X0 Y0 Z0 | | x |
1097 // <=> | yy | = | X1 Y1 Z1 | * | y |
1098 // | zz | | X2 Y2 Z2 | | 1 |
1099 //
1100 // | xx | | X0 * x | | Y0 * y + Z0 |
1101 // <=> | yy | = | X1 * x | + | Y1 * y + Z1 |
1102 // | zz | | X2 * x | | Y2 * y + Z2 |
1103 //
1104 // | xx | | X0 * x | | C0 | | Y0 * y + Z0 |
1105 // <=> | yy | = | X1 * x | + | C1 |, C = | Y1 * y + Z1 |
1106 // | zz | | X2 * x | | C2 | | Y2 * y + Z2 |
1107 //
1108 // Where C is a constant term that can be pre-computed (per image row)
1109 //
1110 // | x' | | xx / zz | | (X0 * x + C0) / (X2 * x + C2) |
1111 // p' = | y' | = | yy / zz | = | (X1 * x + C1) / (X2 * x + C2) |
1112
1113 // [Xi, Xi, Xi, Xi], i = {0, 1, 2}
1114 const __m128 m128_f_X0 = _mm_set_ps1((float)(*input_H_output)(0, 0));
1115 const __m128 m128_f_X1 = _mm_set_ps1((float)(*input_H_output)(1, 0));
1116 const __m128 m128_f_X2 = _mm_set_ps1((float)(*input_H_output)(2, 0));
1117
1118 // Store 4 integers: [inputStrideElements, inputStrideElements, inputStrideElements, inputStrideElements]
1119 const __m128i m128_i_inputStrideElements = _mm_set1_epi32(inputStrideElements);
1120
1121 const unsigned int inputPixelElementIndexEnd = inputHeight * inputStrideElements;
1122
1123 // m128_i_inputWidth_1 = [inputWidth - 1u, inputWidth - 1u, inputWidth - 1u, inputWidth - 1u]
1124 const __m128i m128_i_inputWidth_1 = _mm_set1_epi32(inputWidth - 1u);
1125
1126 // m128_i_inputHeight_1 = [inputHeight - 1u, inputHeight - 1u, inputHeight - 1u, inputHeight - 1u]
1127 const __m128i m128_i_inputHeight_1 = _mm_set1_epi32(inputHeight - 1u);
1128
1129 // [tChannels, tChannels, tChannels tChannels]
1130 const __m128i m128_i_channels = _mm_set1_epi32(tChannels);
1131
1132 // m128_i_zero = [0, 0, 0, 0]
1133 const __m128i m128_i_zero = _mm_setzero_si128();
1134
1135 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
1136 {
1137 PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
1138
1139 // Pre-compute the constant terms [Ci, Ci, Ci, Ci], i={0, 1, 2}
1140 const __m128 m128_f_C0 = _mm_set_ps1((float)((*input_H_output)(0, 1) * Scalar(y) + ((*input_H_output)(0, 2))));
1141 const __m128 m128_f_C1 = _mm_set_ps1((float)((*input_H_output)(1, 1) * Scalar(y) + ((*input_H_output)(1, 2))));
1142 const __m128 m128_f_C2 = _mm_set_ps1((float)((*input_H_output)(2, 1) * Scalar(y) + ((*input_H_output)(2, 2))));
1143
1144 for (unsigned int x = 0u; x < outputWidth; x += 4u)
1145 {
1146 if (x + 4u > outputWidth)
1147 {
1148 // the last iteration will not fit into the output frame,
1149 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
1150
1151 ocean_assert(x >= 4u && outputWidth > 4u);
1152 const unsigned int newX = outputWidth - 4u;
1153
1154 ocean_assert(x > newX);
1155 outputPixelData -= x - newX;
1156
1157 x = newX;
1158
1159 // the for loop will stop after this iteration
1160 ocean_assert(!(x + 4u < outputWidth));
1161 }
1162
1163 // we need four successive x coordinate floats:
1164 // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
1165 const __m128 m128_f_x_0123 = _mm_set_ps(float(x + 3u), float(x + 2u), float(x + 1u), float(x + 0u));
1166
1167 // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
1168 const __m128 m128_f_xx = _mm_add_ps(_mm_mul_ps(m128_f_X0, m128_f_x_0123), m128_f_C0);
1169 const __m128 m128_f_yy = _mm_add_ps(_mm_mul_ps(m128_f_X1, m128_f_x_0123), m128_f_C1);
1170 const __m128 m128_f_zz = _mm_add_ps(_mm_mul_ps(m128_f_X2, m128_f_x_0123), m128_f_C2);
1171
1172#ifdef USE_APPROXIMATED_INVERSE_OF_ZZ // (not defined by default)
1173
1174 // we calculate the (approximated) inverse of zz,
1175 // the overall performance will be approx. 5% better while the accuracy will be slightly worse:
1176 // [1/zz3, 1/zz2, 1/zz1, 1/zz0]
1177 const __m128 inv_zz_128 = _mm_rcp_ps(m128_f_zz);
1178
1179 // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
1180 const __m128 m128_f_inputX = _mm_mul_ps(m128_f_xx, inv_zz_128);
1181 const __m128 m128_f_inputY = _mm_mul_ps(m128_f_yy, inv_zz_128);
1182
1183#else
1184
1185 // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
1186 const __m128 m128_f_inputX = _mm_div_ps(m128_f_xx, m128_f_zz);
1187 const __m128 m128_f_inputY = _mm_div_ps(m128_f_yy, m128_f_zz);
1188
1189#endif // USE_APPROXIMATED_INVERSE_OF_ZZ
1190
1191 // Compute the coordinates of the nearest neighbors
1192 const __m128i m128_i_inputX = _mm_cvtps_epi32(_mm_round_ps(m128_f_inputX, _MM_FROUND_TO_NEAREST_INT)); // x' = (int)round(x)
1193 const __m128i m128_i_inputY = _mm_cvtps_epi32(_mm_round_ps(m128_f_inputY, _MM_FROUND_TO_NEAREST_INT)); // y' = (int)round(y)
1194
1195 // Note: Detection of input position outside the input image
1196 //
1197 // If the input point is outside the input image, then set the index
1198 // of its nearest neighbor to a value that is above the number of
1199 // available pixels in the image. When writing to the output, a
1200 // check will make sure to use the background color for those
1201 // pixels:
1202 //
1203 // nearestNeighbour = isOutsideImage ? 0xFFFFFFFF : y * w + x
1204 // output = nearestNeighbour < inputPixelIndexEnd ? foregroundColor : backgroundColor.
1205 //
1206 // This approach keeps the amount of data that has to be transferred
1207 // between SSE and CPU registers to a minimum.
1208
1209 // isOutsideImage = (inputX < 0 || inputX > (width - 1u) || inputY < 0 || inputY > (height - 1u) ? 0xFFFFFFFF : 0x00000000;
1210 const __m128i m128_i_isOutsideImage = _mm_or_si128(
1211 _mm_or_si128(_mm_cmplt_epi32(m128_i_inputX, m128_i_zero), _mm_cmplt_epi32(m128_i_inputY, m128_i_zero)),
1212 _mm_or_si128(_mm_cmpgt_epi32(m128_i_inputX, m128_i_inputWidth_1), _mm_cmpgt_epi32(m128_i_inputY, m128_i_inputHeight_1)));
1213
1214 // Compute pixel index of the nearest neighbors of the valid pixels and store their pixel values
1215 // m_128_i_nearestNeighbors = (isOutsideImage ? 0xFFFFFFFF : inputY * inputWidth + inputX)
1216 const __m128i m_128_i_nearestNeighbors = _mm_or_si128(m128_i_isOutsideImage, _mm_add_epi32(_mm_mullo_epi32(m128_i_inputY, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_inputX, m128_i_channels))); // nn = y' * inputWidth + x'
1217 _mm_store_si128((__m128i*)nearestNeighbourElementOffsets, m_128_i_nearestNeighbors);
1218
1219 // Update the output pixels
1220 outputPixelData[0] = nearestNeighbourElementOffsets[0] < inputPixelElementIndexEnd ? *(const PixelType*)(input + nearestNeighbourElementOffsets[0]) : bColor;
1221 outputPixelData[1] = nearestNeighbourElementOffsets[1] < inputPixelElementIndexEnd ? *(const PixelType*)(input + nearestNeighbourElementOffsets[1]) : bColor;
1222 outputPixelData[2] = nearestNeighbourElementOffsets[2] < inputPixelElementIndexEnd ? *(const PixelType*)(input + nearestNeighbourElementOffsets[2]) : bColor;
1223 outputPixelData[3] = nearestNeighbourElementOffsets[3] < inputPixelElementIndexEnd ? *(const PixelType*)(input + nearestNeighbourElementOffsets[3]) : bColor;
1224
1225 outputPixelData += 4u;
1226 }
1227 }
1228}
1229
1230#endif // OCEAN_HARDWARE_SSE_VERSION >= 41
1231
1232#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1233
1234template <unsigned int tChannels>
1235void FrameInterpolatorNearestPixel::affine8BitPerChannelIntegerNEONSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
1236{
1237 // The following optimizations have been applied:
1238 //
1239 // - Matrix-vector multiplication for affine transformations:
1240 //
1241 // | x' | | X0 Y0 Z0 | | x |
1242 // | y' | = | X1 Y1 Z1 | * | y |
1243 // | 1 | | 0 0 1 | | 1 |
1244 //
1245 // which is
1246 //
1247 // x' = X0 * x + Y0 * y + Z0
1248 // y' = X1 * x + Y1 * y + Z1
1249 //
1250 // We can slightly optimize this operation, since y is constant within the inner
1251 // loop. The two terms on the right side in the above equations can be
1252 // pre-calculated:
1253 //
1254 // C0 = Y0 * y + Z0
1255 // C1 = Y1 * y + Z1
1256 //
1257 // So the computation becomes:
1258 //
1259 // | x' | | X0 * x | | C0 |
1260 // | y' | = | X1 * x | + | C1 |
1261 //
1262 // - For better utilization of cache coherence, the (output) image is processed
1263 // in blocks (64 x 64 pixels, if possible)
1264 //
1265 // - Integer fixed-point arithmetic.
1266 //
1267 // - Update products from floating point numbers with the beginning of blocks,
1268 // because the rounding error of fixed-point operations increases for larger
1269 // values:
1270 //
1271 // f - float number
1272 // i - fixed-point representation of f
1273 // v - coordinate value
1274 // eps = (f - i) - loss of precision (eps > 0)
1275 //
1276 // Rounding error:
1277 //
1278 // e = |(v * f) - (v * i)| = |v * (f - i)| = |v * eps|
1279 // (increases linearly for larger coordinate values v, i.e., with image size)
1280 //
1281 // The rounding error can be kept at bay by replacing the product (v * i) with
1282 // (v * f) at the beginning of each block followed by adding an offset for all
1283 // other pixels in the block, (N * f) where N is the number of pixels which
1284 // are processed concurrently by SIMD instructions.
1285 //
1286
1287 static_assert(tChannels >= 1u, "Invalid channel number!");
1288
1289 constexpr unsigned int fractionalBits = 15u;
1290 constexpr unsigned int totalBits = (unsigned int)(CHAR_BIT * sizeof(int));
1291
1292 static_assert((fractionalBits + 1u /* sign bit */) < totalBits, "Number of fractional bits exceeds number of total bits");
1293
1294 constexpr unsigned int maxImageEdgeLength = 1u << (totalBits - fractionalBits - 1u /* sign bit */);
1295
1296 // Scale to convert float value, v, to fixed-point value, v_q = int(round(fixedPointScale * v))
1297 constexpr Scalar fixedPointScale = Scalar(1u << fractionalBits);
1298
1299 // Number of pixels processed by NEON in each iteration
1300 constexpr unsigned int pixelsPerIteration = 4u;
1301
1302 ocean_assert(input && output);
1303 ocean_assert_and_suppress_unused(inputWidth > 0u && inputHeight > 0u && inputWidth <= maxImageEdgeLength && inputHeight <= maxImageEdgeLength, maxImageEdgeLength);
1304 ocean_assert_and_suppress_unused(outputWidth >= pixelsPerIteration && outputHeight > 0u && outputWidth <= maxImageEdgeLength && outputHeight <= maxImageEdgeLength, maxImageEdgeLength);
1305 ocean_assert(affineTransform);
1306 ocean_assert(!affineTransform->isNull() && Numeric::isEqualEps((*affineTransform)[2]) && Numeric::isEqualEps((*affineTransform)[5]));
1307
1308 ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
1309
1310 using PixelType = typename DataType<uint8_t, tChannels>::Type;
1311
1312 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
1313 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
1314
1315 PixelType* outputPixelData = (PixelType*)(output + firstOutputRow * (outputWidth * tChannels + outputPaddingElements));
1316
1317 const unsigned int outputRowEnd = firstOutputRow + numberOutputRows;
1318
1319 // Inidices of the final nearest neighbor pixel, which are used to the interpolation
1320 unsigned int nearestNeighboursElements[4];
1321
1322 // Indices (of elements) above this value in the input image are considered as outside of the image (intentionally not counting the last padding elements)
1323 const unsigned int inputElementsEnd = inputHeight * inputWidth * tChannels + (inputHeight - 1u) * inputPaddingElements;
1324
1325 // m128_u_inputWidth = [inputWidth, intputWidth, intputWidth, intputWidth], and the same for inputHeight
1326 const uint32x4_t m128_u_inputWidth = vdupq_n_u32(inputWidth);
1327 const uint32x4_t m128_u_inputHeight = vdupq_n_u32(inputHeight);
1328
1329 // m128_u_inputStrideElements = [rowStride, rowStride, rowStride, rowStride], rowStride = inputWidth * tChannels + inputPaddingElements
1330 const uint32x4_t m128_u_inputStrideElements = vdupq_n_u32(inputWidth * tChannels + inputPaddingElements);
1331
1332 // m128_u_channels = [tChannels, tChannels, tChannels, tChannels]
1333 const uint32x4_t m128_u_channels = vdupq_n_u32(tChannels);
1334
1335 // m128_s_offsets_0123 = [0, 1, 2, 3]
1336 const int offsets_0123[4] = { 0, 1, 2, 3 };
1337 const int32x4_t m128_s_offsets_0123 = vld1q_s32(offsets_0123);
1338
1339 // m128_f_pixelsPerIteration = [4.0f, 4.0f, 4.0f, 4.0f]
1340 const float32x4_t m128_f_pixelsPerIteration = vdupq_n_f32((float)pixelsPerIteration);
1341
1342 // Float-based transformation value X0 multiplied with scale for fixed-point
1343 // numbers. This is used to update the fixed-point products, X0 * x and X1 * x,
1344 // at the beginning of each block, i.e.
1345 // m128_f_q_X0 = [v, v, v, v], v = fixedPointScale * X0, and the same for X1
1346 const float32x4_t m128_f_X0 = vdupq_n_f32(float(fixedPointScale * (*affineTransform)(0, 0)));
1347 const float32x4_t m128_f_X1 = vdupq_n_f32(float(fixedPointScale * (*affineTransform)(1, 0)));
1348
1349 // Increment that is added to fixed-point product computed at the beginning of
1350 // each block, X0 * x and X1 * x, in each iteration inside the block
1351 const int32x4_t m128_s_q_X0x_increment = vcvtq_s32_f32(vmulq_f32(m128_f_X0, m128_f_pixelsPerIteration));
1352 const int32x4_t m128_s_q_X1x_increment = vcvtq_s32_f32(vmulq_f32(m128_f_X1, m128_f_pixelsPerIteration));
1353
1354 // Determine the optimal block size
1355 constexpr unsigned int blockSize = 64u;
1356 constexpr unsigned int blockElements = blockSize * blockSize;
1357 const unsigned int blockWidth = std::min(blockElements / std::min(numberOutputRows, blockSize), outputWidth);
1358 const unsigned int blockHeight = std::min(blockElements / blockWidth, numberOutputRows);
1359 ocean_assert(blockWidth > 0u && blockWidth <= outputWidth);
1360 ocean_assert(blockHeight > 0u && blockHeight <= numberOutputRows);
1361
1362 // Index of pixel that is the last in a block of #pixelsPerIterations pixels, i.e. number of remaining pixels after
1363 // this point are less than #pixelsPerIterations. When this pixel index is reached all pointers will be moved left
1364 // so that we can process one last block of #pixelsPerIterations pixels. That also means that depending on the width
1365 // of the output image between [1, pixelsPerIterations) pixels will be computed a second time.
1366 const unsigned int lastMultipleNeonPixelBlockStart = outputWidth - pixelsPerIteration;
1367
1368 // m128_f_lastMultipleNeonPixelBlockStart = [(float)(lastMultipleNeonPixelBlockStart + 0), (float)(lastMultipleNeonPixelBlockStart + 1), (float)(lastMultipleNeonPixelBlockStart + 2), (float)(lastMultipleNeonPixelBlockStart + 3)]
1369 const float32x4_t m128_f_lastMultipleNeonPixelBlockStart = vcvtq_f32_s32(vaddq_s32(vdupq_n_s32((int)lastMultipleNeonPixelBlockStart), m128_s_offsets_0123));
1370
1371 // m128_s_q_X0x_lastMultipleNeonPixelBlockStart = [v0, v1, v2, v3], vi = int(round(fixedPointScale * X0 * (lastMultipleNeonPixelBlockStart + i))), i = 0...3, and similarly for X1
1372 const int32x4_t m128_s_q_X0x_lastMultipleNeonPixelBlockStart = vcvtq_s32_f32(vmulq_f32(m128_f_X0, m128_f_lastMultipleNeonPixelBlockStart));
1373 const int32x4_t m128_s_q_X1x_lastMultipleNeonPixelBlockStart = vcvtq_s32_f32(vmulq_f32(m128_f_X1, m128_f_lastMultipleNeonPixelBlockStart));
1374
1375 for (unsigned int blockYStart = firstOutputRow; blockYStart < outputRowEnd; blockYStart += blockHeight)
1376 {
1377 const unsigned int blockYEnd = std::min(blockYStart + blockHeight, outputRowEnd);
1378
1379 for (unsigned int blockXStart = 0u; blockXStart < outputWidth; blockXStart += blockWidth)
1380 {
1381 const unsigned int blockXEnd = std::min(blockXStart + blockWidth, outputWidth);
1382
1383 for (unsigned int y = blockYStart; y < blockYEnd; ++y)
1384 {
1385 outputPixelData = (PixelType*)(output + y * (outputWidth * tChannels + outputPaddingElements) + blockXStart * tChannels);
1386
1387 // Constant parts, cf. optimization of matrix-vector multiplication above
1388 // m128_s_C0 = [C0, C0, C0, C0], C0 = int(round(leftShiftFactor * (Y0 * y + Z0))), and similarly for C1
1389 const int32x4_t m128_s_q_C0 = vdupq_n_s32(Numeric::round32(fixedPointScale * ((*affineTransform)(0, 1) * Scalar(y) + (*affineTransform)(0, 2))));
1390 const int32x4_t m128_s_q_C1 = vdupq_n_s32(Numeric::round32(fixedPointScale * ((*affineTransform)(1, 1) * Scalar(y) + (*affineTransform)(1, 2))));
1391
1392 // Update products, X0 * x and X1 * x, from floating point numbers with the
1393 // beginning of this block, since the rounding error of fixed-point operations
1394 // increases for larger coordinate values, cf. list of optimizations above.
1395 //
1396 // m128_s_x_0123 = [blockXStart + 0, blockXStart + 1, blockXStart + 2, blockXStart + 3]
1397 const int32x4_t m128_s_x_0123 = vaddq_s32(vdupq_n_s32(int(blockXStart)), m128_s_offsets_0123);
1398
1399 // m128_f_x_0123 = [(float)(x + 0), (float)(x + 1), (float)(x + 2), (float)(x + 3)]
1400 const float32x4_t m128_f_x_0123 = vcvtq_f32_s32(m128_s_x_0123);
1401
1402 // m128_s_q_X0x = [v0, v1, v2, v3], vi = int(round(fixedPointScale * X0 * (x + i))), i = 0...3, and similarly for X1
1403 int32x4_t m128_s_q_X0x = vcvtq_s32_f32(vmulq_f32(m128_f_X0, m128_f_x_0123));
1404 int32x4_t m128_s_q_X1x = vcvtq_s32_f32(vmulq_f32(m128_f_X1, m128_f_x_0123));
1405
1406 for (unsigned int x = blockXStart; x < blockXEnd; x += pixelsPerIteration)
1407 {
1408 if (x + pixelsPerIteration > outputWidth)
1409 {
1410 ocean_assert(x + pixelsPerIteration > outputWidth);
1411 ocean_assert(x >= pixelsPerIteration && outputWidth > pixelsPerIteration);
1412 ocean_assert(lastMultipleNeonPixelBlockStart == (outputWidth - pixelsPerIteration));
1413
1414 outputPixelData -= (x - lastMultipleNeonPixelBlockStart);
1415
1416 x = lastMultipleNeonPixelBlockStart;
1417
1418 m128_s_q_X0x = m128_s_q_X0x_lastMultipleNeonPixelBlockStart;
1419 m128_s_q_X1x = m128_s_q_X1x_lastMultipleNeonPixelBlockStart;
1420
1421 // the for loop will stop after this iteration
1422 ocean_assert(!(x + pixelsPerIteration < outputWidth));
1423 }
1424
1425 // Compute pixel location in the input image
1426 // m128_s_q_inputX = x' = C0 + X0 * x
1427 // m128_s_q_inputY = y' = C1 + X1 * x
1428 const int32x4_t m128_s_q_inputX = vaddq_s32(m128_s_q_C0, m128_s_q_X0x);
1429 const int32x4_t m128_s_q_inputY = vaddq_s32(m128_s_q_C1, m128_s_q_X1x);
1430
1431 // Convert (signed) fixed-point location to unsigned int, i.e., negative values
1432 // will be larger than image dimensions (width, height), cf. note below
1433 //
1434 // m128_u_inputX = (unsigned int) round(inputX >> N)
1435 // m128_u_inputY = (unsigned int) round(inputY >> N)
1436 const uint32x4_t m128_u_inputX = vreinterpretq_u32_s32(vrshrq_n_s32(m128_s_q_inputX, fractionalBits));
1437 const uint32x4_t m128_u_inputY = vreinterpretq_u32_s32(vrshrq_n_s32(m128_s_q_inputY, fractionalBits));
1438
1439 // Note: Detection of input position outside the input image
1440 //
1441 // If the input point is outside the input image, then set the index
1442 // of its nearest neighbor to a value that is above the number of
1443 // available pixels in the image. When writing to the output, a
1444 // check will make sure to use the background color for those
1445 // pixels:
1446 //
1447 // nearestNeighbour = isOutsideImage ? 0xFFFFFFFF : y * w + x
1448 // output = nearestNeighbour < inputPixelIndexEnd ? foregroundColor : backgroundColor.
1449 //
1450 // This approach keeps the amount of data that has to be transferred
1451 // between NEON and CPU registers to a minimum.
1452
1453 // Casting negative signed values to unsigned value results in very large values, e.g., ((unsigned int) -1) > inputWidth.
1454 // We'll exploit that below to check if pixel coordinates are outside the image.
1455 // m128_u_isOutsideImage = (x >= inputWidth || y >= inputHeight) ? 0xFFFFFFFF : 0x00000000;
1456 const uint32x4_t m128_u_isOutsideImage = vorrq_u32(vcgeq_u32(m128_u_inputX, m128_u_inputWidth), vcgeq_u32(m128_u_inputY, m128_u_inputHeight));
1457
1458 // Determine the pixel indices of the nearest neighbors and store the result
1459 // If the pixel is outside the image then set the index of the nearest neighbor to the largest possible value
1460 // m_128_u_nearestNeighbors = m128_u_isOutsideImage | (inputY * inputStrideElements) + (inputX * channels);
1461 // which is equivalent to
1462 // m_128_u_nearestNeighborElements = (m128_u_isOutsideImage ? 0xFFFFFFFF : (inputY * inputStrideElements) + (inputX * channels))
1463 const uint32x4_t m_128_u_nearestNeighborsElements = vorrq_u32(m128_u_isOutsideImage, vaddq_u32(vmulq_u32(m128_u_inputY, m128_u_inputStrideElements), vmulq_u32(m128_u_inputX, m128_u_channels)));
1464 vst1q_u32(nearestNeighboursElements, m_128_u_nearestNeighborsElements);
1465
1466 outputPixelData[0] = nearestNeighboursElements[0] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighboursElements[0]) : *bColor;
1467 outputPixelData[1] = nearestNeighboursElements[1] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighboursElements[1]) : *bColor;
1468 outputPixelData[2] = nearestNeighboursElements[2] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighboursElements[2]) : *bColor;
1469 outputPixelData[3] = nearestNeighboursElements[3] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighboursElements[3]) : *bColor;
1470
1471 outputPixelData += pixelsPerIteration;
1472
1473 // m128_s_q_X0x += m128_s_q_X0x_increment, and similarly for X1
1474 m128_s_q_X0x = vaddq_s32(m128_s_q_X0x, m128_s_q_X0x_increment);
1475 m128_s_q_X1x = vaddq_s32(m128_s_q_X1x, m128_s_q_X1x_increment);
1476 }
1477 }
1478 }
1479
1480 outputPixelData = (PixelType*)((uint8_t*)outputPixelData + outputPaddingElements);
1481 }
1482}
1483
1484template <typename T, unsigned int tChannels>
1485void FrameInterpolatorNearestPixel::homographyNEONSubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
1486{
1487 static_assert(tChannels >= 1u, "Invalid channel number!");
1488
1489 ocean_assert(input != nullptr && output != nullptr);
1490 ocean_assert(inputWidth > 0u && inputHeight > 0u);
1491 ocean_assert(outputWidth >= 4u && outputHeight > 0u);
1492 ocean_assert(input_H_output != nullptr && !input_H_output->isSingular());
1493
1494 ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
1495
1496 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
1497 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
1498
1499 using PixelType = typename DataType<T, tChannels>::Type;
1500
1501 const T zeroColor[tChannels] = {T(0)};
1502 const PixelType bColor = borderColor ? *(const PixelType*)(borderColor) : *(const PixelType*)(zeroColor);
1503
1504 unsigned int validPixels[4];
1505 unsigned int nearestNeighbourElementOffsets[4];
1506
1507 // | X0 Y0 Z0 | | x |
1508 // Homography H = | X1 Y1 Z1 |, point p = | y |
1509 // | X2 Y2 Z2 | | 1 |
1510 //
1511 // | xx |
1512 // pp = H * p = | yy |
1513 // | zz |
1514 //
1515 // | xx | | X0 Y0 Z0 | | x |
1516 // <=> | yy | = | X1 Y1 Z1 | * | y |
1517 // | zz | | X2 Y2 Z2 | | 1 |
1518 //
1519 // | xx | | X0 * x | | Y0 * y + Z0 |
1520 // <=> | yy | = | X1 * x | + | Y1 * y + Z1 |
1521 // | zz | | X2 * x | | Y2 * y + Z2 |
1522 //
1523 // | xx | | X0 * x | | C0 | | Y0 * y + Z0 |
1524 // <=> | yy | = | X1 * x | + | C1 |, C = | Y1 * y + Z1 |
1525 // | zz | | X2 * x | | C2 | | Y2 * y + Z2 |
1526 //
1527 // Where C is a constant term that can be pre-computed (per image row)
1528 //
1529 // | x' | | xx / zz | | (X0 * x + C0) / (X2 * x + C2) |
1530 // p' = | y' | = | yy / zz | = | (X1 * x + C1) / (X2 * x + C2) |
1531
1532 // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
1533 const float32x4_t m128_f_X0 = vdupq_n_f32(float((*input_H_output)(0, 0)));
1534 const float32x4_t m128_f_X1 = vdupq_n_f32(float((*input_H_output)(1, 0)));
1535 const float32x4_t m128_f_X2 = vdupq_n_f32(float((*input_H_output)(2, 0)));
1536
1537 // we store 4 floats: [0.5f, 0.5f, 0.5f, 0.5f]
1538 const float32x4_t m128_f_pointFive = vdupq_n_f32(0.5f);
1539 const float32x4_t m128_f_negPointFive = vdupq_n_f32(-0.5f);
1540
1541 // we store 4 integers: [inputStrideElements, inputStrideElements, inputStrideElements, inputStrideElements]
1542 const uint32x4_t m128_u_inputStrideElements = vdupq_n_u32(inputStrideElements);
1543
1544 const uint32x4_t m128_u_channels = vdupq_n_u32(tChannels);
1545
1546 // we store 4 floats: [inputWidth - 0.5f, inputWidth - 0.5f, inputWidth - 0.5f, inputWidth - 0.5f], and same with inputHeight
1547 const float32x4_t m128_f_inputWidth_pointFive = vdupq_n_f32(float(inputWidth) - 0.5f);
1548 const float32x4_t m128_f_inputHeight_pointFive = vdupq_n_f32(float(inputHeight) - 0.5f);
1549
1550 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
1551 {
1552 PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
1553
1554 // Pre-compute the constant terms [Ci, Ci, Ci, Ci], i={0, 1, 2}
1555 const float32x4_t m128_f_C0 = vdupq_n_f32(float((*input_H_output)(0, 1) * Scalar(y) + (*input_H_output)(0, 2)));
1556 const float32x4_t m128_f_C1 = vdupq_n_f32(float((*input_H_output)(1, 1) * Scalar(y) + (*input_H_output)(1, 2)));
1557 const float32x4_t m128_f_C2 = vdupq_n_f32(float((*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2)));
1558
1559 for (unsigned int x = 0u; x < outputWidth; x += 4u)
1560 {
1561 if (x + 4u > outputWidth)
1562 {
1563 // Since the last iteration will not fit into the output frame, we'll shift N pixel left so that it fits again (at most 3 pixels).
1564
1565 ocean_assert(x >= 4u && outputWidth > 4u);
1566 const unsigned int newX = outputWidth - 4u;
1567
1568 ocean_assert(x > newX);
1569 outputPixelData -= x - newX;
1570
1571 x = newX;
1572
1573 // the for loop will stop after this iteration
1574 ocean_assert(!(x + 4u < outputWidth));
1575 }
1576
1577 // we need four successive x coordinate floats:
1578 // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
1579 float x_0123[4] = { float(x + 0u), float(x + 1u), float(x + 2u), float(x + 3u) };
1580 const float32x4_t m128_f_x_0123 = vld1q_f32(x_0123);
1581
1582 // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
1583 const float32x4_t m128_f_xx = vmlaq_f32(m128_f_C0, m128_f_X0, m128_f_x_0123);
1584 const float32x4_t m128_f_yy = vmlaq_f32(m128_f_C1, m128_f_X1, m128_f_x_0123);
1585 const float32x4_t m128_f_zz = vmlaq_f32(m128_f_C2, m128_f_X2, m128_f_x_0123);
1586
1587#ifdef USE_DIVISION_ARM64_ARCHITECTURE
1588
1589 // using the division available from ARM64 is more precise
1590 const float32x4_t m128_f_inputX = vdivq_f32(m128_f_xx, m128_f_zz);
1591 const float32x4_t m128_f_inputY = vdivq_f32(m128_f_yy, m128_f_zz);
1592
1593#else
1594
1595 // we calculate the (approximated) inverse of zz
1596 // [1/zz3, 1/zz2, 1/zz1, 1/zz0]
1597 float32x4_t inv_zz_128 = vrecpeq_f32(m128_f_zz);
1598 inv_zz_128 = vmulq_f32(vrecpsq_f32(m128_f_zz, inv_zz_128), inv_zz_128); // improving the accuracy of the approx. inverse by Newton/Raphson
1599
1600 // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
1601 const float32x4_t m128_f_inputX = vmulq_f32(m128_f_xx, inv_zz_128);
1602 const float32x4_t m128_f_inputY = vmulq_f32(m128_f_yy, inv_zz_128);
1603
1604#endif // USE_DIVISION_ARM64_ARCHITECTURE
1605
1606 // Mark pixels inside the input image as valid, all others as invalid
1607 const uint32x4_t m128_u_validPixelX = vandq_u32(vcltq_f32(m128_f_inputX, m128_f_inputWidth_pointFive), vcgtq_f32(m128_f_inputX, m128_f_negPointFive)); // inputX < (inputWidth - 0.5) && inputX >= -0.5 ? 0xFFFFFFFF : 0x00000000
1608 const uint32x4_t m128_u_validPixelY = vandq_u32(vcltq_f32(m128_f_inputY, m128_f_inputHeight_pointFive), vcgtq_f32(m128_f_inputY, m128_f_negPointFive)); // inputY < (inputHeight - 0.5) && inputY > -0.5 ? 0xFFFFFFFF : 0x00000000
1609
1610 const uint32x4_t m128_u_validPixel = vandq_u32(m128_u_validPixelX, m128_u_validPixelY); // is_inside_input_frame(inputPosition) ? 0xFFFFFFFF : 0x00000000
1611
1612 // Stop here if all pixels are invalid
1613 const uint32x2_t m64_u_validPixel = vorr_u32(vget_low_u32(m128_u_validPixel), vget_high_u32(m128_u_validPixel));
1614 if ((vget_lane_u32(m64_u_validPixel, 0) | vget_lane_u32(m64_u_validPixel, 1)) == 0x00000000u)
1615 {
1616#ifdef OCEAN_DEBUG
1617 // clang-format off
1618 OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
1619 // clang-format on
1620 vst1q_u32(debugValidPixels, m128_u_validPixel);
1621 ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
1622#endif
1623
1624 outputPixelData[0] = bColor;
1625 outputPixelData[1] = bColor;
1626 outputPixelData[2] = bColor;
1627 outputPixelData[3] = bColor;
1628
1629 outputPixelData += 4;
1630
1631 continue;
1632 }
1633
1634 // Determine the pixel indices of the nearest neighbors and store the result
1635 vst1q_u32(validPixels, m128_u_validPixel);
1636 ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
1637
1638 const uint32x4_t m128_u_inputX = vcvtq_u32_f32(vaddq_f32(m128_f_inputX, m128_f_pointFive)); // Round to nearest integer: x' = (int) (x + 0.5f)
1639 const uint32x4_t m128_u_inputY = vcvtq_u32_f32(vaddq_f32(m128_f_inputY, m128_f_pointFive)); // Round to nearest integer: y' = (int) (y + 0.5f)
1640 const uint32x4_t m_128_u_nearestNeighbourElementOffsets = vmlaq_u32(vmulq_u32(m128_u_inputY, m128_u_inputStrideElements), m128_u_inputX, m128_u_channels); // nn = y' * inputStrideElements + x' * channels
1641 vst1q_u32(nearestNeighbourElementOffsets, m_128_u_nearestNeighbourElementOffsets);
1642
1643#ifdef OCEAN_DEBUG
1644 unsigned int debugInputX[4];
1645 unsigned int debugInputY[4];
1646 vst1q_u32(debugInputX, m128_u_inputX);
1647 vst1q_u32(debugInputY, m128_u_inputY);
1648 ocean_assert(!validPixels[0] || (debugInputX[0] < inputWidth && debugInputY[0] < inputHeight));
1649 ocean_assert(!validPixels[1] || (debugInputX[1] < inputWidth && debugInputY[1] < inputHeight));
1650 ocean_assert(!validPixels[2] || (debugInputX[2] < inputWidth && debugInputY[2] < inputHeight));
1651 ocean_assert(!validPixels[3] || (debugInputX[3] < inputWidth && debugInputY[3] < inputHeight));
1652#endif
1653
1654 outputPixelData[0] = validPixels[0] ? *(const PixelType*)(input + nearestNeighbourElementOffsets[0]) : bColor;
1655 outputPixelData[1] = validPixels[1] ? *(const PixelType*)(input + nearestNeighbourElementOffsets[1]) : bColor;
1656 outputPixelData[2] = validPixels[2] ? *(const PixelType*)(input + nearestNeighbourElementOffsets[2]) : bColor;
1657 outputPixelData[3] = validPixels[3] ? *(const PixelType*)(input + nearestNeighbourElementOffsets[3]) : bColor;
1658
1659 outputPixelData += 4;
1660 }
1661 }
1662}
1663
1664#endif // OCEAN_HARDWARE_NEON_VERSION >= 10
1665
1666template <unsigned int tChannels>
1667void FrameInterpolatorNearestPixel::homographyMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* input_H_output, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
1668{
1669 static_assert(tChannels > 0u, "Invalid channel number!");
1670
1671 ocean_assert(input != nullptr && output != nullptr && outputMask != nullptr);
1672 ocean_assert(inputWidth > 0u && inputHeight > 0u);
1673 ocean_assert(outputWidth > 0u && outputHeight > 0u);
1674 ocean_assert(input_H_output != nullptr && !input_H_output->isSingular());
1675
1676 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
1677
1678 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
1679 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
1680 const unsigned int outputMaskStrideElements = outputWidth + outputMaskPaddingElements;
1681
1682 using PixelType = typename DataType<uint8_t, tChannels>::Type;
1683
1684 output += firstOutputRow * outputStrideElements;
1685 outputMask += firstOutputRow * outputMaskStrideElements;
1686
1687 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
1688 {
1689 PixelType* outputPixel = (PixelType*)(output);
1690
1691 for (unsigned int x = 0; x < outputWidth; ++x)
1692 {
1693 const Vector2 outputPosition = Vector2(Scalar(int(x) + outputOriginX), Scalar(int(y) + outputOriginY));
1694 const Vector2 inputPosition(*input_H_output * outputPosition);
1695
1696 const unsigned int inputX = Numeric::round32(inputPosition.x());
1697 const unsigned int inputY = Numeric::round32(inputPosition.y());
1698
1699 if (inputX < inputWidth && inputY < inputHeight)
1700 {
1701 *outputPixel = *((PixelType*)(input + inputY * inputStrideElements + inputX * tChannels));
1702 *outputMask = maskValue;
1703 }
1704 else
1705 {
1706 *outputMask = 0xFFu - maskValue;
1707 }
1708
1709 ++outputPixel;
1710 ++outputMask;
1711 }
1712
1713 output += outputStrideElements;
1714 outputMask += outputMaskPaddingElements;
1715 }
1716}
1717
1718template <unsigned int tChannels>
1719void FrameInterpolatorNearestPixel::transform8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* lookupTable, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
1720{
1721 static_assert(tChannels > 0u, "Invalid channel number!");
1722
1723 ocean_assert(lookupTable != nullptr);
1724 ocean_assert(input != nullptr && output != nullptr);
1725
1726 ocean_assert(inputWidth != 0u && inputHeight != 0u);
1727 ocean_assert(firstRow + numberRows <= lookupTable->sizeY());
1728
1729 const unsigned int outputWidth = (unsigned int)(lookupTable->sizeX());
1730
1731 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
1732 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
1733
1734 using PixelType = typename DataType<uint8_t, tChannels>::Type;
1735
1736 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
1737 const PixelType* const bColor = borderColor ? (PixelType*)(borderColor) : (PixelType*)(zeroColor);
1738
1739 if (offset)
1740 {
1741 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
1742 {
1743 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
1744
1745 for (unsigned int x = 0u; x < outputWidth; ++x)
1746 {
1747 const Vector2 inputOffset(lookupTable->bilinearValue(Scalar(x), Scalar(y)));
1748 const Vector2 inputPosition(Scalar(x) + inputOffset.x(), Scalar(y) + inputOffset.y());
1749
1750 const unsigned int inputX = Numeric::round32(inputPosition.x());
1751 const unsigned int inputY = Numeric::round32(inputPosition.y());
1752
1753 if (inputX < inputWidth && inputY < inputHeight)
1754 {
1755 *outputData = *((const PixelType*)(input + inputY * inputStrideElements) + inputX);
1756 }
1757 else
1758 {
1759 *outputData = *bColor;
1760 }
1761
1762 ++outputData;
1763 }
1764 }
1765 }
1766 else
1767 {
1768 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
1769 {
1770 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
1771
1772 for (unsigned int x = 0u; x < outputWidth; ++x)
1773 {
1774 const Vector2 inputPosition(lookupTable->bilinearValue(Scalar(x), Scalar(y)));
1775
1776 const unsigned int inputX = Numeric::round32(inputPosition.x());
1777 const unsigned int inputY = Numeric::round32(inputPosition.y());
1778
1779 if (inputX < inputWidth && inputY < inputHeight)
1780 {
1781 *outputData = *((const PixelType*)(input + inputY * inputStrideElements) + inputX);
1782 }
1783 else
1784 {
1785 *outputData = *bColor;
1786 }
1787
1788 ++outputData;
1789 }
1790 }
1791 }
1792}
1793
1794template <unsigned int tChannels>
1795void FrameInterpolatorNearestPixel::transformMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable* lookupTable, const bool offset, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows)
1796{
1797 static_assert(tChannels > 0u, "Invalid channel number!");
1798
1799 ocean_assert(lookupTable != nullptr);
1800 ocean_assert(input != nullptr && output != nullptr);
1801
1802 ocean_assert(inputWidth != 0u && inputHeight != 0u);
1803 ocean_assert(firstRow + numberRows <= lookupTable->sizeY());
1804
1805 ocean_assert(NumericT<unsigned int>::isInsideValueRange(lookupTable->sizeX()));
1806 const unsigned int outputWidth = (unsigned int)(lookupTable->sizeX());
1807
1808 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
1809 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
1810 const unsigned int outputMaskStrideElements = outputWidth + outputMaskPaddingElements;
1811
1812 using PixelType = typename DataType<uint8_t, tChannels>::Type;
1813
1814 output += firstRow * outputStrideElements;
1815 outputMask += firstRow * outputMaskStrideElements;
1816
1817 if (offset)
1818 {
1819 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
1820 {
1821 PixelType* outputPixel = (PixelType*)(output);
1822
1823 for (unsigned int x = 0u; x < lookupTable->sizeX(); ++x)
1824 {
1825 const Vector2 inputOffset(lookupTable->bilinearValue(Scalar(x), Scalar(y)));
1826 const Vector2 inputPosition(Scalar(x) + inputOffset.x(), Scalar(y) + inputOffset.y());
1827
1828 const unsigned int inputX = Numeric::round32(inputPosition.x());
1829 const unsigned int inputY = Numeric::round32(inputPosition.y());
1830
1831 if (inputX < inputWidth && inputY < inputHeight)
1832 {
1833 *outputPixel = *((PixelType*)(input + inputY * inputStrideElements + inputX * tChannels));
1834 *outputMask = maskValue;
1835 }
1836 else
1837 {
1838 *outputMask = 0xFF - maskValue;
1839 }
1840
1841 ++outputPixel;
1842 ++outputMask;
1843 }
1844
1845 output += outputStrideElements;
1846 outputMask += outputMaskPaddingElements;
1847 }
1848 }
1849 else
1850 {
1851 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
1852 {
1853 PixelType* outputPixel = (PixelType*)(output);
1854
1855 for (unsigned int x = 0u; x < lookupTable->sizeX(); ++x)
1856 {
1857 const Vector2 inputPosition(lookupTable->bilinearValue(Scalar(x), Scalar(y)));
1858
1859 const unsigned int inputX = Numeric::round32(inputPosition.x());
1860 const unsigned int inputY = Numeric::round32(inputPosition.y());
1861
1862 if (inputX < inputWidth && inputY < inputHeight)
1863 {
1864 *outputPixel = *((PixelType*)(input + inputY * inputStrideElements + inputX * tChannels));
1865 *outputMask = maskValue;
1866 }
1867 else
1868 {
1869 *outputMask = 0xFF - maskValue;
1870 }
1871
1872 ++outputPixel;
1873 ++outputMask;
1874 }
1875
1876 output += outputStrideElements;
1877 outputMask += outputMaskPaddingElements;
1878 }
1879 }
1880}
1881
1882} // namespace CV
1883
1884} // namespace Ocean
1885
1886#endif // META_OCEAN_CV_FRAME_INTERPOLATOR_NEAREST_PIXEL_H
The following comfort class provides comfortable functions simplifying prototyping applications but a...
Definition FrameInterpolatorNearestPixel.h:49
static bool rotate90(const Frame &input, Frame &output, const bool clockwise, Worker *worker=nullptr)
Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
Definition FrameInterpolatorNearestPixel.h:589
static bool affine(const Frame &input, Frame &output, const SquareMatrix3 &input_A_output, const uint8_t *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Applies an affine image transformation to a frame (with zipped pixel format) and renders using neares...
static bool resize(const Frame &source, Frame &target, Worker *worker=nullptr)
Resizes a given frame by a nearest pixel search.
static bool transform(const Frame &input, Frame &output, const LookupTable &lookupTable, const bool offset, const uint8_t *borderColor, Worker *worker=nullptr)
Transforms a given input frame (with 1 plane) into an output frame by application of an interpolation...
static bool transformMask(const Frame &input, Frame &output, Frame &outputMask, const LookupTable &lookupTable, const bool offset, Worker *worker=nullptr, const uint8_t maskValue=0xFFu)
Transforms a given input frame (with zipped pixel format) into an output frame by application of an i...
static bool homographyMask(const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 &input_H_output, Worker *worker=nullptr, const uint8_t maskValue=0xFFu, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame (with zipped pixel format) into an output frame (with arbitrary frame ...
static bool rotate180(const Frame &input, Frame &output, Worker *worker=nullptr)
Rotates a given frame by 180 degrees.
Definition FrameInterpolatorNearestPixel.h:594
static bool rotate(const Frame &input, Frame &output, const int angle, Worker *worker=nullptr)
Rotates a given frame with 90 degree steps.
Definition FrameInterpolatorNearestPixel.h:599
static bool homography(const Frame &input, Frame &output, const SquareMatrix3 &input_H_output, const void *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame (with zipped pixel format) into an output frame by application of a ho...
This class implements highly optimized interpolation functions with fixed properties.
Definition FrameInterpolatorNearestPixel.h:189
static void resize400x400To224x224_8BitPerChannel(const uint8_t *const source, uint8_t *const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 224x224 ...
This class implements a nearest pixel frame interpolator.
Definition FrameInterpolatorNearestPixel.h:35
static void resize(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Resizes a given frame by a nearest pixel search and uses several CPU cores to speed up the proces...
Definition FrameInterpolatorNearestPixel.h:605
static void homography(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, const T *borderColor, T *output, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of a homography.
Definition FrameInterpolatorNearestPixel.h:664
static void affine8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_A_output, const uint8_t *borderColor, uint8_t *output, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Applies an affine image transformation to an 8 bit per channel input frame and renders the output.
Definition FrameInterpolatorNearestPixel.h:620
static void transform8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable &lookupTable, const bool offset, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given input frame (with zipped pixel format) into an output frame by application of an i...
Definition FrameInterpolatorNearestPixel.h:719
static void homographySubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const T *borderColor, T *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Copies the image content of an input image to a subset of an output image by application of a given h...
Definition FrameInterpolatorNearestPixel.h:881
static void resizeSubset(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Resizes a given frame by a nearest pixel search.
Definition FrameInterpolatorNearestPixel.h:757
static void rotate90(const TElementType *source, TElementType *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const bool clockwise, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
Definition FrameInterpolatorNearestPixel.h:745
static void affine8BitPerChannelSSESubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3 *affineTransform, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Apply an affine image transformation to an 8 bit per channel frame using nearest neighbor interpolati...
Definition FrameInterpolatorNearestPixel.h:929
static void transformMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable *lookupTable, const bool offset, uint8_t *output, const unsigned int outputPaddingElements, uint8_t *outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows)
Transforms a given input frame (with zipped pixel format) into an output frame by application of an i...
Definition FrameInterpolatorNearestPixel.h:1795
static void homographyMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3 *input_H_output, uint8_t *output, const unsigned int outputPaddingElements, uint8_t *outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorNearestPixel.h:1667
static void affine8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3 *affineTransform, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Applies an affine image transformation to an 8 bit per channel frame using nearest neighbor interpolati...
Definition FrameInterpolatorNearestPixel.h:806
static void homographySSESubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const T *borderColor, T *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Copies the image content of an input image to a subset of an output image by application of a given h...
Definition FrameInterpolatorNearestPixel.h:1067
static void homographyMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3 &input_H_output, uint8_t *output, const unsigned int outputPaddingElements, uint8_t *outputMask, const unsigned int outputMaskPaddingElements, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition FrameInterpolatorNearestPixel.h:706
static void affine8BitPerChannelIntegerNEONSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3 *affineTransform, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Affine image transformation for 8-bit per channel frames using nearest neighbor interpolation (using ...
Definition FrameInterpolatorNearestPixel.h:1235
static void transform8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *lookupTable, const bool offset, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame (with zipped pixel format) into an output frame by applica...
Definition FrameInterpolatorNearestPixel.h:1719
static void transformMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable &lookupTable, const bool offset, uint8_t *output, const unsigned int outputPaddingElements, uint8_t *outputMask, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given input frame (with zipped pixel format) into an output frame by application of an i...
Definition FrameInterpolatorNearestPixel.h:732
static void homographyNEONSubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const T *borderColor, T *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Copies the image content of an input image to a subset of an output image by application of a given h...
Definition FrameInterpolatorNearestPixel.h:1485
static bool coversHomographyInputFrame(const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int outputWidth, const unsigned int outputHeight, const SquareMatrix3 &input_H_output, const int outputOriginX=0, const int outputOriginY=0)
Checks whether the application of a given homography for a specified input frame and output frame cov...
static bool rotate(const Frame &input, Frame &output, const int angle, Worker *worker=nullptr)
Rotates a given frame in 90-degree steps.
static bool rotate180(const Frame &input, Frame &output, Worker *worker=nullptr)
Rotates a given frame by 180 degrees.
static bool rotate90(const Frame &input, Frame &output, const bool clockwise, Worker *worker=nullptr)
Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
This class implements a 2D pixel position with pixel precision.
Definition PixelPosition.h:63
T y() const
Returns the vertical coordinate position of this object.
Definition PixelPosition.h:468
T x() const
Returns the horizontal coordinate position of this object.
Definition PixelPosition.h:456
static Caller< void > createStatic(typename StaticFunctionPointerMaker< void, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass >::Type function)
Creates a new caller container for a static function with no function parameter.
Definition Caller.h:2877
This class implements Ocean's image class.
Definition Frame.h:1808
void setRelativeTimestamp(const Timestamp &relative)
Sets the relative timestamp of this frame.
Definition Frame.h:4236
void setTimestamp(const Timestamp &timestamp)
Sets the timestamp of this frame.
Definition Frame.h:4231
const Timestamp & timestamp() const
Returns the timestamp of this frame.
Definition Frame.h:4221
const Timestamp & relativeTimestamp() const
Returns the relative timestamp of this frame.
Definition Frame.h:4226
Definition of a frame type composed by the frame dimension, pixel format and pixel origin.
Definition Frame.h:30
size_t sizeY() const
Returns the vertical dimension of this lookup object.
Definition Lookup2.h:947
size_t sizeX() const
Returns the horizontal dimension of this lookup object.
Definition Lookup2.h:941
This class implements a 2D lookup object with values at the bins' corners defining the individual loo...
Definition Lookup2.h:636
T bilinearValue(const TScalar x, const TScalar y) const
Applies a lookup for a specific position in this lookup object.
Definition Lookup2.h:1815
This class implements an object able to allocate memory.
Definition base/Memory.h:22
void * data()
Returns the pointer to the writable memory which is allocated by this object.
Definition base/Memory.h:303
This class provides basic numeric functionalities.
Definition Numeric.h:57
static constexpr int32_t round32(const T value)
Returns the rounded 32 bit integer value of a given value.
Definition Numeric.h:2067
static constexpr bool isEqualEps(const T value)
Returns whether a value is smaller than or equal to a small epsilon.
Definition Numeric.h:2090
bool isNull() const
Returns whether this matrix is a zero matrix.
Definition SquareMatrix3.h:1334
const T * data() const
Returns a pointer to the internal values.
Definition SquareMatrix3.h:1047
bool isSingular() const
Returns whether this matrix is singular (and thus cannot be inverted).
Definition SquareMatrix3.h:1342
typename TypeMapperBySize< sizeof(T)>::Type Type
Definition of an invalid mapped data type.
Definition DataType.h:508
const T & x() const noexcept
Returns the x value.
Definition Vector2.h:710
const T & y() const noexcept
Returns the y value.
Definition Vector2.h:722
bool isEqual(const VectorT2< T > &vector, const T eps) const
Returns whether two vectors are equal up to a specified epsilon.
Definition Vector2.h:758
This class implements a worker able to distribute function calls over different threads.
Definition Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
float Scalar
Definition of a scalar type.
Definition Math.h:129
SquareMatrixT3< Scalar > SquareMatrix3
Definition of the SquareMatrix3 object, depending on the OCEAN_MATH_USE_SINGLE_PRECISION either with ...
Definition SquareMatrix3.h:43
VectorT3< Scalar > Vector3
Definition of a 3D vector.
Definition Vector3.h:29
VectorT2< Scalar > Vector2
Definition of a 2D vector.
Definition Vector2.h:28
The namespace covering the entire Ocean framework.
Definition Accessor.h:15
Default definition of a type with tBytes bytes.
Definition DataType.h:32