Ocean
Loading...
Searching...
No Matches
FrameInterpolatorNearestPixel.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_CV_FRAME_INTERPOLATOR_NEAREST_PIXEL_H
9#define META_OCEAN_CV_FRAME_INTERPOLATOR_NEAREST_PIXEL_H
10
11#include "ocean/cv/CV.h"
14
15#include "ocean/base/DataType.h"
16#include "ocean/base/Frame.h"
17#include "ocean/base/Memory.h"
18#include "ocean/base/Worker.h"
19
20#include "ocean/math/Lookup2.h"
22
23namespace Ocean
24{
25
26namespace CV
27{
28
29/**
30 * This class implements a nearest pixel frame interpolator.
31 * Actually, no pixels are interpolated, but the color intensities from the nearest pixels (e.g., based on rounding) are used.<br>
32 * @ingroup cv
33 */
34class OCEAN_CV_EXPORT FrameInterpolatorNearestPixel
35{
36 public:
37
38 /// Definition of a lookup table for 2D vectors.
40
41 public:
42
43 /**
44 * The following comfort class provides comfortable functions simplifying prototyping applications but also increasing binary size of the resulting applications.
45 * Best practice is to avoid using these functions if binary size matters,<br>
46 * as for every comfort function a corresponding function exists with specialized functionality not increasing binary size significantly.<br>
47 */
48 class OCEAN_CV_EXPORT Comfort
49 {
50 public:
51
52 /**
53 * Resizes a given frame by a nearest pixel search.
54 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
55 * @param source The source frame that will be resized, must have a zipped pixel format, must be valid
56 * @param target The target frame that receives the image information of the source frame, the pixel format and pixel origin must match with the source frame
57 * @param worker Optional worker object to distribute the computational load
58 * @return True, if succeeded
59 */
60 static bool resize(const Frame& source, Frame& target, Worker* worker = nullptr);
61
62 /**
63 * Resizes a given frame in place by a nearest pixel search.
64 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
65 * @param frame The frame that will be resized, must have a zipped pixel format, must be valid
66 * @param targetWidth Width of the new target frame in pixel, with range [1, infinity)
67 * @param targetHeight Height of the new target frame in pixel, with range [1, infinity)
68 * @param worker Optional worker object to distribute the computational load
69 * @return True, if succeeded
70 */
71 static inline bool resize(Frame& frame, const unsigned int targetWidth, const unsigned int targetHeight, Worker* worker = nullptr);
72
73 /**
74 * Applies an affine image transformation to a frame (with zipped pixel format) and renders using nearest-neighbor interpolation.
75 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.
76 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the affine transformation).
77 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
78 * @param input The input frame that will be transformed, must have a zipped pixel format, must be valid
79 * @param output The output frame resulting by application of the given affine transformation, with same pixel format and pixel origin as the input frame, must have a valid dimension
80 * @param input_A_output Affine transformation used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
81 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
82 * @param worker Optional worker object to distribute the computational load
83 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
84 * @return True, if succeeded
85 */
86 static bool affine(const Frame& input, Frame& output, const SquareMatrix3& input_A_output, const uint8_t* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
87
88 /**
89 * Transforms a given input frame (with zipped pixel format) into an output frame by application of a homography.
90 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
91 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
92 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
93 * @param input The input frame that will be transformed, must have a zipped pixel format, must be valid
94 * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must be valid
95 * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
96 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
97 * @param worker Optional worker object to distribute the computational load
98 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
99 * @return True, if succeeded
100 */
101 static bool homography(const Frame& input, Frame& output, const SquareMatrix3& input_H_output, const void* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
102
103 /**
104 * Transforms a given input frame (with zipped pixel format) into an output frame (with arbitrary frame dimension) by application of a homography.
105 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
106 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
107 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
108 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
109 * @param input The input frame that will be transformed, must have a zipped pixel format, must be valid
110 * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must have a valid dimension
111 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
112 * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
113 * @param worker Optional worker object to distribute the computational load
114 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
115 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
116 * @return True, if succeeded
117 * @see Geometry::Homography::coversHomographyInputFrame().
118 */
119 static bool homographyMask(const Frame& input, Frame& output, Frame& outputMask, const SquareMatrix3& input_H_output, Worker* worker = nullptr, const uint8_t maskValue = 0xFFu, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
120
121 /**
122 * Transforms a given input frame (with 1 plane) into an output frame by application of an interpolation lookup table.
123 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
124 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
125 * @param input The input frame which will be transformed, must have a zipped pixel format, must be valid
126 * @param output Resulting output frame, the frame dimension will be set to match the size of the lookup table, pixel format and pixel origin will be set to match the given input frame
127 * @param lookupTable The lookup table which defines the transformation, must be valid
128 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
129 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
130 * @param worker Optional worker object to distribute the computation
131 * @return True, if succeeded
132 */
133 static bool transform(const Frame& input, Frame& output, const LookupTable& lookupTable, const bool offset, const uint8_t* borderColor, Worker* worker = nullptr);
134
135 /**
136 * Transforms a given input frame (with zipped pixel format) into an output frame by application of an interpolation lookup table.
137 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
138 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
139 * The pixel format of the frame must be zipped with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
140 * @param input The input frame which will be transformed
141 * @param output Resulting output frame, the frame dimension will be set to match the size of the lookup table, pixel format and pixel origin will be set to match the given input frame
142 * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
143 * @param lookupTable The lookup table which defines the transformation, must be valid
144 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
145 * @param worker Optional worker object to distribute the computation
146 * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
147 * @return True, if succeeded
148 */
149 static bool transformMask(const Frame& input, Frame& output, Frame& outputMask, const LookupTable& lookupTable, const bool offset, Worker* worker = nullptr, const uint8_t maskValue = 0xFFu);
150
151 /**
152 * Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
153 * This function is just a wrapper for CV::FrameTransposer::Comfort::rotate90().
154 * @param input The input frame which will be rotated, must be valid
155 * @param output The resulting rotated output frame, the frame type will be set automatically
156 * @param clockwise True, to rotate the frame clockwise; False, to rotate the frame counter-clockwise
157 * @param worker Optional worker object to distribute the computation
158 * @return True, if succeeded
159 */
160 static inline bool rotate90(const Frame& input, Frame& output, const bool clockwise, Worker* worker = nullptr);
161
162 /**
163 * Rotates a given frame by 180 degrees.
164 * This function is just a wrapper for CV::FrameTransposer::Comfort::rotate180().
165 * @param input The input frame which will be rotated, must be valid
166 * @param output The resulting rotated output frame, the frame type will be set automatically
167 * @param worker Optional worker object to distribute the computation
168 * @return True, if succeeded
169 */
170 static inline bool rotate180(const Frame& input, Frame& output, Worker* worker = nullptr);
171
172 /**
173 * Rotates a given frame with 90 degree steps.
174 * This function is just a wrapper for CV::FrameTransposer::Comfort::rotate().
175 * @param input The input frame which will be rotated, must be valid
176 * @param output The resulting rotated output frame, the frame type will be set automatically
177 * @param angle The clockwise rotation angle to be used, must be a multiple of +/- 90, with range (-infinity, infinity)
178 * @param worker Optional worker object to distribute the computation
179 * @return True, if succeeded
180 */
181 static bool rotate(const Frame& input, Frame& output, const int angle, Worker* worker = nullptr);
182 };
183
184 /**
185 * This class implements highly optimized interpolation functions with fixed properties.
186 * The functions can be significantly faster as these functions are tailored to the specific properties.
187 */
188 class OCEAN_CV_EXPORT SpecialCases
189 {
190 public:
191
192 /**
193 * Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 224x224 by using a bilinear interpolation.
194 * This function exploits the fact that lookup locations and interpolation factors repeat after 25 source pixels (14 pixels in the target resolution, as 400 / 224 = 25 / 14).<br> NOTE(review): the mention of bilinear interpolation and interpolation factors inside a nearest-pixel interpolator looks copied from FrameInterpolatorBilinear - confirm against the implementation.
195 * @param source The source frame buffer with resolution 400x400, must be valid
196 * @param target The target frame buffer receiving the resized image information, with resolution 224x224, must be valid
197 * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
198 * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
199 * @see FrameInterpolatorBilinear::resize<T, tChannels>().
200 */
201 static void resize400x400To224x224_8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
202 };
203
204 /**
205 * Resizes a given frame by a nearest pixel search and optionally distributes the computation via a worker to speed up the process.
206 * @param source The source frame buffer, must be valid
207 * @param target The target frame buffer, must be valid
208 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
209 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
210 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
211 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
212 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
213 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
214 * @param worker Optional worker to distribute the computation
215 * @tparam T Data type of the pixel channel values
216 * @tparam tChannels Number of data channels, with range [1, infinity)
217 */
218 template <typename T, unsigned int tChannels>
219 static inline void resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
220
221 /**
222 * Applies an affine image transformation to an 8 bit per channel input frame and renders the output.
223 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.
224 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the affine transformation).
225 * @param input The input frame that will be transformed, must be valid
226 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
227 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
228 * @param input_A_output The affine transformation used to transform the given input frame, transforming output points to input points, must be valid
229 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
230 * @param output The output frame using the given affine transform, must be valid
231 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
232 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
233 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
234 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
235 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
236 * @param worker Optional worker object to distribute the computational load
237 * @tparam tChannels The number of channels of the frame, with range [1, infinity)
238 */
239 template <unsigned int tChannels>
240 static inline void affine8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_A_output, const uint8_t* borderColor, uint8_t* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
241
242 /**
243 * Transforms a given input frame into an output frame by application of a homography.
244 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
245 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
246 * @param input The input frame that will be transformed, must be valid
247 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
248 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
249 * @param input_H_output The homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
250 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0 to each channel
251 * @param output The output frame using the given homography, must be valid
252 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
253 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
254 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
255 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
256 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
257 * @param worker Optional worker object to distribute the computational load
258 * @tparam T Data type of each pixel channel, e.g., float, double, int
259 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
260 * @see homographyMask8BitPerChannel().
261 */
262 template <typename T, unsigned int tChannels>
263 static inline void homography(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const T* borderColor, T* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
264
265 /**
266 * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
267 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
268 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
269 * @param input The input frame that will be transformed, must be valid
270 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
271 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
272 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
273 * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
274 * @param output The output frame using the given homography, must be valid
275 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
276 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
277 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
278 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
279 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
280 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
281 * @param worker Optional worker object to distribute the computational load
282 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
283 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
284 * @see homography().
285 */
286 template <unsigned int tChannels>
287 static inline void homographyMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3& input_H_output, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
288
289 /**
290 * Transforms a given input frame (with zipped pixel format) into an output frame by application of an interpolation lookup table.
291 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
292 * @param input The input frame which will be transformed, must be valid
293 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
294 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
295 * @param lookupTable The lookup table which defines the transformation, must be valid
296 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
297 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
298 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
299 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
300 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
301 * @param worker Optional worker object to distribute the computation
302 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
303 */
304 template <unsigned int tChannels>
305 static inline void transform8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& lookupTable, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
306
307 /**
308 * Transforms a given input frame (with zipped pixel format) into an output frame by application of an interpolation lookup table.
309 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
310 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
311 * @param input The input frame which will be transformed, must be valid
312 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
313 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
314 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
315 * @param lookupTable The lookup table which defines the transformation, must be valid
316 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
317 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
318 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
319 * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
320 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
321 * @param worker Optional worker object to distribute the computation
322 * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
323 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
324 */
325 template <unsigned int tChannels>
326 static inline void transformMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable& lookupTable, const bool offset, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
327
328 /**
329 * Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
330 * @param source The source frame which will be rotated, must be valid
331 * @param target The resulting rotated target frame, must be valid and must have the same buffer size as the source frame
332 * @param sourceWidth The width of the source frame in pixel, with range [1, infinity)
333 * @param sourceHeight The height of the source frame in pixel, with range [1, infinity)
334 * @param clockwise True, to rotate the frame clockwise; False, to rotate the frame counter-clockwise
335 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
336 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
337 * @param worker Optional worker object to distribute the computation
338 * @tparam TElementType Data type of the elements of the image pixels
339 * @tparam tChannels Number of data channels, with range [1, infinity)
340 */
341 template <typename TElementType, unsigned int tChannels>
342 static inline void rotate90(const TElementType* source, TElementType* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const bool clockwise, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
343
344 /**
345 * Checks whether the application of a given homography for a specified input frame and output frame covers the entire image information (which is necessary for the nearest pixel 'interpolation') or whether the homography relies on missing image information.
346 * @param inputWidth The width of the input frame in pixel, with range [1, infinity)
347 * @param inputHeight The height of the input frame in pixel, with range [1, infinity)
348 * @param outputWidth The width of the output frame in pixel, with range [1, infinity)
349 * @param outputHeight The height of the output frame in pixel, with range [1, infinity)
350 * @param input_H_output The homography to check, which transforms points by the following equation: inputPoint = input_H_output * outputPoint, must be valid
351 * @param outputOriginX The horizontal origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)
352 * @param outputOriginY The vertical origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)
353 * @return True, if the homography covers the entire input image information (if all output pixels will receive valid data from the input frame); False, otherwise
354 */
355 static bool coversHomographyInputFrame(const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int outputWidth, const unsigned int outputHeight, const SquareMatrix3& input_H_output, const int outputOriginX = 0, const int outputOriginY = 0);
356
357 private:
358
359 /**
360 * Resizes a subset of the rows of a given frame by a nearest pixel search.
361 * @param source The source frame buffer, must be valid
362 * @param target The target frame buffer, must be valid
363 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
364 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
365 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
366 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
367 * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
368 * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
369 * @param firstTargetRow First (including) row to convert, with range [0, targetHeight)
370 * @param numberTargetRows Number of rows to convert, with range [1, targetHeight - firstTargetRow]
371 * @tparam T Data type of the pixel channel values
372 * @tparam tChannels Number of data channels, range: [1, infinity)
373 */
374 template <typename T, unsigned int tChannels>
375 static void resizeSubset(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
376
377 /**
378 * Applies an affine image transformation to a subset of the output rows of an 8 bit per channel frame using nearest neighbor interpolation.
379 * @param input The input frame that will be transformed, must be valid
380 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
381 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
382 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
383 * @param affineTransform Affine transformation used to transform the given input frame, must be valid
384 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
385 * @param output The output frame using the given affine transform, must be valid
386 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
387 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
388 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
389 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight)
390 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
391 * @tparam tChannels Number of frame channels, with range [1, infinity)
392 */
393 template <unsigned int tChannels>
394 static void affine8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
395
396 /**
397 * Copies the image content of an input image to a subset of an output image by application of a given homography transformation.
398 * @param input The input frame that will be transformed, must be valid
399 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
400 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
401 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
402 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0 to each channel
403 * @param output The output frame using the given homography, must be valid
404 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
405 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
406 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
407 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
408 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight)
409 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
410 * @tparam T Data type of each pixel channel, e.g., float, double, int
411 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
412 */
413 template <typename T, unsigned int tChannels>
414 static void homographySubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
415
416 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
417
418 /**
419 * Applies an affine image transformation to a subset of the output rows of an 8 bit per channel frame using nearest neighbor interpolation (using SSE).
420 * Beware: Only the top two rows of the affine transformation matrix are used, i.e., the elements a through f; the last row is disregarded completely.
421 * @param input The input frame that will be transformed
422 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
423 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
424 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
425 * @param affineTransform Affine transformation which is applied to the input frame
426 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
427 * @param output The output frame where the result of the transformation will be stored
428 * @param outputWidth The width of the output image in pixel, with range [4, infinity)
429 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
430 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
431 * @param firstOutputRow The first output row to be handled
432 * @param numberOutputRows Number of output rows to be handled
433 * @tparam tChannels Number of frame channels
434 * @see affine8BitPerChannelSubset(), affine8BitPerChannelIntegerNEONSubset().
435 */
436 template <unsigned int tChannels>
437 static inline void affine8BitPerChannelSSESubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
438
439 /**
440 * Copies the image content of an input image to a subset of an output image by application of a given homography transformation (using SSE).
441 * @param input The input frame that will be transformed, must be valid
442 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
443 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
444 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
445 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0 to each channel
446 * @param output The output frame using the given homography, must be valid
447 * @param outputWidth The width of the output image in pixel, with range [1, infinity); NOTE(review): homography() selects this function only for outputWidth >= 4, confirm whether smaller widths are actually supported
448 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
449 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
450 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
451 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight)
452 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
453 * @tparam T Data type of each pixel channel, e.g., float, double, int
454 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
455 */
456 template <typename T, unsigned int tChannels>
457 static void homographySSESubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
458
459 #endif // OCEAN_HARDWARE_SSE_VERSION >= 41
460
461 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
462
463 /**
464 * Applies an affine image transformation to a subset of the output rows of an 8 bit per channel frame using nearest neighbor interpolation (using NEON and integer fixed-point arithmetic).
465 * @param input The input frame that will be transformed, must be valid
466 * @param inputWidth Width of the input frame in pixel, with range [1, 65536)
467 * @param inputHeight Height of the input frame in pixel, with range [1, 65536)
468 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
469 * @param affineTransform Affine transformation used to transform the given input frame, must be valid
470 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
471 * @param output The output frame using the given affine transform, must be valid
472 * @param outputWidth The width of the output image in pixel, with range [1, 65536)
473 * @param outputHeight The height of the output image in pixel, with range [1, 65536)
474 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
475 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight)
476 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
477 * @tparam tChannels Number of frame channels
478 */
479 template <unsigned int tChannels>
480 static inline void affine8BitPerChannelIntegerNEONSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
481
482 /**
483 * Copies the image content of an input image to a subset of an output image by application of a given homography transformation (using NEON).
484 * Beware: The output width 'outputWidth' must be >= 4, use homographySubset() for narrower output frames.
485 * @param input The input frame that will be transformed
486 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
487 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
488 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
489 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, @c nullptr to assign 0 to each channel
490 * @param output The output frame using the given homography
491 * @param outputWidth The width of the output image in pixel, with range [4, infinity)
492 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
493 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
494 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
495 * @param firstOutputRow The first output row to be handled
496 * @param numberOutputRows Number of output rows to be handled
497 * @tparam T Data type of each pixel channel, e.g., float, double, int
498 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
499 * @see homographySubset().
500 */
501 template <typename T, unsigned int tChannels>
502 static inline void homographyNEONSubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
503
504 #endif // OCEAN_HARDWARE_NEON_VERSION
505
506 /**
507 * Transforms a subset of the output rows of an 8 bit per channel frame using the given homography and creates the corresponding output mask.
508 * @param input The input frame that will be transformed, must be valid
509 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
510 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
511 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
512 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
513 * @param output The output frame using the given homography, must be valid
514 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
515 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
516 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
517 * @param maskValue 8 bit mask value for output pixels with a corresponding pixel lying inside the input frame, all remaining output pixels will be assigned with (0xFF - maskValue)
518 * @param outputOriginX The horizontal coordinate of the output frame's origin
519 * @param outputOriginY The vertical coordinate of the output frame's origin
520 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
521 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
522 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight)
523 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
524 * @tparam tChannels Number of frame channels
525 */
526 template <unsigned int tChannels>
527 static inline void homographyMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* input_H_output, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
528
529 /**
530 * Transforms a subset of a given input frame (with zipped pixel format) into an output frame by application of an interpolation lookup table.
531 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
532 * @param input The input frame which will be transformed, must be valid
533 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
534 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
535 * @param lookupTable The lookup table which defines the transformation, must be valid
536 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
537 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
538 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
539 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
540 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
541 * @param firstRow First row to be handled, with range [0, lookupTable->sizeY())
542 * @param numberRows Number of rows to be handled, with range [1, lookupTable->sizeY() - firstRow]
543 * @tparam tChannels Number of channels of the frame
544 */
545 template <unsigned int tChannels>
546 static void transform8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* lookupTable, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
547
548 /**
549 * Transforms a subset of a given input frame (with zipped pixel format) into an output frame by application of an interpolation lookup table.
550 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
551 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
552 * @param input The input frame which will be transformed, must be valid
553 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
554 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
555 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
556 * @param lookupTable The lookup table which defines the transformation, must be valid
557 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
558 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
559 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
560 * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
561 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
562 * @param maskValue 8 bit mask value for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
563 * @param firstRow First row to be handled, with range [0, lookupTable->sizeY())
564 * @param numberRows Number of rows to be handled, with range [1, lookupTable->sizeY() - firstRow]
565 * @tparam tChannels Number of channels of the frame
566 */
567 template <unsigned int tChannels>
568 static void transformMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable* lookupTable, const bool offset, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows);
569};
570
571inline bool FrameInterpolatorNearestPixel::Comfort::resize(Frame& frame, const unsigned int targetWidth, const unsigned int targetHeight, Worker* worker)
572{
573 ocean_assert(frame && targetWidth >= 1u && targetHeight >= 1u);
574
575 Frame tmpFrame(FrameType(frame, targetWidth, targetHeight));
576
577 if (!resize(frame, tmpFrame, worker))
578 {
579 return false;
580 }
581
582 tmpFrame.setTimestamp(frame.timestamp());
583 tmpFrame.setRelativeTimestamp(frame.relativeTimestamp());
584
585 frame = std::move(tmpFrame);
586 return true;
587}
588
589inline bool FrameInterpolatorNearestPixel::Comfort::rotate90(const Frame& input, Frame& output, const bool clockwise, Worker* worker)
590{
591 return FrameTransposer::Comfort::rotate90(input, output, clockwise, worker);
592}
593
594inline bool FrameInterpolatorNearestPixel::Comfort::rotate180(const Frame& input, Frame& output, Worker* worker)
595{
596 return FrameTransposer::Comfort::rotate180(input, output, worker);
597}
598
599inline bool FrameInterpolatorNearestPixel::Comfort::rotate(const Frame& input, Frame& output, const int angle, Worker* worker)
600{
601 return FrameTransposer::Comfort::rotate(input, output, angle, worker);
602}
603
604template <typename T, unsigned int tChannels>
605inline void FrameInterpolatorNearestPixel::resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
606{
607 ocean_assert(source && target);
608
609 if (worker)
610 {
611 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::resizeSubset<T, tChannels>, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
612 }
613 else
614 {
615 resizeSubset<T, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
616 }
617}
618
619template <unsigned int tChannels>
620inline void FrameInterpolatorNearestPixel::affine8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& affineTransform, const uint8_t* borderColor, uint8_t* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
621{
622 // Merge the additional translation into the affine transformation
623 const SquareMatrix3 adjustedAffineTransform = affineTransform * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
624
625 if (worker)
626 {
627 if (outputWidth >= 4u)
628 {
629#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
630 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::affine8BitPerChannelSSESubset<tChannels>, input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
631 return;
632#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
633 if (inputWidth <= 65535u && inputHeight <= 65535u && outputWidth <= 65535u && outputHeight <= 65535u)
634 {
635 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::affine8BitPerChannelIntegerNEONSubset<tChannels>, input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 32u);
636 return;
637 }
638#endif
639 }
640
641 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::affine8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
642 }
643 else
644 {
645 if (outputWidth >= 4u)
646 {
647#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
648 affine8BitPerChannelSSESubset<tChannels>(input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, outputHeight);
649 return;
650#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
651 if (inputWidth <= 65535u && inputHeight <= 65535u && outputWidth <= 65535u && outputHeight <= 65535u)
652 {
653 affine8BitPerChannelIntegerNEONSubset<tChannels>(input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, outputHeight);
654 return;
655 }
656#endif
657 }
658
659 affine8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, inputPaddingElements, &adjustedAffineTransform, borderColor, output, outputWidth, outputHeight, outputPaddingElements, 0u, outputHeight);
660 }
661}
662
663template <typename T, unsigned int tChannels>
664inline void FrameInterpolatorNearestPixel::homography(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const T* borderColor, T* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
665{
666 static_assert(tChannels >= 1u, "Invalid channel number!");
667
668 // Merge the additional translation into the homography
669 const SquareMatrix3 input_H_adjustedOutput = input_H_output * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
670
671 typedef typename TypeMapper<T>::Type MappedTypeT;
672
673 if (worker)
674 {
675 if (outputWidth >= 4u)
676 {
677#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
678 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::homographySSESubset<MappedTypeT, tChannels>, (const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
679 return;
680#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
681 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::homographyNEONSubset<MappedTypeT, tChannels>, (const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
682 return;
683#endif
684 }
685
686 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::homographySubset<MappedTypeT, tChannels>, (const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
687 }
688 else
689 {
690 if (outputWidth >= 4u)
691 {
692#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
693 homographySSESubset<MappedTypeT, tChannels>((const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
694 return;
695#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
696 homographyNEONSubset<MappedTypeT, tChannels>((const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
697 return;
698#endif
699 }
700
701 homographySubset<MappedTypeT, tChannels>((const MappedTypeT*)(input), inputWidth, inputHeight, &input_H_adjustedOutput, (const MappedTypeT*)(borderColor), (MappedTypeT*)(output), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
702 }
703}
704
705template <unsigned int tChannels>
706inline void FrameInterpolatorNearestPixel::homographyMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3& input_H_output, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, Worker* worker, const uint8_t maskValue)
707{
708 if (worker)
709 {
710 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::homographyMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, inputPaddingElements, &input_H_output, output, outputPaddingElements, outputMask, outputMaskPaddingElements, maskValue, outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, 0u, 0u), 0u, outputHeight, 14u, 15u, 20u);
711 }
712 else
713 {
714 homographyMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, inputPaddingElements, &input_H_output, output, outputPaddingElements, outputMask, outputMaskPaddingElements, maskValue, outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, 0u, outputHeight);
715 }
716}
717
718template <unsigned int tChannels>
719inline void FrameInterpolatorNearestPixel::transform8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& lookupTable, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
720{
721 if (worker)
722 {
723 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::transform8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &lookupTable, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (unsigned int)(lookupTable.sizeY()), 9u, 10u, 20u);
724 }
725 else
726 {
727 transform8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &lookupTable, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (unsigned int)(lookupTable.sizeY()));
728 }
729}
730
731template <unsigned int tChannels>
732inline void FrameInterpolatorNearestPixel::transformMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable& lookupTable, const bool offset, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, Worker* worker, const uint8_t maskValue)
733{
734 if (worker)
735 {
736 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorNearestPixel::transformMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, inputPaddingElements, &lookupTable, offset, output, outputPaddingElements, outputMask, outputMaskPaddingElements, maskValue, 0u, 0u), 0u, (unsigned int)(lookupTable.sizeY()), 11u, 12u, 20u);
737 }
738 else
739 {
740 transformMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, inputPaddingElements, &lookupTable, offset, output, outputPaddingElements, outputMask, outputMaskPaddingElements, maskValue, 0u, (unsigned int)lookupTable.sizeY());
741 }
742}
743
744template <typename TElementType, unsigned int tChannels>
745inline void FrameInterpolatorNearestPixel::rotate90(const TElementType* source, TElementType* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const bool clockwise, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
746{
747 static_assert(tChannels >= 1u, "Invalid channel number!");
748
749 ocean_assert(source != nullptr && target != nullptr);
750 ocean_assert(source != target);
751 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
752
753 FrameTransposer::rotate90<TElementType, tChannels>(source, target, sourceWidth, sourceHeight, clockwise, sourcePaddingElements, targetPaddingElements, worker);
754}
755
756template <typename T, unsigned int tChannels>
757void FrameInterpolatorNearestPixel::resizeSubset(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
758{
759 static_assert(tChannels > 0u, "Invalid channel number!");
760 static_assert(sizeof(T) != 0, "Invalid data type!");
761
762 ocean_assert(source != nullptr && target != nullptr);
763 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
764 ocean_assert(targetWidth != 0u && targetHeight != 0u);
765
766 ocean_assert(firstTargetRow + numberTargetRows <= targetHeight);
767
768 const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
769 const unsigned int targetStrideElements = targetWidth * tChannels + targetPaddingElements;
770
771 Memory memoryHorizontalLookups = Memory::create<unsigned int>(targetWidth);
772 unsigned int* horizontalLookups = memoryHorizontalLookups.data<unsigned int>();
773
774 for (unsigned int tx = 0u; tx < targetWidth; ++tx)
775 {
776 const unsigned int sx = tx * sourceWidth / targetWidth;
777 ocean_assert(sx < sourceWidth);
778
779 horizontalLookups[tx] = sx * tChannels;
780 }
781
782 target += firstTargetRow * targetStrideElements;
783
784 for (unsigned int ty = firstTargetRow; ty < firstTargetRow + numberTargetRows; ++ty)
785 {
786 const unsigned int sy = ty * sourceHeight / targetHeight;
787 ocean_assert(sy < sourceHeight);
788
789 const T* const sourceRow = source + sy * sourceStrideElements;
790
791 for (unsigned int tx = 0; tx < targetWidth; ++tx)
792 {
793 const T* const sourcePointer = sourceRow + horizontalLookups[tx];
794
795 for (unsigned int n = 0u; n < tChannels; ++n)
796 {
797 *target++ = sourcePointer[n];
798 }
799 }
800
801 target += targetPaddingElements;
802 }
803}
804
805template <unsigned int tChannels>
806void FrameInterpolatorNearestPixel::affine8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
807{
808 static_assert(tChannels >= 1u, "Invalid channel number!");
809
810 ocean_assert(input != nullptr && output != nullptr);
811 ocean_assert(inputWidth > 0u && inputHeight > 0u);
812 ocean_assert_and_suppress_unused(outputWidth > 0u && outputHeight > 0u, outputHeight);
813 ocean_assert(affineTransform);
814 ocean_assert(!affineTransform->isNull() && Numeric::isEqualEps((*affineTransform)[2]) && Numeric::isEqualEps((*affineTransform)[5]));
815
816 ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
817
818 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
819
820 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
821 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
822
823 PixelType* outputData = (PixelType*)(output + firstOutputRow * (outputWidth * tChannels + outputPaddingElements));
824
825 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
826 {
827 //
828 // We can slightly optimize the 3x3 matrix multiplication:
829 //
830 // | X0 Y0 Z0 | | x |
831 // | X1 Y1 Z1 | * | y |
832 // | 0 0 1 | | 1 |
833 //
834 // | xx | | X0 * x | | Y0 * y + Z0 |
835 // | yy | = | X1 * x | + | Y1 * y + Z1 |
836 //
837 // As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
838 //
839 // C0 = Y0 * y + Z0
840 // C1 = Y1 * y + Z1
841 //
842 // So the computation becomes:
843 //
844 // | x' | | X0 * x | | C0 |
845 // | y' | = | X1 * x | + | C1 |
846 //
847
848 const Vector2 X(affineTransform->data() + 0);
849 const Vector2 c(Vector2(affineTransform->data() + 3) * Scalar(y) + Vector2(affineTransform->data() + 6));
850
851 for (unsigned int x = 0u; x < outputWidth; ++x)
852 {
853 const Vector2 inputPosition = X * Scalar(x) + c;
854
855#ifdef OCEAN_DEBUG
856 const Scalar debugX = (*affineTransform)[0] * Scalar(x) + (*affineTransform)[3] * Scalar(y) + (*affineTransform)[6];
857 const Scalar debugY = (*affineTransform)[1] * Scalar(x) + (*affineTransform)[4] * Scalar(y) + (*affineTransform)[7];
858 ocean_assert(inputPosition.isEqual(Vector2(debugX, debugY), Scalar(0.01)));
859#endif
860
861 const unsigned int inputX = Numeric::round32(inputPosition.x());
862 const unsigned int inputY = Numeric::round32(inputPosition.y());
863
864 if (inputX < inputWidth && inputY < inputHeight)
865 *outputData = *(PixelType*)(input + inputY * (inputWidth * tChannels + inputPaddingElements) + inputX * tChannels);
866 else
867 *outputData = *bColor;
868
869 outputData++;
870 }
871
872 outputData = (PixelType*)((uint8_t*)outputData + outputPaddingElements);
873 }
874}
875
876template <typename T, unsigned int tChannels>
877void FrameInterpolatorNearestPixel::homographySubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
878{
879 static_assert(tChannels > 0u, "Invalid channel number!");
880
881 ocean_assert(input != nullptr && output != nullptr);
882 ocean_assert(inputWidth > 0u && inputHeight > 0u);
883 ocean_assert(outputWidth > 0u && outputHeight > 0u);
884 ocean_assert(input_H_output != nullptr && !input_H_output->isSingular());
885
886 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
887
888 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
889 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
890
891 typedef typename DataType<T, tChannels>::Type PixelType;
892
893 const T zeroColor[tChannels] = {T(0)};
894 const PixelType bColor = borderColor ? *(const PixelType*)(borderColor) : *(const PixelType*)(zeroColor);
895
896 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
897 {
898 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
899
900 for (unsigned int x = 0u; x < outputWidth; ++x)
901 {
902 const Vector2 outputPosition = Vector2(Scalar(x), Scalar(y));
903 const Vector2 inputPosition(*input_H_output * outputPosition);
904
905 const unsigned int inputX = Numeric::round32(inputPosition.x());
906 const unsigned int inputY = Numeric::round32(inputPosition.y());
907
908 if (inputX < inputWidth && inputY < inputHeight)
909 {
910 *outputData = *((const PixelType*)(input + inputY * inputStrideElements) + inputX);
911 }
912 else
913 {
914 *outputData = bColor;
915 }
916
917 outputData++;
918 }
919 }
920}
921
922#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
923
924template <unsigned int tChannels>
925inline void FrameInterpolatorNearestPixel::affine8BitPerChannelSSESubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
926{
927 static_assert(tChannels >= 1u, "Invalid channel number!");
928
929 ocean_assert(input && output);
930 ocean_assert(inputWidth > 0u && inputHeight > 0u);
931 ocean_assert(outputWidth >= 4u && outputHeight > 0u);
932 ocean_assert(affineTransform);
933 ocean_assert(!affineTransform->isNull() && Numeric::isEqualEps((*affineTransform)[2]) && Numeric::isEqualEps((*affineTransform)[5]));
934
935 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
936
937 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
938
939 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
940 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
941
942 PixelType* outputPixelData = (PixelType*)(output + firstOutputRow * (outputWidth * tChannels + outputPaddingElements));
943
944 OCEAN_ALIGN_DATA(16)
945 unsigned int nearestNeighbours[4];
946
947 // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
948 const __m128 m128_f_X0 = _mm_set_ps1(float((*affineTransform)(0, 0)));
949 const __m128 m128_f_X1 = _mm_set_ps1(float((*affineTransform)(1, 0)));
950
951 // m128_u_inputStrideElements = [rowStride, rowStride, rowStride, rowStride], rowStride = inputWidth * tChannels + inputPaddingElements
952 const __m128i m128_i_inputStrideElements = _mm_set1_epi32(inputWidth * tChannels + inputPaddingElements);
953
954 // m128_u_channels = [tChannels, tChannels, tChannels, tChannels]
955 const __m128i m128_i_channels = _mm_set1_epi32(tChannels);
956
957 // m128_i_inputWidth_1 = [inputWidth - 1u, inputWidth - 1u, inputWidth - 1u, inputWidth - 1u]
958 const __m128i m128_i_inputWidth_1 = _mm_set1_epi32(inputWidth - 1u);
959
960 // m128_i_inputHeight_1 = [inputHeight - 1u, inputHeight - 1u, inputHeight - 1u, inputHeight - 1u]
961 const __m128i m128_i_inputHeight_1 = _mm_set1_epi32(inputHeight - 1u);
962
963 // m128_i_zero = [0, 0, 0, 0]
964 const __m128i m128_i_zero = _mm_setzero_si128();
965
966 // Indices (of elements) above this value in the input image are considered as outside of the image (intentionally not counting the last padding elements)
967 const unsigned int inputElementsEnd = inputHeight * inputWidth * tChannels + (inputHeight - 1u) * inputPaddingElements;
968
969 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
970 {
971 // We can slightly optimize the 3x3 matrix multiplication:
972 //
973 // | X0 Y0 Z0 | | x |
974 // | X1 Y1 Z1 | * | y |
975 // | 0 0 1 | | 1 |
976 //
977 // | xx | | X0 * x | | Y0 * y + Z0 |
978 // | yy | = | X1 * x | + | Y1 * y + Z1 |
979 //
980 // As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
981 //
982 // C0 = Y0 * y + Z0
983 // C1 = Y1 * y + Z1
984 //
985 // So the computation becomes:
986 //
987 // | x' | | X0 * x | | C0 |
988 // | y' | = | X1 * x | + | C1 |
989
990 // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
991 const __m128 m128_f_C0 = _mm_set_ps1(float((*affineTransform)(0, 1) * Scalar(y) + (*affineTransform)(0, 2)));
992 const __m128 m128_f_C1 = _mm_set_ps1(float((*affineTransform)(1, 1) * Scalar(y) + (*affineTransform)(1, 2)));
993
994 for (unsigned int x = 0u; x < outputWidth; x += 4u)
995 {
996 if (x + 4u > outputWidth)
997 {
998 // the last iteration will not fit into the output frame,
999 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
1000
1001 ocean_assert(x >= 4u && outputWidth > 4u);
1002 const unsigned int newX = outputWidth - 4u;
1003
1004 ocean_assert(x > newX);
1005 outputPixelData -= x - newX;
1006
1007 x = newX;
1008
1009 // the for loop will stop after this iteration
1010 ocean_assert(!(x + 4u < outputWidth));
1011 }
1012
1013 // we need four successive x coordinate floats:
1014 // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
1015 const __m128 m128_f_x_0123 = _mm_set_ps(float(x + 3u), float(x + 2u), float(x + 1u), float(x + 0u));
1016
1017 // we calculate xx and yy for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
1018 const __m128 m128_f_inputX = _mm_add_ps(_mm_mul_ps(m128_f_X0, m128_f_x_0123), m128_f_C0);
1019 const __m128 m128_f_inputY = _mm_add_ps(_mm_mul_ps(m128_f_X1, m128_f_x_0123), m128_f_C1);
1020
1021 // Compute the coordinates of the nearest neighbors
1022 const __m128i m128_i_inputX = _mm_cvtps_epi32(_mm_round_ps(m128_f_inputX, _MM_FROUND_TO_NEAREST_INT)); // x' = (int)round(x)
1023 const __m128i m128_i_inputY = _mm_cvtps_epi32(_mm_round_ps(m128_f_inputY, _MM_FROUND_TO_NEAREST_INT)); // y' = (int)round(y)
1024
1025 // Note: Detection of input position outside the input image
1026 //
1027 // If the input point is outside the input image, then set the index
1028 // of its nearest neighbor to a value that is above the number of
1029 // available pixels in the image. When writing to the output, a
1030 // check will make sure to use the background color for those
1031 // pixels:
1032 //
1033 // nearestNeighbour = isOutsideImage ? 0xFFFFFFFF : y * w + x
1034 // output = nearestNeighbour < inputPixelIndexEnd ? foregroundColor : backgroundColor.
1035 //
1036 // This approach keeps the amount of data that has to be transferred
1037 // between SSE and CPU registers to a minimum.
1038
1039 // isOutsideImage = (inputX < 0 || inputX > (width - 1u) || inputY < 0 || inputY > (height - 1u) ? 0xFFFFFFFF : 0x00000000;
1040 const __m128i m128_i_isOutsideImage = _mm_or_si128(
1041 _mm_or_si128(_mm_cmplt_epi32(m128_i_inputX, m128_i_zero), _mm_cmplt_epi32(m128_i_inputY, m128_i_zero)),
1042 _mm_or_si128(_mm_cmpgt_epi32(m128_i_inputX, m128_i_inputWidth_1), _mm_cmpgt_epi32(m128_i_inputY, m128_i_inputHeight_1)));
1043
1044 // Compute pixel index of the nearest neighbors of the valid pixels and store their pixel values
1045 // nearestNeighborsElement = (isOutsideImage ? 0xFFFFFFFF : (inputY * inputStrideElements) + (inputX * channels))
1046 const __m128i m_128_i_nearestNeighborElements = _mm_or_si128(m128_i_isOutsideImage, _mm_add_epi32(_mm_mullo_epi32(m128_i_inputY, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_inputX, m128_i_channels)));
1047 _mm_store_si128((__m128i*)nearestNeighbours, m_128_i_nearestNeighborElements);
1048
1049 // Update the output pixels
1050 outputPixelData[0] = nearestNeighbours[0] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighbours[0]) : *bColor;
1051 outputPixelData[1] = nearestNeighbours[1] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighbours[1]) : *bColor;
1052 outputPixelData[2] = nearestNeighbours[2] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighbours[2]) : *bColor;
1053 outputPixelData[3] = nearestNeighbours[3] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighbours[3]) : *bColor;
1054
1055 outputPixelData += 4u;
1056 }
1057
1058 outputPixelData = (PixelType*)((uint8_t*)outputPixelData + outputPaddingElements);
1059 }
1060}
1061
1062template <typename T, unsigned int tChannels>
1063void FrameInterpolatorNearestPixel::homographySSESubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
1064{
1065 static_assert(tChannels > 0u, "Invalid channel number!");
1066
1067 ocean_assert(input != nullptr && output != nullptr);
1068 ocean_assert(inputWidth > 0u && inputHeight > 0u);
1069 ocean_assert(outputWidth >= 4u && outputHeight > 0u);
1070 ocean_assert(input_H_output != nullptr && !input_H_output->isSingular());
1071
1072 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
1073
1074 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
1075 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
1076
1077 typedef typename DataType<T, tChannels>::Type PixelType;
1078
1079 const T zeroColor[tChannels] = {T(0)};
1080 const PixelType bColor = borderColor ? *(const PixelType*)(borderColor) : *(const PixelType*)(zeroColor);
1081
1082 OCEAN_ALIGN_DATA(16) unsigned int nearestNeighbourElementOffsets[4];
1083
1084 // | X0 Y0 Z0 | | x |
1085 // Homography H = | X1 Y1 Z1 |, point p = | y |
1086 // | X2 Y2 Z2 | | 1 |
1087 //
1088 // | xx |
1089 // pp = H * p = | yy |
1090 // | zz |
1091 //
1092 // | xx | | X0 Y0 Z0 | | x |
1093 // <=> | yy | = | X1 Y1 Z1 | * | y |
1094 // | zz | | X2 Y2 Z2 | | 1 |
1095 //
1096 // | xx | | X0 * x | | Y0 * y + Z0 |
1097 // <=> | yy | = | X1 * x | + | Y1 * y + Z1 |
1098 // | zz | | X2 * x | | Y2 * y + Z2 |
1099 //
1100 // | xx | | X0 * x | | C0 | | Y0 * y + Z0 |
1101 // <=> | yy | = | X1 * x | + | C1 |, C = | Y1 * y + Z1 |
1102 // | zz | | X2 * x | | C2 | | Y2 * y + Z2 |
1103 //
1104 // Where C is a constant term that can be pre-computed (per image row)
1105 //
1106 // | x' | | xx / zz | | (X0 * x + C0) / (X2 * x + C2) |
1107 // p' = | y' | = | yy / zz | = | (X1 * x + C1) / (X2 * x + C2) |
1108
1109 // [Xi, Xi, Xi, Xi], i = {0, 1, 2}
1110 const __m128 m128_f_X0 = _mm_set_ps1((float)(*input_H_output)(0, 0));
1111 const __m128 m128_f_X1 = _mm_set_ps1((float)(*input_H_output)(1, 0));
1112 const __m128 m128_f_X2 = _mm_set_ps1((float)(*input_H_output)(2, 0));
1113
1114 // Store 4 integers: [inputStrideElements, inputStrideElements, inputStrideElements, inputStrideElements]
1115 const __m128i m128_i_inputStrideElements = _mm_set1_epi32(inputStrideElements);
1116
1117 const unsigned int inputPixelElementIndexEnd = inputHeight * inputStrideElements;
1118
1119 // m128_i_inputWidth_1 = [inputWidth - 1u, inputWidth - 1u, inputWidth - 1u, inputWidth - 1u]
1120 const __m128i m128_i_inputWidth_1 = _mm_set1_epi32(inputWidth - 1u);
1121
1122 // m128_i_inputHeight_1 = [inputHeight - 1u, inputHeight - 1u, inputHeight - 1u, inputHeight - 1u]
1123 const __m128i m128_i_inputHeight_1 = _mm_set1_epi32(inputHeight - 1u);
1124
1125 // [tChannels, tChannels, tChannels tChannels]
1126 const __m128i m128_i_channels = _mm_set1_epi32(tChannels);
1127
1128 // m128_i_zero = [0, 0, 0, 0]
1129 const __m128i m128_i_zero = _mm_setzero_si128();
1130
1131 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
1132 {
1133 PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
1134
1135 // Pre-compute the constant terms [Ci, Ci, Ci, Ci], i={0, 1, 2}
1136 const __m128 m128_f_C0 = _mm_set_ps1((float)((*input_H_output)(0, 1) * Scalar(y) + ((*input_H_output)(0, 2))));
1137 const __m128 m128_f_C1 = _mm_set_ps1((float)((*input_H_output)(1, 1) * Scalar(y) + ((*input_H_output)(1, 2))));
1138 const __m128 m128_f_C2 = _mm_set_ps1((float)((*input_H_output)(2, 1) * Scalar(y) + ((*input_H_output)(2, 2))));
1139
1140 for (unsigned int x = 0u; x < outputWidth; x += 4u)
1141 {
1142 if (x + 4u > outputWidth)
1143 {
1144 // the last iteration will not fit into the output frame,
1145 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
1146
1147 ocean_assert(x >= 4u && outputWidth > 4u);
1148 const unsigned int newX = outputWidth - 4u;
1149
1150 ocean_assert(x > newX);
1151 outputPixelData -= x - newX;
1152
1153 x = newX;
1154
1155 // the for loop will stop after this iteration
1156 ocean_assert(!(x + 4u < outputWidth));
1157 }
1158
1159 // we need four successive x coordinate floats:
1160 // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
1161 const __m128 m128_f_x_0123 = _mm_set_ps(float(x + 3u), float(x + 2u), float(x + 1u), float(x + 0u));
1162
1163 // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
1164 const __m128 m128_f_xx = _mm_add_ps(_mm_mul_ps(m128_f_X0, m128_f_x_0123), m128_f_C0);
1165 const __m128 m128_f_yy = _mm_add_ps(_mm_mul_ps(m128_f_X1, m128_f_x_0123), m128_f_C1);
1166 const __m128 m128_f_zz = _mm_add_ps(_mm_mul_ps(m128_f_X2, m128_f_x_0123), m128_f_C2);
1167
1168#ifdef USE_APPROXIMATED_INVERSE_OF_ZZ // (not defined by default)
1169
1170 // we calculate the (approximated) inverse of zz,
1171 // the overall performance will be approx. 5% better while the accuracy will be slightly worse:
1172 // [1/zz3, 1/zz2, 1/zz1, 1/zz0]
1173 const __m128 inv_zz_128 = _mm_rcp_ps(m128_f_zz);
1174
1175 // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
1176 const __m128 m128_f_inputX = _mm_mul_ps(m128_f_xx, inv_zz_128);
1177 const __m128 m128_f_inputY = _mm_mul_ps(m128_f_yy, inv_zz_128);
1178
1179#else
1180
1181 // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
1182 const __m128 m128_f_inputX = _mm_div_ps(m128_f_xx, m128_f_zz);
1183 const __m128 m128_f_inputY = _mm_div_ps(m128_f_yy, m128_f_zz);
1184
1185#endif // USE_APPROXIMATED_INVERSE_OF_ZZ
1186
1187 // Compute the coordinates of the nearest neighbors
1188 const __m128i m128_i_inputX = _mm_cvtps_epi32(_mm_round_ps(m128_f_inputX, _MM_FROUND_TO_NEAREST_INT)); // x' = (int)round(x)
1189 const __m128i m128_i_inputY = _mm_cvtps_epi32(_mm_round_ps(m128_f_inputY, _MM_FROUND_TO_NEAREST_INT)); // y' = (int)round(y)
1190
1191 // Note: Detection of input position outside the input image
1192 //
1193 // If the input point is outside the input image, then set the index
1194 // of its nearest neighbor to a value that is above the number of
1195 // available pixels in the image. When writing to the output, a
1196 // check will make sure to use the background color for those
1197 // pixels:
1198 //
1199 // nearestNeighbour = isOutsideImage ? 0xFFFFFFFF : y * w + x
1200 // output = nearestNeighbour < inputPixelIndexEnd ? foregroundColor : backgroundColor.
1201 //
1202 // This approach keeps the amount of data that has to be transferred
1203 // between SSE and CPU registers to a minimum.
1204
1205 // isOutsideImage = (inputX < 0 || inputX > (width - 1u) || inputY < 0 || inputY > (height - 1u) ? 0xFFFFFFFF : 0x00000000;
1206 const __m128i m128_i_isOutsideImage = _mm_or_si128(
1207 _mm_or_si128(_mm_cmplt_epi32(m128_i_inputX, m128_i_zero), _mm_cmplt_epi32(m128_i_inputY, m128_i_zero)),
1208 _mm_or_si128(_mm_cmpgt_epi32(m128_i_inputX, m128_i_inputWidth_1), _mm_cmpgt_epi32(m128_i_inputY, m128_i_inputHeight_1)));
1209
1210 // Compute pixel index of the nearest neighbors of the valid pixels and store their pixel values
1211 // m_128_i_nearestNeighbors = (isOutsideImage ? 0xFFFFFFFF : inputY * inputWidth + inputX)
1212 const __m128i m_128_i_nearestNeighbors = _mm_or_si128(m128_i_isOutsideImage, _mm_add_epi32(_mm_mullo_epi32(m128_i_inputY, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_inputX, m128_i_channels))); // nn = y' * inputWidth + x'
1213 _mm_store_si128((__m128i*)nearestNeighbourElementOffsets, m_128_i_nearestNeighbors);
1214
1215 // Update the output pixels
1216 outputPixelData[0] = nearestNeighbourElementOffsets[0] < inputPixelElementIndexEnd ? *(const PixelType*)(input + nearestNeighbourElementOffsets[0]) : bColor;
1217 outputPixelData[1] = nearestNeighbourElementOffsets[1] < inputPixelElementIndexEnd ? *(const PixelType*)(input + nearestNeighbourElementOffsets[1]) : bColor;
1218 outputPixelData[2] = nearestNeighbourElementOffsets[2] < inputPixelElementIndexEnd ? *(const PixelType*)(input + nearestNeighbourElementOffsets[2]) : bColor;
1219 outputPixelData[3] = nearestNeighbourElementOffsets[3] < inputPixelElementIndexEnd ? *(const PixelType*)(input + nearestNeighbourElementOffsets[3]) : bColor;
1220
1221 outputPixelData += 4u;
1222 }
1223 }
1224}
1225
1226#endif // OCEAN_HARDWARE_SSE_VERSION >= 41
1227
1228#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1229
1230template <unsigned int tChannels>
1231void FrameInterpolatorNearestPixel::affine8BitPerChannelIntegerNEONSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* affineTransform, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
1232{
1233 // The following optimizations have been applied:
1234 //
1235 // - Matrix-vector multiplication for affine transformations:
1236 //
1237 // | x' | | X0 Y0 Z0 | | x |
1238 // | y' | = | X1 Y1 Z1 | * | y |
1239 // | 1 | | 0 0 1 | | 1 |
1240 //
1241 // which is
1242 //
1243 // x' = X0 * x + Y0 * y + Z0
1244 // y' = X1 * x + Y1 * y + Z1
1245 //
1246 // We can slightly optimize this operation, since y is constant within the inner
1247 // loop. The two terms on the right side in the above equations can be
1248 // pre-calculated:
1249 //
1250 // C0 = Y0 * y + Z0
1251 // C1 = Y1 * y + Z1
1252 //
1253 // So the computation becomes:
1254 //
1255 // | x' | | X0 * x | | C0 |
1256 // | y' | = | X1 * x | + | C1 |
1257 //
1258 // - For better utilization of cache coherence, the (output) image is processed
1259 // in blocks (64 x 64 pixels, if possible)
1260 //
1261 // - Integer fixed-point arithmetic.
1262 //
1263 // - Update products from floating point numbers with the beginning of blocks,
1264 // because the rounding error of fixed-point operations increases for larger
1265 // values:
1266 //
1267 // f - float number
1268 // i - fixed-point representation of f
1269 // v - coordinate value
1270 // eps = (f - i) - loss of precision (eps > 0)
1271 //
1272 // Rounding error:
1273 //
1274 // e = |(v * f) - (v * i)| = |v * (f - i)| = |v * eps|
1275 // (increases linearly for larger coordinate values v, i.e., with image size)
1276 //
1277 // The rounding error can be kept at bay by replacing the product (v * i) with
1278 // (v * f) at the beginning of each block followed by adding an offset for all
1279 // other pixels in the block, (N * f) where N is the number of pixels which
1280 // are processed concurrently by SIMD instructions.
1281 //
1282
1283 static_assert(tChannels >= 1u, "Invalid channel number!");
1284
1285 constexpr unsigned int fractionalBits = 15u;
1286 constexpr unsigned int totalBits = (unsigned int)(CHAR_BIT * sizeof(int));
1287
1288 static_assert((fractionalBits + 1u /* sign bit */) < totalBits, "Number of fractional bits exceeds number of total bits");
1289
1290 constexpr unsigned int maxImageEdgeLength = 1u << (totalBits - fractionalBits - 1u /* sign bit */);
1291
1292 // Scale to convert float value, v, to fixed-point value, v_q = int(round(fixedPointScale * v))
1293 constexpr Scalar fixedPointScale = Scalar(1u << fractionalBits);
1294
1295 // Number of pixels processed by NEON in each iteration
1296 constexpr unsigned int pixelsPerIteration = 4u;
1297
1298 ocean_assert(input && output);
1299 ocean_assert_and_suppress_unused(inputWidth > 0u && inputHeight > 0u && inputWidth <= maxImageEdgeLength && inputHeight <= maxImageEdgeLength, maxImageEdgeLength);
1300 ocean_assert_and_suppress_unused(outputWidth >= pixelsPerIteration && outputHeight > 0u && outputWidth <= maxImageEdgeLength && outputHeight <= maxImageEdgeLength, maxImageEdgeLength);
1301 ocean_assert(affineTransform);
1302 ocean_assert(!affineTransform->isNull() && Numeric::isEqualEps((*affineTransform)[2]) && Numeric::isEqualEps((*affineTransform)[5]));
1303
1304 ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
1305
1306 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
1307
1308 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
1309 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
1310
1311 PixelType* outputPixelData = (PixelType*)(output + firstOutputRow * (outputWidth * tChannels + outputPaddingElements));
1312
1313 const unsigned int outputRowEnd = firstOutputRow + numberOutputRows;
1314
1315 // Inidices of the final nearest neighbor pixel, which are used to the interpolation
1316 unsigned int nearestNeighboursElements[4];
1317
1318 // Indices (of elements) above this value in the input image are considered as outside of the image (intentionally not counting the last padding elements)
1319 const unsigned int inputElementsEnd = inputHeight * inputWidth * tChannels + (inputHeight - 1u) * inputPaddingElements;
1320
1321 // m128_u_inputWidth = [inputWidth, intputWidth, intputWidth, intputWidth], and the same for inputHeight
1322 const uint32x4_t m128_u_inputWidth = vdupq_n_u32(inputWidth);
1323 const uint32x4_t m128_u_inputHeight = vdupq_n_u32(inputHeight);
1324
1325 // m128_u_inputStrideElements = [rowStride, rowStride, rowStride, rowStride], rowStride = inputWidth * tChannels + inputPaddingElements
1326 const uint32x4_t m128_u_inputStrideElements = vdupq_n_u32(inputWidth * tChannels + inputPaddingElements);
1327
1328 // m128_u_channels = [tChannels, tChannels, tChannels, tChannels]
1329 const uint32x4_t m128_u_channels = vdupq_n_u32(tChannels);
1330
1331 // m128_s_offsets_0123 = [0, 1, 2, 3]
1332 const int offsets_0123[4] = { 0, 1, 2, 3 };
1333 const int32x4_t m128_s_offsets_0123 = vld1q_s32(offsets_0123);
1334
1335 // m128_f_pixelsPerIteration = [4.0f, 4.0f, 4.0f, 4.0f]
1336 const float32x4_t m128_f_pixelsPerIteration = vdupq_n_f32((float)pixelsPerIteration);
1337
1338 // Float-based transformation value X0 multiplied with scale for fixed-point
1339 // numbers. This is used to update the fixed-point products, X0 * x and X1 * x,
1340 // at the beginning of each block, i.e.
1341 // m128_f_q_X0 = [v, v, v, v], v = fixedPointScale * X0, and the same for X1
1342 const float32x4_t m128_f_X0 = vdupq_n_f32(float(fixedPointScale * (*affineTransform)(0, 0)));
1343 const float32x4_t m128_f_X1 = vdupq_n_f32(float(fixedPointScale * (*affineTransform)(1, 0)));
1344
1345 // Increment that is added to fixed-point product computed at the beginning of
1346 // each block, X0 * x and X1 * x, in each iteration inside the block
1347 const int32x4_t m128_s_q_X0x_increment = vcvtq_s32_f32(vmulq_f32(m128_f_X0, m128_f_pixelsPerIteration));
1348 const int32x4_t m128_s_q_X1x_increment = vcvtq_s32_f32(vmulq_f32(m128_f_X1, m128_f_pixelsPerIteration));
1349
1350 // Determine the optimal block size
1351 constexpr unsigned int blockSize = 64u;
1352 constexpr unsigned int blockElements = blockSize * blockSize;
1353 const unsigned int blockWidth = std::min(blockElements / std::min(numberOutputRows, blockSize), outputWidth);
1354 const unsigned int blockHeight = std::min(blockElements / blockWidth, numberOutputRows);
1355 ocean_assert(blockWidth > 0u && blockWidth <= outputWidth);
1356 ocean_assert(blockHeight > 0u && blockHeight <= numberOutputRows);
1357
1358 // Index of pixel that is the last in a block of #pixelsPerIterations pixels, i.e. number of remaining pixels after
1359 // this point are less than #pixelsPerIterations. When this pixel index is reached all pointers will be moved left
1360 // so that we can process one last block of #pixelsPerIterations pixels. That also means that depending on the width
1361 // of the output image between [1, pixelsPerIterations) pixels will be computed a second time.
1362 const unsigned int lastMultipleNeonPixelBlockStart = outputWidth - pixelsPerIteration;
1363
1364 // m128_f_lastMultipleNeonPixelBlockStart = [(float)(lastMultipleNeonPixelBlockStart + 0), (float)(lastMultipleNeonPixelBlockStart + 1), (float)(lastMultipleNeonPixelBlockStart + 2), (float)(lastMultipleNeonPixelBlockStart + 3)]
1365 const float32x4_t m128_f_lastMultipleNeonPixelBlockStart = vcvtq_f32_s32(vaddq_s32(vdupq_n_s32((int)lastMultipleNeonPixelBlockStart), m128_s_offsets_0123));
1366
1367 // m128_s_q_X0x_lastMultipleNeonPixelBlockStart = [v0, v1, v2, v3], vi = int(round(fixedPointScale * X0 * (lastMultipleNeonPixelBlockStart + i))), i = 0...3, and similarly for X1
1368 const int32x4_t m128_s_q_X0x_lastMultipleNeonPixelBlockStart = vcvtq_s32_f32(vmulq_f32(m128_f_X0, m128_f_lastMultipleNeonPixelBlockStart));
1369 const int32x4_t m128_s_q_X1x_lastMultipleNeonPixelBlockStart = vcvtq_s32_f32(vmulq_f32(m128_f_X1, m128_f_lastMultipleNeonPixelBlockStart));
1370
1371 for (unsigned int blockYStart = firstOutputRow; blockYStart < outputRowEnd; blockYStart += blockHeight)
1372 {
1373 const unsigned int blockYEnd = std::min(blockYStart + blockHeight, outputRowEnd);
1374
1375 for (unsigned int blockXStart = 0u; blockXStart < outputWidth; blockXStart += blockWidth)
1376 {
1377 const unsigned int blockXEnd = std::min(blockXStart + blockWidth, outputWidth);
1378
1379 for (unsigned int y = blockYStart; y < blockYEnd; ++y)
1380 {
1381 outputPixelData = (PixelType*)(output + y * (outputWidth * tChannels + outputPaddingElements) + blockXStart * tChannels);
1382
1383 // Constant parts, cf. optimization of matrix-vector multiplication above
1384 // m128_s_C0 = [C0, C0, C0, C0], C0 = int(round(leftShiftFactor * (Y0 * y + Z0))), and similarly for C1
1385 const int32x4_t m128_s_q_C0 = vdupq_n_s32(Numeric::round32(fixedPointScale * ((*affineTransform)(0, 1) * Scalar(y) + (*affineTransform)(0, 2))));
1386 const int32x4_t m128_s_q_C1 = vdupq_n_s32(Numeric::round32(fixedPointScale * ((*affineTransform)(1, 1) * Scalar(y) + (*affineTransform)(1, 2))));
1387
1388 // Update products, X0 * x and X1 * x, from floating point numbers with the
1389 // beginning of this block, since the rounding error of fixed-point operations
1390 // increases for larger coordinate values, cf. list of optimizations above.
1391 //
1392 // m128_s_x_0123 = [blockXStart + 0, blockXStart + 1, blockXStart + 2, blockXStart + 3]
1393 const int32x4_t m128_s_x_0123 = vaddq_s32(vdupq_n_s32(int(blockXStart)), m128_s_offsets_0123);
1394
1395 // m128_f_x_0123 = [(float)(x + 0), (float)(x + 1), (float)(x + 2), (float)(x + 3)]
1396 const float32x4_t m128_f_x_0123 = vcvtq_f32_s32(m128_s_x_0123);
1397
1398 // m128_s_q_X0x = [v0, v1, v2, v3], vi = int(round(fixedPointScale * X0 * (x + i))), i = 0...3, and similarly for X1
1399 int32x4_t m128_s_q_X0x = vcvtq_s32_f32(vmulq_f32(m128_f_X0, m128_f_x_0123));
1400 int32x4_t m128_s_q_X1x = vcvtq_s32_f32(vmulq_f32(m128_f_X1, m128_f_x_0123));
1401
1402 for (unsigned int x = blockXStart; x < blockXEnd; x += pixelsPerIteration)
1403 {
1404 if (x + pixelsPerIteration > outputWidth)
1405 {
1406 ocean_assert(x + pixelsPerIteration > outputWidth);
1407 ocean_assert(x >= pixelsPerIteration && outputWidth > pixelsPerIteration);
1408 ocean_assert(lastMultipleNeonPixelBlockStart == (outputWidth - pixelsPerIteration));
1409
1410 outputPixelData -= (x - lastMultipleNeonPixelBlockStart);
1411
1412 x = lastMultipleNeonPixelBlockStart;
1413
1414 m128_s_q_X0x = m128_s_q_X0x_lastMultipleNeonPixelBlockStart;
1415 m128_s_q_X1x = m128_s_q_X1x_lastMultipleNeonPixelBlockStart;
1416
1417 // the for loop will stop after this iteration
1418 ocean_assert(!(x + pixelsPerIteration < outputWidth));
1419 }
1420
1421 // Compute pixel location in the input image
1422 // m128_s_q_inputX = x' = C0 + X0 * x
1423 // m128_s_q_inputY = y' = C1 + X1 * x
1424 const int32x4_t m128_s_q_inputX = vaddq_s32(m128_s_q_C0, m128_s_q_X0x);
1425 const int32x4_t m128_s_q_inputY = vaddq_s32(m128_s_q_C1, m128_s_q_X1x);
1426
1427 // Convert (signed) fixed-point location to unsigned int, i.e., negative values
1428 // will be larger than image dimensions (width, height), cf. note below
1429 //
1430 // m128_u_inputX = (unsigned int) round(inputX >> N)
1431 // m128_u_inputY = (unsigned int) round(inputY >> N)
1432 const uint32x4_t m128_u_inputX = vreinterpretq_u32_s32(vrshrq_n_s32(m128_s_q_inputX, fractionalBits));
1433 const uint32x4_t m128_u_inputY = vreinterpretq_u32_s32(vrshrq_n_s32(m128_s_q_inputY, fractionalBits));
1434
1435 // Note: Detection of input position outside the input image
1436 //
1437 // If the input point is outside the input image, then set the index
1438 // of its nearest neighbor to a value that is above the number of
1439 // available pixels in the image. When writing to the output, a
1440 // check will make sure to use the background color for those
1441 // pixels:
1442 //
1443 // nearestNeighbour = isOutsideImage ? 0xFFFFFFFF : y * w + x
1444 // output = nearestNeighbour < inputPixelIndexEnd ? foregroundColor : backgroundColor.
1445 //
1446 // This approach keeps the amount of data that has to be transferred
1447 // between NEON and CPU registers to a minimum.
1448
1449 // Casting negative signed values to unsigned value results in very large values, e.g., ((unsigned int) -1) > inputWidth.
1450 // We'll exploit that below to check if pixel coordinates are outside the image.
1451 // m128_u_isOutsideImage = (x >= inputWidth || y >= inputHeight) ? 0xFFFFFFFF : 0x00000000;
1452 const uint32x4_t m128_u_isOutsideImage = vorrq_u32(vcgeq_u32(m128_u_inputX, m128_u_inputWidth), vcgeq_u32(m128_u_inputY, m128_u_inputHeight));
1453
1454 // Determine the pixel indices of the nearest neighbors and store the result
1455 // If the pixel is outside the image then set the index of the nearest neighbor to the largest possible value
1456 // m_128_u_nearestNeighbors = m128_u_isOutsideImage | (inputY * inputStrideElements) + (inputX * channels);
1457 // which is equivalent to
1458 // m_128_u_nearestNeighborElements = (m128_u_isOutsideImage ? 0xFFFFFFFF : (inputY * inputStrideElements) + (inputX * channels))
1459 const uint32x4_t m_128_u_nearestNeighborsElements = vorrq_u32(m128_u_isOutsideImage, vaddq_u32(vmulq_u32(m128_u_inputY, m128_u_inputStrideElements), vmulq_u32(m128_u_inputX, m128_u_channels)));
1460 vst1q_u32(nearestNeighboursElements, m_128_u_nearestNeighborsElements);
1461
1462 outputPixelData[0] = nearestNeighboursElements[0] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighboursElements[0]) : *bColor;
1463 outputPixelData[1] = nearestNeighboursElements[1] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighboursElements[1]) : *bColor;
1464 outputPixelData[2] = nearestNeighboursElements[2] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighboursElements[2]) : *bColor;
1465 outputPixelData[3] = nearestNeighboursElements[3] < inputElementsEnd ? *(const PixelType*)(input + nearestNeighboursElements[3]) : *bColor;
1466
1467 outputPixelData += pixelsPerIteration;
1468
1469 // m128_s_q_X0x += m128_s_q_X0x_increment, and similarly for X1
1470 m128_s_q_X0x = vaddq_s32(m128_s_q_X0x, m128_s_q_X0x_increment);
1471 m128_s_q_X1x = vaddq_s32(m128_s_q_X1x, m128_s_q_X1x_increment);
1472 }
1473 }
1474 }
1475
1476 outputPixelData = (PixelType*)((uint8_t*)outputPixelData + outputPaddingElements);
1477 }
1478}
1479
1480template <typename T, unsigned int tChannels>
1481void FrameInterpolatorNearestPixel::homographyNEONSubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
1482{
1483 static_assert(tChannels >= 1u, "Invalid channel number!");
1484
1485 ocean_assert(input != nullptr && output != nullptr);
1486 ocean_assert(inputWidth > 0u && inputHeight > 0u);
1487 ocean_assert(outputWidth >= 4u && outputHeight > 0u);
1488 ocean_assert(input_H_output != nullptr && !input_H_output->isSingular());
1489
1490 ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
1491
1492 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
1493 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
1494
1495 typedef typename DataType<T, tChannels>::Type PixelType;
1496
1497 const T zeroColor[tChannels] = {T(0)};
1498 const PixelType bColor = borderColor ? *(const PixelType*)(borderColor) : *(const PixelType*)(zeroColor);
1499
1500 unsigned int validPixels[4];
1501 unsigned int nearestNeighbourElementOffsets[4];
1502
1503 // | X0 Y0 Z0 | | x |
1504 // Homography H = | X1 Y1 Z1 |, point p = | y |
1505 // | X2 Y2 Z2 | | 1 |
1506 //
1507 // | xx |
1508 // pp = H * p = | yy |
1509 // | zz |
1510 //
1511 // | xx | | X0 Y0 Z0 | | x |
1512 // <=> | yy | = | X1 Y1 Z1 | * | y |
1513 // | zz | | X2 Y2 Z2 | | 1 |
1514 //
1515 // | xx | | X0 * x | | Y0 * y + Z0 |
1516 // <=> | yy | = | X1 * x | + | Y1 * y + Z1 |
1517 // | zz | | X2 * x | | Y2 * y + Z2 |
1518 //
1519 // | xx | | X0 * x | | C0 | | Y0 * y + Z0 |
1520 // <=> | yy | = | X1 * x | + | C1 |, C = | Y1 * y + Z1 |
1521 // | zz | | X2 * x | | C2 | | Y2 * y + Z2 |
1522 //
1523 // Where C is a constant term that can be pre-computed (per image row)
1524 //
1525 // | x' | | xx / zz | | (X0 * x + C0) / (X2 * x + C2) |
1526 // p' = | y' | = | yy / zz | = | (X1 * x + C1) / (X2 * x + C2) |
1527
1528 // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
1529 const float32x4_t m128_f_X0 = vdupq_n_f32(float((*input_H_output)(0, 0)));
1530 const float32x4_t m128_f_X1 = vdupq_n_f32(float((*input_H_output)(1, 0)));
1531 const float32x4_t m128_f_X2 = vdupq_n_f32(float((*input_H_output)(2, 0)));
1532
1533 // we store 4 floats: [0.5f, 0.5f, 0.5f, 0.5f]
1534 const float32x4_t m128_f_pointFive = vdupq_n_f32(0.5f);
1535 const float32x4_t m128_f_negPointFive = vdupq_n_f32(-0.5f);
1536
1537 // we store 4 integers: [inputStrideElements, inputStrideElements, inputStrideElements, inputStrideElements]
1538 const uint32x4_t m128_u_inputStrideElements = vdupq_n_u32(inputStrideElements);
1539
1540 const uint32x4_t m128_u_channels = vdupq_n_u32(tChannels);
1541
1542 // we store 4 floats: [inputWidth - 0.5f, inputWidth - 0.5f, inputWidth - 0.5f, inputWidth - 0.5f], and same with inputHeight
1543 const float32x4_t m128_f_inputWidth_pointFive = vdupq_n_f32(float(inputWidth) - 0.5f);
1544 const float32x4_t m128_f_inputHeight_pointFive = vdupq_n_f32(float(inputHeight) - 0.5f);
1545
1546 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
1547 {
1548 PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
1549
1550 // Pre-compute the constant terms [Ci, Ci, Ci, Ci], i={0, 1, 2}
1551 const float32x4_t m128_f_C0 = vdupq_n_f32(float((*input_H_output)(0, 1) * Scalar(y) + (*input_H_output)(0, 2)));
1552 const float32x4_t m128_f_C1 = vdupq_n_f32(float((*input_H_output)(1, 1) * Scalar(y) + (*input_H_output)(1, 2)));
1553 const float32x4_t m128_f_C2 = vdupq_n_f32(float((*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2)));
1554
1555 for (unsigned int x = 0u; x < outputWidth; x += 4u)
1556 {
1557 if (x + 4u > outputWidth)
1558 {
1559 // Since the last iteration will not fit into the output frame, we'll shift N pixel left so that it fits again (at most 3 pixels).
1560
1561 ocean_assert(x >= 4u && outputWidth > 4u);
1562 const unsigned int newX = outputWidth - 4u;
1563
1564 ocean_assert(x > newX);
1565 outputPixelData -= x - newX;
1566
1567 x = newX;
1568
1569 // the for loop will stop after this iteration
1570 ocean_assert(!(x + 4u < outputWidth));
1571 }
1572
1573 // we need four successive x coordinate floats:
1574 // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
1575 float x_0123[4] = { float(x + 0u), float(x + 1u), float(x + 2u), float(x + 3u) };
1576 const float32x4_t m128_f_x_0123 = vld1q_f32(x_0123);
1577
1578 // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
1579 const float32x4_t m128_f_xx = vmlaq_f32(m128_f_C0, m128_f_X0, m128_f_x_0123);
1580 const float32x4_t m128_f_yy = vmlaq_f32(m128_f_C1, m128_f_X1, m128_f_x_0123);
1581 const float32x4_t m128_f_zz = vmlaq_f32(m128_f_C2, m128_f_X2, m128_f_x_0123);
1582
1583#ifdef USE_DIVISION_ARM64_ARCHITECTURE
1584
1585 // using the division available from ARM64 is more precise
1586 const float32x4_t m128_f_inputX = vdivq_f32(m128_f_xx, m128_f_zz);
1587 const float32x4_t m128_f_inputY = vdivq_f32(m128_f_yy, m128_f_zz);
1588
1589#else
1590
1591 // we calculate the (approximated) inverse of zz
1592 // [1/zz3, 1/zz2, 1/zz1, 1/zz0]
1593 float32x4_t inv_zz_128 = vrecpeq_f32(m128_f_zz);
1594 inv_zz_128 = vmulq_f32(vrecpsq_f32(m128_f_zz, inv_zz_128), inv_zz_128); // improving the accuracy of the approx. inverse by Newton/Raphson
1595
1596 // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
1597 const float32x4_t m128_f_inputX = vmulq_f32(m128_f_xx, inv_zz_128);
1598 const float32x4_t m128_f_inputY = vmulq_f32(m128_f_yy, inv_zz_128);
1599
1600#endif // USE_DIVISION_ARM64_ARCHITECTURE
1601
1602 // Mark pixels inside the input image as valid, all others as invalid
1603 const uint32x4_t m128_u_validPixelX = vandq_u32(vcltq_f32(m128_f_inputX, m128_f_inputWidth_pointFive), vcgtq_f32(m128_f_inputX, m128_f_negPointFive)); // inputX < (inputWidth - 0.5) && inputX >= -0.5 ? 0xFFFFFFFF : 0x00000000
1604 const uint32x4_t m128_u_validPixelY = vandq_u32(vcltq_f32(m128_f_inputY, m128_f_inputHeight_pointFive), vcgtq_f32(m128_f_inputY, m128_f_negPointFive)); // inputY < (inputHeight - 0.5) && inputY > -0.5 ? 0xFFFFFFFF : 0x00000000
1605
1606 const uint32x4_t m128_u_validPixel = vandq_u32(m128_u_validPixelX, m128_u_validPixelY); // is_inside_input_frame(inputPosition) ? 0xFFFFFFFF : 0x00000000
1607
1608 // Stop here if all pixels are invalid
1609 const uint32x2_t m64_u_validPixel = vorr_u32(vget_low_u32(m128_u_validPixel), vget_high_u32(m128_u_validPixel));
1610 if ((vget_lane_u32(m64_u_validPixel, 0) | vget_lane_u32(m64_u_validPixel, 1)) == 0x00000000u)
1611 {
1612#ifdef OCEAN_DEBUG
1613 // clang-format off
1614 OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
1615 // clang-format on
1616 vst1q_u32(debugValidPixels, m128_u_validPixel);
1617 ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
1618#endif
1619
1620 outputPixelData[0] = bColor;
1621 outputPixelData[1] = bColor;
1622 outputPixelData[2] = bColor;
1623 outputPixelData[3] = bColor;
1624
1625 outputPixelData += 4;
1626
1627 continue;
1628 }
1629
1630 // Determine the pixel indices of the nearest neighbors and store the result
1631 vst1q_u32(validPixels, m128_u_validPixel);
1632 ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
1633
1634 const uint32x4_t m128_u_inputX = vcvtq_u32_f32(vaddq_f32(m128_f_inputX, m128_f_pointFive)); // Round to nearest integer: x' = (int) (x + 0.5f)
1635 const uint32x4_t m128_u_inputY = vcvtq_u32_f32(vaddq_f32(m128_f_inputY, m128_f_pointFive)); // Round to nearest integer: y' = (int) (y + 0.5f)
1636 const uint32x4_t m_128_u_nearestNeighbourElementOffsets = vmlaq_u32(vmulq_u32(m128_u_inputY, m128_u_inputStrideElements), m128_u_inputX, m128_u_channels); // nn = y' * inputStrideElements + x' * channels
1637 vst1q_u32(nearestNeighbourElementOffsets, m_128_u_nearestNeighbourElementOffsets);
1638
1639#ifdef OCEAN_DEBUG
1640 unsigned int debugInputX[4];
1641 unsigned int debugInputY[4];
1642 vst1q_u32(debugInputX, m128_u_inputX);
1643 vst1q_u32(debugInputY, m128_u_inputY);
1644 ocean_assert(!validPixels[0] || (debugInputX[0] < inputWidth && debugInputY[0] < inputHeight));
1645 ocean_assert(!validPixels[1] || (debugInputX[1] < inputWidth && debugInputY[1] < inputHeight));
1646 ocean_assert(!validPixels[2] || (debugInputX[2] < inputWidth && debugInputY[2] < inputHeight));
1647 ocean_assert(!validPixels[3] || (debugInputX[3] < inputWidth && debugInputY[3] < inputHeight));
1648#endif
1649
1650 outputPixelData[0] = validPixels[0] ? *(const PixelType*)(input + nearestNeighbourElementOffsets[0]) : bColor;
1651 outputPixelData[1] = validPixels[1] ? *(const PixelType*)(input + nearestNeighbourElementOffsets[1]) : bColor;
1652 outputPixelData[2] = validPixels[2] ? *(const PixelType*)(input + nearestNeighbourElementOffsets[2]) : bColor;
1653 outputPixelData[3] = validPixels[3] ? *(const PixelType*)(input + nearestNeighbourElementOffsets[3]) : bColor;
1654
1655 outputPixelData += 4;
1656 }
1657 }
1658}
1659
1660#endif // OCEAN_HARDWARE_NEON_VERSION >= 10
1661
1662template <unsigned int tChannels>
1663void FrameInterpolatorNearestPixel::homographyMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3* input_H_output, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
1664{
1665 static_assert(tChannels > 0u, "Invalid channel number!");
1666
1667 ocean_assert(input != nullptr && output != nullptr && outputMask != nullptr);
1668 ocean_assert(inputWidth > 0u && inputHeight > 0u);
1669 ocean_assert(outputWidth > 0u && outputHeight > 0u);
1670 ocean_assert(input_H_output != nullptr && !input_H_output->isSingular());
1671
1672 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
1673
1674 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
1675 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
1676 const unsigned int outputMaskStrideElements = outputWidth + outputMaskPaddingElements;
1677
1678 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
1679
1680 output += firstOutputRow * outputStrideElements;
1681 outputMask += firstOutputRow * outputMaskStrideElements;
1682
1683 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
1684 {
1685 PixelType* outputPixel = (PixelType*)(output);
1686
1687 for (unsigned int x = 0; x < outputWidth; ++x)
1688 {
1689 const Vector2 outputPosition = Vector2(Scalar(int(x) + outputOriginX), Scalar(int(y) + outputOriginY));
1690 const Vector2 inputPosition(*input_H_output * outputPosition);
1691
1692 const unsigned int inputX = Numeric::round32(inputPosition.x());
1693 const unsigned int inputY = Numeric::round32(inputPosition.y());
1694
1695 if (inputX < inputWidth && inputY < inputHeight)
1696 {
1697 *outputPixel = *((PixelType*)(input + inputY * inputStrideElements + inputX * tChannels));
1698 *outputMask = maskValue;
1699 }
1700 else
1701 {
1702 *outputMask = 0xFFu - maskValue;
1703 }
1704
1705 ++outputPixel;
1706 ++outputMask;
1707 }
1708
1709 output += outputStrideElements;
1710 outputMask += outputMaskPaddingElements;
1711 }
1712}
1713
1714template <unsigned int tChannels>
1715void FrameInterpolatorNearestPixel::transform8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* lookupTable, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
1716{
1717 static_assert(tChannels > 0u, "Invalid channel number!");
1718
1719 ocean_assert(lookupTable != nullptr);
1720 ocean_assert(input != nullptr && output != nullptr);
1721
1722 ocean_assert(inputWidth != 0u && inputHeight != 0u);
1723 ocean_assert(firstRow + numberRows <= lookupTable->sizeY());
1724
1725 const unsigned int outputWidth = (unsigned int)(lookupTable->sizeX());
1726
1727 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
1728 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
1729
1730 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
1731
1732 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
1733 const PixelType* const bColor = borderColor ? (PixelType*)(borderColor) : (PixelType*)(zeroColor);
1734
1735 if (offset)
1736 {
1737 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
1738 {
1739 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
1740
1741 for (unsigned int x = 0u; x < outputWidth; ++x)
1742 {
1743 const Vector2 inputOffset(lookupTable->bilinearValue(Scalar(x), Scalar(y)));
1744 const Vector2 inputPosition(Scalar(x) + inputOffset.x(), Scalar(y) + inputOffset.y());
1745
1746 const unsigned int inputX = Numeric::round32(inputPosition.x());
1747 const unsigned int inputY = Numeric::round32(inputPosition.y());
1748
1749 if (inputX < inputWidth && inputY < inputHeight)
1750 {
1751 *outputData = *((const PixelType*)(input + inputY * inputStrideElements) + inputX);
1752 }
1753 else
1754 {
1755 *outputData = *bColor;
1756 }
1757
1758 ++outputData;
1759 }
1760 }
1761 }
1762 else
1763 {
1764 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
1765 {
1766 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
1767
1768 for (unsigned int x = 0u; x < outputWidth; ++x)
1769 {
1770 const Vector2 inputPosition(lookupTable->bilinearValue(Scalar(x), Scalar(y)));
1771
1772 const unsigned int inputX = Numeric::round32(inputPosition.x());
1773 const unsigned int inputY = Numeric::round32(inputPosition.y());
1774
1775 if (inputX < inputWidth && inputY < inputHeight)
1776 {
1777 *outputData = *((const PixelType*)(input + inputY * inputStrideElements) + inputX);
1778 }
1779 else
1780 {
1781 *outputData = *bColor;
1782 }
1783
1784 ++outputData;
1785 }
1786 }
1787 }
1788}
1789
1790template <unsigned int tChannels>
1791void FrameInterpolatorNearestPixel::transformMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable* lookupTable, const bool offset, uint8_t* output, const unsigned int outputPaddingElements, uint8_t* outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows)
1792{
1793 static_assert(tChannels > 0u, "Invalid channel number!");
1794
1795 ocean_assert(lookupTable != nullptr);
1796 ocean_assert(input != nullptr && output != nullptr);
1797
1798 ocean_assert(inputWidth != 0u && inputHeight != 0u);
1799 ocean_assert(firstRow + numberRows <= lookupTable->sizeY());
1800
1801 ocean_assert(NumericT<unsigned int>::isInsideValueRange(lookupTable->sizeX()));
1802 const unsigned int outputWidth = (unsigned int)(lookupTable->sizeX());
1803
1804 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
1805 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
1806 const unsigned int outputMaskStrideElements = outputWidth + outputMaskPaddingElements;
1807
1808 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
1809
1810 output += firstRow * outputStrideElements;
1811 outputMask += firstRow * outputMaskStrideElements;
1812
1813 if (offset)
1814 {
1815 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
1816 {
1817 PixelType* outputPixel = (PixelType*)(output);
1818
1819 for (unsigned int x = 0u; x < lookupTable->sizeX(); ++x)
1820 {
1821 const Vector2 inputOffset(lookupTable->bilinearValue(Scalar(x), Scalar(y)));
1822 const Vector2 inputPosition(Scalar(x) + inputOffset.x(), Scalar(y) + inputOffset.y());
1823
1824 const unsigned int inputX = Numeric::round32(inputPosition.x());
1825 const unsigned int inputY = Numeric::round32(inputPosition.y());
1826
1827 if (inputX < inputWidth && inputY < inputHeight)
1828 {
1829 *outputPixel = *((PixelType*)(input + inputY * inputStrideElements + inputX * tChannels));
1830 *outputMask = maskValue;
1831 }
1832 else
1833 {
1834 *outputMask = 0xFF - maskValue;
1835 }
1836
1837 ++outputPixel;
1838 ++outputMask;
1839 }
1840
1841 output += outputStrideElements;
1842 outputMask += outputMaskPaddingElements;
1843 }
1844 }
1845 else
1846 {
1847 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
1848 {
1849 PixelType* outputPixel = (PixelType*)(output);
1850
1851 for (unsigned int x = 0u; x < lookupTable->sizeX(); ++x)
1852 {
1853 const Vector2 inputPosition(lookupTable->bilinearValue(Scalar(x), Scalar(y)));
1854
1855 const unsigned int inputX = Numeric::round32(inputPosition.x());
1856 const unsigned int inputY = Numeric::round32(inputPosition.y());
1857
1858 if (inputX < inputWidth && inputY < inputHeight)
1859 {
1860 *outputPixel = *((PixelType*)(input + inputY * inputStrideElements + inputX * tChannels));
1861 *outputMask = maskValue;
1862 }
1863 else
1864 {
1865 *outputMask = 0xFF - maskValue;
1866 }
1867
1868 ++outputPixel;
1869 ++outputMask;
1870 }
1871
1872 output += outputStrideElements;
1873 outputMask += outputMaskPaddingElements;
1874 }
1875 }
1876}
1877
1878} // namespace CV
1879
1880} // namespace Ocean
1881
1882#endif // META_OCEAN_CV_FRAME_INTERPOLATOR_NEAREST_PIXEL_H
The following comfort class provides comfortable functions simplifying prototyping applications but a...
Definition FrameInterpolatorNearestPixel.h:49
static bool rotate90(const Frame &input, Frame &output, const bool clockwise, Worker *worker=nullptr)
Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
Definition FrameInterpolatorNearestPixel.h:589
static bool affine(const Frame &input, Frame &output, const SquareMatrix3 &input_A_output, const uint8_t *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Applies an affine image transformation to a frame (with zipped pixel format) and renders using neares...
static bool resize(const Frame &source, Frame &target, Worker *worker=nullptr)
Resizes a given frame by a nearest pixel search.
static bool transform(const Frame &input, Frame &output, const LookupTable &lookupTable, const bool offset, const uint8_t *borderColor, Worker *worker=nullptr)
Transforms a given input frame (with 1 plane) into an output frame by application of an interpolation...
static bool transformMask(const Frame &input, Frame &output, Frame &outputMask, const LookupTable &lookupTable, const bool offset, Worker *worker=nullptr, const uint8_t maskValue=0xFFu)
Transforms a given input frame (with zipped pixel format) into an output frame by application of an i...
static bool homographyMask(const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 &input_H_output, Worker *worker=nullptr, const uint8_t maskValue=0xFFu, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame (with zipped pixel format) into an output frame (with arbitrary frame ...
static bool rotate180(const Frame &input, Frame &output, Worker *worker=nullptr)
Rotates a given frame by 180 degrees.
Definition FrameInterpolatorNearestPixel.h:594
static bool rotate(const Frame &input, Frame &output, const int angle, Worker *worker=nullptr)
Rotates a given frame with 90 degree steps.
Definition FrameInterpolatorNearestPixel.h:599
static bool homography(const Frame &input, Frame &output, const SquareMatrix3 &input_H_output, const void *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame (with zipped pixel format) into an output frame by application of a ho...
This class implements highly optimized interpolation functions with fixed properties.
Definition FrameInterpolatorNearestPixel.h:189
static void resize400x400To224x224_8BitPerChannel(const uint8_t *const source, uint8_t *const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 224x224 ...
This class implements a nearest pixel frame interpolator.
Definition FrameInterpolatorNearestPixel.h:35
static void resize(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Resizes a given frame by a nearest pixel search and uses several CPU cores to speed update the proces...
Definition FrameInterpolatorNearestPixel.h:605
static void homography(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, const T *borderColor, T *output, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of a homography.
Definition FrameInterpolatorNearestPixel.h:664
static void affine8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_A_output, const uint8_t *borderColor, uint8_t *output, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Applies an affine image transformation to an 8 bit per channel input frame and renders the output.
Definition FrameInterpolatorNearestPixel.h:620
static void transform8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable &lookupTable, const bool offset, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given input frame (with zipped pixel format) into an output frame by application of an i...
Definition FrameInterpolatorNearestPixel.h:719
LookupCorner2< Vector2 > LookupTable
Definition of a lookup table for 2D vectors.
Definition FrameInterpolatorNearestPixel.h:39
static void homographySubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const T *borderColor, T *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Copies the image content of an input image to a subset of an output image by application of a given h...
Definition FrameInterpolatorNearestPixel.h:877
static void resizeSubset(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Resizes a given frame by a nearest pixel search.
Definition FrameInterpolatorNearestPixel.h:757
static void rotate90(const TElementType *source, TElementType *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const bool clockwise, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rotates a given frame either clockwise or counter-clockwise by 90 degree.
Definition FrameInterpolatorNearestPixel.h:745
static void affine8BitPerChannelSSESubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3 *affineTransform, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Apply an affine image transformation to an 8 bit per channel frame using nearest neighbor interpolati...
Definition FrameInterpolatorNearestPixel.h:925
static void transformMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable *lookupTable, const bool offset, uint8_t *output, const unsigned int outputPaddingElements, uint8_t *outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows)
Transforms a given input frame (with zipped pixel format) into an output frame by application of an i...
Definition FrameInterpolatorNearestPixel.h:1791
static void homographyMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3 *input_H_output, uint8_t *output, const unsigned int outputPaddingElements, uint8_t *outputMask, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorNearestPixel.h:1663
static void affine8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3 *affineTransform, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Apply an affine image transformation to an 8 bit per channel frame using nearest neighbor interpolati...
Definition FrameInterpolatorNearestPixel.h:806
static void homographySSESubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const T *borderColor, T *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Copies the image content of an input image to a subset of an output image by application of a given h...
Definition FrameInterpolatorNearestPixel.h:1063
static void homographyMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3 &input_H_output, uint8_t *output, const unsigned int outputPaddingElements, uint8_t *outputMask, const unsigned int outputMaskPaddingElements, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition FrameInterpolatorNearestPixel.h:706
static void affine8BitPerChannelIntegerNEONSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const SquareMatrix3 *affineTransform, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Affine image transformation for 8-bit per channel frames using nearest neighbor interpolation (using ...
Definition FrameInterpolatorNearestPixel.h:1231
static void transform8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *lookupTable, const bool offset, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame (with zipped pixel format) into an output frame by applica...
Definition FrameInterpolatorNearestPixel.h:1715
static void transformMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int inputPaddingElements, const LookupTable &lookupTable, const bool offset, uint8_t *output, const unsigned int outputPaddingElements, uint8_t *outputMask, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given input frame (with zipped pixel format) into an output frame by application of an i...
Definition FrameInterpolatorNearestPixel.h:732
static void homographyNEONSubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const T *borderColor, T *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Copies the image content of an input image to a subset of an output image by application of a given h...
Definition FrameInterpolatorNearestPixel.h:1481
static bool coversHomographyInputFrame(const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int outputWidth, const unsigned int outputHeight, const SquareMatrix3 &input_H_output, const int outputOriginX=0, const int outputOriginY=0)
Checks whether the application of a given homography for a specified input frame and output frame cov...
static bool rotate(const Frame &input, Frame &output, const int angle, Worker *worker=nullptr)
Rotates a given frame in 90-degree steps.
static bool rotate180(const Frame &input, Frame &output, Worker *worker=nullptr)
Rotates a given frame by 180 degrees.
static bool rotate90(const Frame &input, Frame &output, const bool clockwise, Worker *worker=nullptr)
Rotates a given frame either clockwise or counter-clockwise by 90 degrees.
This class implements a 2D pixel position with pixel precision.
Definition PixelPosition.h:65
T y() const
Returns the vertical coordinate position of this object.
Definition PixelPosition.h:470
T x() const
Returns the horizontal coordinate position of this object.
Definition PixelPosition.h:458
static Caller< void > createStatic(typename StaticFunctionPointerMaker< void, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass >::Type function)
Creates a new caller container for a static function with no function parameter.
Definition Caller.h:2876
This class implements Ocean's image class.
Definition Frame.h:1808
void setRelativeTimestamp(const Timestamp &relative)
Sets the relative timestamp of this frame.
Definition Frame.h:4233
void setTimestamp(const Timestamp &timestamp)
Sets the timestamp of this frame.
Definition Frame.h:4228
const Timestamp & timestamp() const
Returns the timestamp of this frame.
Definition Frame.h:4218
const Timestamp & relativeTimestamp() const
Returns the relative timestamp of this frame.
Definition Frame.h:4223
Definition of a frame type composed by the frame dimension, pixel format and pixel origin.
Definition Frame.h:30
size_t sizeY() const
Returns the vertical dimension of this lookup object.
Definition Lookup2.h:947
size_t sizeX() const
Returns the horizontal dimension of this lookup object.
Definition Lookup2.h:941
This class implements a 2D lookup object with values at the bins' corners defining the individual loo...
Definition Lookup2.h:636
T bilinearValue(const TScalar x, const TScalar y) const
Applies a lookup for a specific position in this lookup object.
Definition Lookup2.h:1815
This class implements an object able to allocate memory.
Definition base/Memory.h:22
void * data()
Returns the pointer to the writable memory which is allocated by this object.
Definition base/Memory.h:303
This class provides basic numeric functionalities.
Definition Numeric.h:57
static constexpr int32_t round32(const T value)
Returns the rounded 32 bit integer value of a given value.
Definition Numeric.h:2064
static constexpr bool isEqualEps(const T value)
Returns whether a value is smaller than or equal to a small epsilon.
Definition Numeric.h:2087
bool isNull() const
Returns whether this matrix is a zero matrix.
Definition SquareMatrix3.h:1333
const T * data() const
Returns a pointer to the internal values.
Definition SquareMatrix3.h:1046
bool isSingular() const
Returns whether this matrix is singular (and thus cannot be inverted).
Definition SquareMatrix3.h:1341
TypeMapperBySize< sizeof(T)>::Type Type
Definition of an invalid mapped data type.
Definition DataType.h:508
const T & x() const noexcept
Returns the x value.
Definition Vector2.h:710
const T & y() const noexcept
Returns the y value.
Definition Vector2.h:722
bool isEqual(const VectorT2< T > &vector, const T eps) const
Returns whether two vectors are equal up to a specified epsilon.
Definition Vector2.h:758
This class implements a worker able to distribute function calls over different threads.
Definition Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
SquareMatrixT3< Scalar > SquareMatrix3
Definition of the SquareMatrix3 object, depending on the OCEAN_MATH_USE_SINGLE_PRECISION either with ...
Definition SquareMatrix3.h:42
float Scalar
Definition of a scalar type.
Definition Math.h:129
VectorT3< Scalar > Vector3
Definition of a 3D vector.
Definition Vector3.h:29
VectorT2< Scalar > Vector2
Definition of a 2D vector.
Definition Vector2.h:28
The namespace covering the entire Ocean framework.
Definition Accessor.h:15
Default definition of a type with tBytes bytes.
Definition DataType.h:32