Ocean
FrameInterpolatorBilinear.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 #ifndef META_OCEAN_CV_FRAME_INTERPOLATOR_BILINEAR_H
9 #define META_OCEAN_CV_FRAME_INTERPOLATOR_BILINEAR_H
10 
11 #include "ocean/cv/CV.h"
12 #include "ocean/cv/FrameBlender.h"
13 #include "ocean/cv/PixelPosition.h"
14 #include "ocean/cv/SSE.h"
15 
16 #include "ocean/base/DataType.h"
17 #include "ocean/base/Frame.h"
18 #include "ocean/base/Memory.h"
19 #include "ocean/base/Worker.h"
20 
22 
23 #include "ocean/math/AnyCamera.h"
26 #include "ocean/math/Lookup2.h"
28 #include "ocean/math/Quaternion.h"
31 #include "ocean/math/Vector2.h"
32 
33 namespace Ocean
34 {
35 
36 namespace CV
37 {
38 
39 /**
40  * This class implements bilinear frame interpolator functions.
41  * @ingroup cv
42  */
43 class OCEAN_CV_EXPORT FrameInterpolatorBilinear
44 {
45  public:
46 
47  /**
48  * Definition of a lookup table for 2D vectors.
49  */
51 
52  public:
53 
54  /**
55  * The following comfort class provides comfortable functions simplifying prototyping applications but also increasing binary size of the resulting applications.
56  * Best practice is to avoid using these functions if binary size matters,<br>
57  * as for every comfort function a corresponding function exists with specialized functionality not increasing binary size significantly.<br>
58  */
59  class OCEAN_CV_EXPORT Comfort
60  {
61  public:
62 
63  /**
64  * Resizes/rescales a given frame by application of a bilinear interpolation.
65  * @param source The source frame to resize, must be valid
66  * @param target Resulting target frame with identical frame pixel format and pixel origin as the source frame, must be valid
67  * @param worker Optional worker object used for load distribution
68  * @return True, if the frame could be resized
69  */
70  static bool resize(const Frame& source, Frame& target, Worker* worker = nullptr);
71 
72  /**
73  * Resizes/rescales a given frame by application of a bilinear interpolation.
74  * @param frame The frame to resize, must be valid
75  * @param width The width of the resized frame in pixel, with range [1, infinity)
76  * @param height The height of the resized frame in pixel, with range [1, infinity)
77  * @param worker Optional worker object used for load distribution
78  * @return True, if the frame could be resized
79  */
80  static inline bool resize(Frame& frame, const unsigned int width, const unsigned int height, Worker* worker = nullptr);
81 
82  /**
83  * Zooms into a given input frame (or zooms out) and stores the zoomed image content in an output frame.
84  * The resulting zoomed image will have the same frame type (frame resolution, pixel format, pixel origin) as the input image.<br>
85  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
86  * @param source The source frame for which the zoomed image content will be created, must be valid
87  * @param target The resulting target frame which will receive the zoomed image, will be set to the same frame type as the source frame, can be invalid
88  * @param zoomFactor The zoom factor to be applied, a factor < 1 will zoom out, a factor > 1 will zoom in, with range (0, infinity)
89  * @param worker Optional worker object to distribute the computation to several CPU cores
90  * @return True, if succeeded
91  */
92  static bool zoom(const Frame& source, Frame& target, const Scalar zoomFactor, Worker* worker = nullptr);
93 
94  /**
95  * Transforms a given input frame into an output frame (with arbitrary frame dimension) by application of a homography.
96  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
97  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
98  * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).<br>
99  * Information: This function is the equivalent to OpenCV's cv::warpPerspective().
100  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
101  * @param input The input frame that will be transformed, must be valid
102  * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must be valid
103  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
104  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels and the data type of the pixel elements, nullptr to assign 0 to each channel
105  * @param worker Optional worker object to distribute the computational load
106  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
107  * @return True, if succeeded
108  */
109  static bool homography(const Frame& input, Frame& output, const SquareMatrix3& input_H_output, const void* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
110 
111  /**
112  * Transforms a given input frame into an output frame (with arbitrary frame dimension) by application of four homographies.
113  * For each quadrant of the output frame an individual homography is applied while the final result is interpolated between the four homographies.<br>
114  * The quadrant order of the homographies is as follows: top left, top right, bottom left, bottom right.<br>
115  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
116  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
117  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
118  * @param input The input frame that will be transformed
119  * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must have a valid dimension
120  * @param homographies Four homographies used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
121  * @param outputQuadrantCenter The center position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, output.width())x[0, output.height())
122  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
123  * @param worker Optional worker object to distribute the computational load
124  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
125  * @return True, if succeeded
126  */
127  static bool homographies(const Frame& input, Frame& output, const SquareMatrix3 homographies[4], const Vector2& outputQuadrantCenter, const uint8_t* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
128 
129  /**
130  * Transforms a given input frame into an output frame (with arbitrary frame dimension) by application of a homography.
131  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
132  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
133  * Input frame pixels lying outside the frame will be masked in the resulting output mask frame; furthermore, these pixels are untouched in the output frame.<br>
134  * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).
135  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
136  * @param input The input frame that will be transformed, must be valid
137  * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must have a valid dimension
138  * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame, must be valid and must have the same frame dimension as the output frame
139  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
140  * @param worker Optional worker object to distribute the computational load
141  * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
142  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
143  * @return True, if succeeded
144  * @see coversHomographyInputFrame().
145  */
146  static bool homographyMask(const Frame& input, Frame& output, Frame& outputMask, const SquareMatrix3& input_H_output, Worker* worker = nullptr, const uint8_t maskValue = 0xFF, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
147 
148  /**
149  * Transforms a given input frame into an output frame (with arbitrary frame dimension) by application of four homographies.
150  * For each quadrant of the output frame an individual homography is applied while the final result is interpolated between the four homographies.<br>
151  * The quadrant order of the homographies is as follows: top left, top right, bottom left, bottom right.<br>
152  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
153  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
154  * Input frame pixels lying outside the frame will be masked in the resulting output mask frame; furthermore, these pixels are untouched in the output frame.<br>
155  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
156  * @param input The input frame that will be transformed, must be valid
157  * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must have a valid dimension
158  * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
159  * @param homographies Four homographies used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
160  * @param outputQuadrantCenter The center position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, output.width())x[0, output.height())
161  * @param worker Optional worker object to distribute the computational load
162  * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
163  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
164  * @return True, if succeeded
165  * @see coversHomographyInputFrame().
166  */
167  static bool homographiesMask(const Frame& input, Frame& output, Frame& outputMask, const SquareMatrix3* homographies, const Vector2& outputQuadrantCenter, Worker* worker = nullptr, const uint8_t maskValue = 0xFF, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
168 
169  /**
170  * Transforms a given input frame into an output frame by application of a homography.
171  * This function also uses a camera profile to improve the interpolation accuracy.<br>
172  * The given homography is transformed into a homography for normalized image coordinates.<br>
173  * Thus, also distortion parameters of the camera profile can be applied.<br>
174  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
175  * @param inputCamera The pinhole camera profile to be applied for the input frame
176  * @param outputCamera The pinhole camera profile to be applied for the output frame
177  * @param input The input frame that will be transformed, the frame dimension must match the dimension of the input camera
178  * @param output The output frame resulting by application of the given homography, the frame dimension must match the dimension of the output camera
179  * @param homography The homography used to transform the given input frame, which includes both camera profiles: H = Ki * H' * Ko^-1
180  * @param useDistortionParameters True, to apply the distortion parameters of the camera profile
181  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
182  * @param worker Optional worker object to distribute the computational load
183  * @return True, if succeeded
184  * @see homographyWithCameraMask(), homography().
185  */
186  static bool homographyWithCamera(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const Frame& input, Frame& output, const SquareMatrix3& homography, const bool useDistortionParameters, const uint8_t* borderColor = nullptr, Worker* worker = nullptr);
187 
188  /**
189  * Transforms a given input frame into an output frame by application of a homography.
190  * Input frame pixels lying outside the frame will be masked in the resulting output mask frame; furthermore, these pixels are untouched in the output frame.<br>
191  * This function also uses a camera profile to improve the interpolation accuracy.<br>
192  * The given homography is transformed into a homography for normalized image coordinates.<br>
193  * Thus, also distortion parameters of the camera profile can be applied.<br>
194  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
195  * @param inputCamera The pinhole camera profile to be applied for the input frame (NOTE(review): this declaration takes AnyCamera while homographyWithCamera() takes PinholeCamera - confirm the intended camera type)
196  * @param outputCamera The pinhole camera profile to be applied for the output frame (NOTE(review): same type question as for inputCamera)
197  * @param input The input frame that will be transformed, the frame dimension must match the dimension of the input camera
198  * @param output The output frame resulting by application of the given homography, the frame dimension must match the dimension of the output camera
199  * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
200  * @param homography The homography used to transform the given input frame, which includes both camera profiles: H = Ki * H' * Ko^-1
201  * @param worker Optional worker object to distribute the computational load
202  * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
203  * @return True, if succeeded
204  * @see homographyWithCamera(), homography().
205  */
206  static bool homographyWithCameraMask(const AnyCamera& inputCamera, const AnyCamera& outputCamera, const Frame& input, Frame& output, Frame& outputMask, const SquareMatrix3& homography, Worker* worker = nullptr, const uint8_t maskValue = 0xFFu);
207 
208  /**
209  * Transforms a given input frame into an output frame by application of an interpolation lookup table.
210  * The output frame must have the same pixel format and pixel origin as the input frame.<br>
211  * Information: This function is the equivalent to OpenCV's cv::remap().
212  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
213  * @param input The input frame that will be transformed
214  * @param output Resulting output frame, the dimension will be set to match the size of the lookup table, pixel format and pixel origin will be set to match the given input frame
215  * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
216  * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
217  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
218  * @param worker Optional worker object to distribute the computation
219  * @return True, if succeeded
220  */
221  static bool lookup(const Frame& input, Frame& output, const LookupTable& input_LT_output, const bool offset, const void* borderColor, Worker* worker = nullptr);
222 
223  /**
224  * Transforms a given input frame into an output frame by application of an interpolation lookup table and creates an additional mask as output.
225  * The output frame must have the same pixel format and pixel origin as the input frame.<br>
226  * Input frame pixels lying outside the frame will be masked in the resulting output mask frame; furthermore, these pixels are untouched in the output frame.<br>
227  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
228  * @param input The input frame which will be transformed
229  * @param output Resulting output frame, the frame dimension will be set to match the size of the lookup table, pixel format and pixel origin will be set to match the given input frame
230  * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
231  * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
232  * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
233  * @param worker Optional worker object to distribute the computation
234  * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
235  * @return True, if succeeded
236  */
237  static bool lookupMask(const Frame& input, Frame& output, Frame& outputMask, const LookupTable& input_LT_output, const bool offset, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
238 
239  /**
240  * Applies an affine transformation to an image.
241  * The target frame must have the same pixel format and pixel origin as the source frame, however the dimension (and position) of the target frame can be arbitrary.
242  * This function allows the creation of a target frame fully covering the source frame (if the position and dimension of the target frame covers the transformation of the affine transformation).
243  * The multiplication of the affine transformation with pixel location in the target image yield their location in the source image, i.e., sourcePoint = source_A_target * targetPoint.
244  * The parameter 'targetOrigin' applies an additional translation to the provided affine transformation i.e., source_A_target * create_translation_matrix3x3(targetOrigin.x(), targetOrigin.y()).
245  * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
246  * <pre>
247  * a c e
248  * b d f
249  * 0 0 1
250  * </pre>
251  * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
252  * Information: This function is the equivalent to OpenCV's cv::warpAffine().
253  * Note: For applications running on mobile devices, in order to keep the impact on binary size to a minimum please prefer a specialized transformation function (those that work on image pointers instead of Frame instances).
254  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
255  * @param source The source frame that will be transformed, must be valid
256  * @param target The resulting frame after applying the affine transformation to the source frame; pixel format and pixel origin must be identical to source frame; memory of target frame must be allocated by the caller
257  * @param source_A_target Affine transform used to transform the given source frame, transforming points defined in the target frame into points defined in the source frame
258  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
259  * @param worker Optional worker object to distribute the computational load
260  * @param targetOrigin The origin of the target frame defining the global position of the target frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
261  * @return True, if succeeded
262  */
263  static bool affine(const Frame& source, Frame& target, const SquareMatrix3& source_A_target, const uint8_t* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& targetOrigin = PixelPositionI(0, 0));
264 
265  /**
266  * Rotates a given frame by a bilinear interpolation.
267  * The frame will be rotated around a specified anchor position (inside or outside the frame).<br>
268  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
269  * @param source The source frame to be rotated, must be valid
270  * @param target The target frame which will receive the rotated image, will be set to the same frame type as the source frame, can be invalid
271  * @param horizontalAnchorPosition Position of the rotation anchor in the horizontal direction, with range (-infinity, infinity)
272  * @param verticalAnchorPosition Position of the rotation anchor in the vertical direction, with range (-infinity, infinity)
273  * @param angle The counter clockwise rotation angle in radian, with range [0, 2PI)
274  * @param worker Optional worker object to distribute the computation to several CPU cores
275  * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use 0x00 for each channel
276  * @return True, if succeeded
277  */
278  static bool rotate(const Frame& source, Frame& target, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, Worker* worker = nullptr, const uint8_t* borderColor = nullptr);
279 
280  /**
281  * Re-samples a camera image which has been captured with a camera profile as if the image would have been captured with a second camera profile.
282  * The function can be used e.g., to rectify a fisheye camera image into a pinhole camera image.
283  * @param sourceFrame The source image captured with the source camera profile, must be valid
284  * @param sourceCamera The source camera profile which has been used to capture the source image, with resolution sourceFrame.width() x sourceFrame.height(), must be valid
285  * @param source_R_target The rotation transforming 3D points defined in the coordinate system of the target camera image to 3D points defined in the coordinate system of the source camera image, must be valid
286  * @param targetCamera The camera profile of the target frame, must be valid
287  * @param targetFrame The resulting target image, with resolution targetCamera.width() x targetCamera.height(), must be valid
288  * @param source_OLT_target Optional resulting offset lookup table between target image points and source image points
289  * @param worker Optional worker object to distribute the computational load
290  * @param binSizeInPixel The size in pixel of the interpolation bins used for building the lookup table, with range [1, infinity)
291  * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use ElementType(0) for each channel
292  * @return True, if succeeded
293  * @see resampleCameraImageImage8BitPerChannel().
294  */
295  static bool resampleCameraImage(const Frame& sourceFrame, const AnyCamera& sourceCamera, const SquareMatrix3& source_R_target, const AnyCamera& targetCamera, Frame& targetFrame, LookupCorner2<Vector2>* source_OLT_target = nullptr, Worker* worker = nullptr, const unsigned int binSizeInPixel = 8u, const void* borderColor = nullptr);
296 
297  /**
298  * Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
299  * This function uses an integer interpolation with a precision of 1/128.
300  * @param frame The frame to determine the pixel values from, must be valid
301  * @param channels Number of channels of the given frame, with range [1, 8]
302  * @param width The width of the frame in pixel, with range [1, infinity)
303  * @param height The height of the frame in pixel, with range [1, infinity)
304  * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
305  * @param pixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
306  * @param position The position for which the interpolated pixel will be determined, with ranges [0, width - 1]x[0, height - 1] for PC_TOP_LEFT, [0, width]x[0, height] for PC_CENTER
307  * @param result Resulting pixel values, must be valid
308  * @return True, if succeeded
309  * @tparam TScalar The scalar data type of the sub-pixel position
310  */
311  template <typename TScalar = Scalar>
312  static bool interpolatePixel8BitPerChannel(const uint8_t* frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2<TScalar>& position, uint8_t* result);
313 
314  /**
315  * Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data type.
316  * This function uses floating point precision during interpolation.
317  * @param frame The frame to determine the pixel values from, must be valid
318  * @param channels Number of channels of the given frame, with range [1, 8]
319  * @param width The width of the frame in pixel, with range [1, infinity)
320  * @param height The height of the frame in pixel, with range [1, infinity)
321  * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
322  * @param pixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
323  * @param position The position to determine the interpolated pixel values for, with range [0, width)x[0, height)
324  * @param result Resulting interpolated pixel value(s), must be valid
325  * @param resultBias Optional bias value which will be added to the interpolation result e.g. to handle rounding, with range (-infinity, infinity), default is zero
326  * @return True, if succeeded
327  * @tparam TSource The data type of the provided pixel values in the (source) frame
328  * @tparam TTarget The data type of the resulting interpolated value(s)
329  * @tparam TScalar The data type of each coordinate of the provided interpolation location, should be either Scalar, float, or double
330  * @tparam TIntermediate The data type of the intermediate interpolation result before assigning the result
331  */
332  template <typename TSource, typename TTarget, typename TScalar = Scalar, typename TIntermediate = TScalar>
333  static bool interpolatePixel(const TSource* frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2<TScalar>& position, TTarget* result, const TIntermediate& resultBias = TIntermediate(0));
334  };
335 
336  /**
337  * This class implements highly optimized interpolation functions with fixed properties.
338  * The functions can be significantly faster as these functions are tailored to the specific properties.
339  */
340  class OCEAN_CV_EXPORT SpecialCases
341  {
342  public:
343 
344  /**
345  * Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 224x224 by using a bilinear interpolation.
346  * This function exploits the fact that lookup locations and interpolation factors repeat after 25 pixels (14 pixels in the target resolution), as 400 / 224 == 25 / 14.
347  * @param source The source frame buffer with resolution 400x400, must be valid
348  * @param target The target frame buffer receiving the resized image information, with resolution 224x224, must be valid
349  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
350  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
351  * @see FrameInterpolatorBilinear::resize<T, tChannels>().
352  */
353  static void resize400x400To224x224_8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
354 
355  /**
356  * Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 256x256 by using a bilinear interpolation.
357  * This function exploits the fact that lookup locations and interpolation factors repeat after 25 pixels (16 pixels in the target resolution), as 400 / 256 == 25 / 16.
358  * @param source The source frame buffer with resolution 400x400, must be valid
359  * @param target The target frame buffer receiving the resized image information, with resolution 256x256, must be valid
360  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
361  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
362  * @see FrameInterpolatorBilinear::resize<T, tChannels>().
363  */
364  static void resize400x400To256x256_8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
365  };
366 
367  /**
368  * Resizes a given frame with (almost) arbitrary data type (e.g., float, double, int) by using a bilinear interpolation.
369  * This function is actually a wrapper for scale(), use scale() directly if user-defined scaling factors are needed.
370  * @param source The source frame buffer providing the image information to be resized, must be valid
371  * @param target The target frame buffer receiving the resized image information, must be valid
372  * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
373  * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
374  * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
375  * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
376  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
377  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
378  * @param worker Optional worker object to distribute the computation to several CPU cores
379  * @tparam T Data type of each pixel channel, e.g., float, double, int
380  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
381  * @see scale<T, tChannels>().
382  */
383  template <typename T, unsigned int tChannels>
384  static inline void resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
385 
386  /**
387  * Rescales a given frame with (almost) arbitrary data type (e.g., float, double, int) by using a bilinear interpolation with user-defined scaling factors.
388  * Beware: This function is not optimized for performance but supports (almost) arbitrary data types.<br>
389  * Try to use scale8BitPerChannel() if possible.
390  * @param source The source frame buffer providing the image information to be rescaled, must be valid
391  * @param target The target frame buffer receiving the rescaled image information, must be valid
392  * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
393  * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
394  * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
395  * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
396  * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
397  * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
398  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
399  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
400  * @param worker Optional worker object to distribute the computation to several CPU cores
401  * @tparam T Data type of each pixel channel, e.g., float, double, int
402  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
403  * @see resize<T, tChannels>().
404  */
405  template <typename T, unsigned int tChannels>
406  static inline void scale(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
407 
408  /**
409  * Rotates a given frame by a bilinear interpolation.
410  * The frame will be rotated around a specified anchor position (inside or outside the frame).
411  * @param source The source frame to be rotated, must be valid
412  * @param target The target frame which will receive the rotated image, with same frame type as the source frame, must be valid
413  * @param width The width of the source and target frame in pixel, with range [1, infinity)
414  * @param height The height of the source and target frame in pixel, with range [1, infinity)
415  * @param horizontalAnchorPosition Position of the rotation anchor in the horizontal direction, with range (-infinity, infinity)
416  * @param verticalAnchorPosition Position of the rotation anchor in the vertical direction, with range (-infinity, infinity)
417  * @param angle The counter clockwise rotation angle in radian, with range [0, 2PI)
418  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
419  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
420  * @param worker Optional worker object to distribute the computation to several CPU cores
421  * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use 0x00 for each channel
422  * @tparam tChannels The number of channels both frames have, with range [1, infinity)
423  */
424  template <unsigned int tChannels>
425  static inline void rotate8BitPerChannel(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr, const uint8_t* borderColor = nullptr);
426 
427  /**
428  * Applies an affine transformation to an N-channel, 8-bit frame.
429  * The target frame must have the same pixel format and pixel origin as the source frame, however the dimension (and position) of the target frame can be arbitrary.<br>
430  * This function allows the creation of a target frame fully covering the source frame (if the position and dimension of the target frame covers the transformation of the affine transformation).<br>
431  * The 'targetOrigin' parameter simply applies an additional translation onto the provided affine transformation i.e., affine * create_translation_matrix3x3(targetOrigin.x(), targetOrigin.y()).
432  * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
433  * <pre>
434  * a c e
435  * b d f
436  * 0 0 1
437  * </pre>
438  * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
439  * @param source Input frame that will be transformed, must be valid
440  * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
441  * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
442  * @param source_A_target Affine transformation, such that: sourcePoint = source_A_target * targetPoint
443  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
444  * @param target The resulting target frame created by applying the given affine transformation, must be valid
445  * @param targetOrigin The origin of the target frame defining the global position of the target frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
446  * @param targetWidth The width of the target image in pixel, with range [1, infinity)
447  * @param targetHeight The height of the target image in pixel, with range [1, infinity)
448  * @param sourcePaddingElements The number of padding elements at the end of each source frame row, in elements, with range [0, infinity)
449  * @param targetPaddingElements The number of padding elements at the end of each target frame row, in elements, with range [0, infinity)
450  * @param worker Optional worker object to distribute the computational load
451  * @tparam tChannels Number of channels of the frame
452  * @see homographyMask8BitPerChannel(), homographyWithCamera8BitPerChannel(), homography().
453  */
454  template <unsigned int tChannels>
455  static inline void affine8BitPerChannel(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3& source_A_target, const uint8_t* borderColor, uint8_t* target, const PixelPositionI& targetOrigin, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
456 
457  /**
458  * Transforms a input frame with (almost) arbitrary pixel format into an output frame by application of a homography.
459  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
460  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography.<br>
461  * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).
462  * @param input The input frame that will be transformed, must be valid
463  * @param inputWidth Width of both images in pixel, with range [1, infinity)
464  * @param inputHeight Height of both images pixel, with range [1, infinity)
465  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
466  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
467  * @param output The output frame using the given homography, must be valid
468  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
469  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
470  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
471  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
472  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
473  * @param worker Optional worker object to distribute the computational load
474  * @tparam T Data type of each pixel channel, e.g., float, double, int
475  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
476  * @see homographyMask8BitPerChannel(), homographyWithCamera8BitPerChannel().
477  */
478  template <typename T, unsigned int tChannels>
479  static inline void homography(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const T* borderColor, T* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
480 
481  /**
482  * Transforms a given 8 bit per channel input frame into an output frame by application of four homographies.
483  * For each quadrant of the output frame an individual homography is applied while the final result is interpolated between the four homographies.<br>
484  * The quadrant order of the homographies is as follows: top left, top right, bottom left, bottom right.<br>
485  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
486  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography.<br>
487  * @param input The input frame that will be transformed
488  * @param inputWidth Width of both images in pixel, with range [1, infinity)
489  * @param inputHeight Height of both images pixel, with range [1, infinity)
490  * @param homographies Four homographies used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
491  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
492  * @param output The output frame using the given homography
493  * @param outputQuadrantCenter The center position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputWidth)x[0, outputHeight)
494  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
495  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
496  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
497  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
498  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
499  * @param worker Optional worker object to distribute the computational load
500  * @tparam tChannels Number of channels of the frame
501  * @see homographyMask8BitPerChannel(), homographyWithCamera8BitPerChannel().
502  */
503  template <unsigned int tChannels>
504  static inline void homographies8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], const uint8_t* borderColor, uint8_t* output, const Vector2& outputQuadrantCenter, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
505 
506  /**
507  * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
508  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
509  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
510  * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).
511  * @param input The input frame that will be transformed, must be valid
512  * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
513  * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
514  * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
515  * @param output The resulting output frame created by applying the given homography, must be valid
516  * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame, must be valid
517  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
518  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
519  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
520  * @param maskValue 8 bit mask value for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
521  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
522  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
523  * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
524  * @param worker Optional worker object to distribute the computational load
525  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
526  * @see homography(), homographyWithCamera8BitPerChannel().
527  */
528  template <unsigned int tChannels>
529  static inline void homographyMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, uint8_t* output, uint8_t* outputMask, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const uint8_t maskValue /* = 0xFF*/, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr);
530 
531  /**
532  * Transforms a given 8 bit per channel input frame into an output frame by application of four homographies.
533  * For each quadrant of the output frame an individual homography is applied while the final result is interpolated between the four homographies.<br>
534  * The quadrant order of the homographies is as follows: top left, top right, bottom left, bottom right.<br>
535  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
536  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homographies).<br>
537  * @param input The input frame that will be transformed
538  * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
539  * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
540  * @param homographies Four homographies used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
541  * @param output The resulting output frame created by applying the given homographies
542  * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
543  * @param outputQuadrantCenter The center position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputWidth)x[0, outputHeight)
544  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
545  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
546  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
547  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
548  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
549  * @param outputMaskPaddingElements The number of padding elements at the end of each row of the output mask, in elements, with range [0, infinity)
550  * @param worker Optional worker object to distribute the computational load
551  * @param maskValue 8 bit mask value for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
552  * @tparam tChannels Number of channels of the frame
553  * @see homography(), homographyWithCamera8BitPerChannel().
554  */
555  template <unsigned int tChannels>
556  static inline void homographiesMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], uint8_t* output, uint8_t* outputMask, const Vector2& outputQuadrantCenter, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
557 
558  /**
559  * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
560  * This function also uses a camera profile to improve the interpolation accuracy.<br>
561  * The given homography is transformed into a homography for normalized image coordinates.<br>
562  * Thus, the distortion parameters of the camera profile can be applied as well.<br>
563  * @param inputCamera The pinhole camera profile to be applied for the input frame
564  * @param outputCamera The pinhole camera profile to be applied for the output frame
565  * @param input The input frame that will be transformed
566  * @param homography The homography used to transform the given input frame by the following equation: inputPoint = homography * outputPoint
567  * @param useDistortionParameters True, to apply the distortion parameters of the camera profile
568  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
569  * @param output The resulting output frame created by applying the given homography
570  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
571  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
572  * @param worker Optional worker object to distribute the computational load
573  * @tparam tChannels Number of channels of the frame
574  * @see homography().
575  */
576  template <unsigned int tChannels>
577  static inline void homographyWithCamera8BitPerChannel(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const uint8_t* input, const SquareMatrix3& homography, const bool useDistortionParameters, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
578 
579  /**
580  * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
581  * Input frame pixels lying outside the frame will be masked in the resulting output mask frame.<br>
582  * This function also uses a camera profile to improve the interpolation accuracy.<br>
583  * The given homography is transformed into a homography for normalized image coordinates.<br>
584  * Thus, the distortion parameters of the camera profile can be applied as well.
585  * @param inputCamera The pinhole camera profile to be applied for the input frame, must be valid
586  * @param outputCamera The pinhole camera profile to be applied for the output frame, must be valid
587  * @param input The input frame that will be transformed, must be valid
588  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
589  * @param homography The homography used to transform the given input frame by the following equation: inputPoint = homography * outputPoint
590  * @param output The resulting output frame created by applying the given homography
591  * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
592  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
593  * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
594  * @param worker Optional worker object to distribute the computational load
595  * @param maskValue 8 bit mask value for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
596  * @tparam tChannels Number of channels of the frame
597  */
598  template <unsigned int tChannels>
599  static inline void homographyWithCameraMask8BitPerChannel(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const uint8_t* input, const unsigned int inputPaddingElements, const SquareMatrix3& homography, uint8_t* output, uint8_t* outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
600 
601  /**
602  * Transforms a given input frame into an output frame by application of an interpolation lookup table.
603  * The frame must have a 1-plane pixel format.<br>
604  * The output frame must have the same pixel format and pixel origin as the input frame.
605  * @param input The input frame which will be transformed, must be valid
606  * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
607  * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
608  * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
609  * @param offset True, if the lookup table store local offsets; False, if the lookup table stores absolute positions
610  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign T(0) to each channel
611  * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
612  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
613  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
614  * @param worker Optional worker object to distribute the computation
615  * @tparam T Data type of each pixel channel, e.g., float, double, int
616  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
617  */
618  template <typename T, unsigned int tChannels>
619  static inline void lookup(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& input_LT_output, const bool offset, const T* borderColor, T* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
620 
621  /**
622  * Transforms a given input frame into an output frame by application of an interpolation lookup table.
623  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).<br>
624  * The output frame must have the same pixel format and pixel origin as the input frame.<br>
625  * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further theses pixels are untouched in the output frame.<br>
626  * @param input The input frame which will be transformed
627  * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
628  * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
629  * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
630  * @param offset True, if the lookup table store local offsets; False, if the lookup table stores absolute positions
631  * @param output Resulting output frame with frame dimension equal to the size of the given lookup table
632  * @param outputMask Resulting mask frame with 8 bits per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
633  * @param inputPaddingElements The number of padding elements at the end of each row of `input`, in elements, with range [0, infinity)
634  * @param outputPaddingElements The number of padding elements at the end of each row of `output`, in elements, with range [0, infinity)
635  * @param outputMaskPaddingElements The number of padding elements at the end of each row of `outputMask`, in elements, with range [0, infinity)
636  * @param worker Optional worker object to distribute the computation
637  * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
638  * @tparam tChannels Number of channels of the frame
639  */
640  template <unsigned int tChannels>
641  static inline void lookupMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& input_LT_output, const bool offset, uint8_t* output, uint8_t* outputMask, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
642 
643  /**
644  * Re-samples a camera image which has been captured with a camera profile as if the image would have been captured with a second camera profile.
645  * The function can be used e.g., to rectify a fisheye camera image into a pinhole camera image.
646  * @param sourceFrame The source image captured with the source camera profile, must be valid
647  * @param sourceCamera The source camera profile which has been used to capture the source image, with resolution sourceFrame.width() x sourceFrame.height(), must be valid
648  * @param source_R_target The rotation transforming 3D points defined in the coordinate system of the target camera image to 3D points defined in the coordinate system of the source camera image, must be valid
649  * @param targetCamera The camera profile of the target frame, must be valid
650  * @param targetFrame The resulting target image, with resolution targetCamera.width() x targetCamera.height(), must be valid
651  * @param sourceFramePaddingElements The number of padding elements at the end of each source frame row, in elements, with range [0, infinity)
652  * @param targetFramePaddingElements The number of padding elements at the end of each target frame row, in elements, with range [0, infinity)
653  * @param source_OLT_target Optional resulting offset lookup table between target image points and source image points
654  * @param worker Optional worker object to distribute the computational load
655  * @param binSizeInPixel The size in pixel of the interpolation bins used for building the lookup table, with range [1, infinity)
656  * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use T(0) for each channel
657  * @tparam T Data type of each pixel channel, e.g., uint8_t, int16_t, float, double
658  * @tparam tChannels The number of frame channels, with range [1, infinity)
659  * @see Comfort::resampleCameraImage().
660  */
661  template <typename T, unsigned int tChannels>
662  static void resampleCameraImage(const T* sourceFrame, const AnyCamera& sourceCamera, const SquareMatrix3& source_R_target, const AnyCamera& targetCamera, T* targetFrame, const unsigned int sourceFramePaddingElements, const unsigned int targetFramePaddingElements, LookupCorner2<Vector2>* source_OLT_target = nullptr, Worker* worker = nullptr, const unsigned int binSizeInPixel = 8u, const T* borderColor = nullptr);
663 
664  /**
665  * Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
666  * This function uses an integer interpolation with a precision of 1/128.
667  * @param frame The frame to determine the pixel values from, must be valid
668  * @param width The width of the frame in pixel, with range [1, infinity)
669  * @param height The height of the frame in pixel, with range [1, infinity)
670  * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
671  * @param position The position for which the interpolated pixel will be determined, with ranges [0, width - 1]x[0, height - 1] for PC_TOP_LEFT, [0, width]x[0, height] for PC_CENTER
672  * @param result Resulting interpolated pixel values, must be valid
673  * @tparam tChannels Number of channels of the given frame, with range [1, infinity)
674  * @tparam tPixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
675  * @tparam TScalar The scalar data type of the sub-pixel position
676  * @see interpolatePixel().
677  */
678  template <unsigned int tChannels, PixelCenter tPixelCenter = PC_TOP_LEFT, typename TScalar = Scalar>
679  static inline void interpolatePixel8BitPerChannel(const uint8_t* frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2<TScalar>& position, uint8_t* result);
680 
681  /**
682  * Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data type.
683  * This function uses floating point precision during interpolation.
684  * @param frame The frame to determine the pixel values from, must be valid
685  * @param width The width of the frame in pixel, with range [1, infinity)
686  * @param height The height of the frame in pixel, with range [1, infinity)
687  * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
688  * @param position The position for which the interpolated pixel will be determined, with ranges [0, width - 1]x[0, height - 1] for PC_TOP_LEFT, [0, width]x[0, height] for PC_CENTER
689  * @param result Resulting interpolated pixel value(s), must be valid
690  * @param resultBias Optional bias value which will be added to the interpolation result, e.g., to handle rounding, with range (-infinity, infinity), default is zero
691  * @tparam TSource The data type of the provided pixel values in the (source) frame
692  * @tparam TTarget The data type of the resulting interpolated value(s)
693  * @tparam tChannels Number of channels of the given frame, with range [1, infinity)
694  * @tparam tPixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
695  * @tparam TScalar The data type of each coordinate of the provided interpolation location, should be either Scalar, float, or double
696  * @tparam TIntermediate The data type of the intermediate interpolation result before assigning the result, TScalar by default
697  * @see interpolatePixel8BitPerChannel().
698  */
699  template <typename TSource, typename TTarget, unsigned int tChannels, PixelCenter tPixelCenter = PC_TOP_LEFT, typename TScalar = Scalar, typename TIntermediate = TScalar>
700  static inline void interpolatePixel(const TSource* frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2<TScalar>& position, TTarget* result, const TIntermediate& resultBias = TIntermediate(0));
701 
702  /**
703  * Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame with alpha channel.
704  * The center of each pixel is located with an offset of (0.5 x 0.5) in relation to the real pixel position.<br>
705  * The given frame is virtually extended by a fully transparent border so that this function supports arbitrary interpolation positions.<br>
706  * If the given position lies inside the frame area of (-0.5, -0.5) -> (width + 0.5, height + 0.5), the interpolation result will contain color information of the frame; otherwise, a fully transparent interpolation result is provided.<br>
707  * @param frame The frame to determine the pixel values from, must be valid
708  * @param width The width of the frame in pixel, with range [1, infinity)
709  * @param height The height of the frame in pixel, with range [1, infinity)
710  * @param position The position to determine the interpolated pixel values for, with range (-infinity, infinity)x(-infinity, infinity)
711  * @param result Resulting pixel values, must be valid
712  * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
713  * @tparam tChannels Number of channels of the given frame, with range [1, infinity)
714  * @tparam tAlphaAtFront True, if the alpha channel is in the front of the data channels
715  * @tparam tTransparentIs0xFF True, if 0xFF is interpreted as fully transparent
716  */
717  template <unsigned int tChannels, bool tAlphaAtFront, bool tTransparentIs0xFF>
718  static inline void interpolate1PixelFullAlphaBorder8BitPerChannel(const uint8_t* frame, const unsigned int width, const unsigned int height, const Vector2& position, uint8_t* result, const unsigned int framePaddingElements);
719 
720  /**
721  * Interpolates the sum of intensity values of an image patch in a frame, while the frame is provided as lined integral frame.
722  * @param linedIntegralFrame The lined integral image created from the actual gray-scale image for which the patch intensity sum will be determined, must be valid
723  * @param frameWidth Width of the original frame in pixel (not the width of the lined-integral frame), with range [1, infinity)
724  * @param frameHeight Height of the original frame in pixel (not the height of the lined-integral frame), with range [1, infinity)
725  * @param lineIntegralFramePaddingElements The number of padding elements at the end of each integral image row, in elements, with range [0, infinity)
726  * @param center 2D coordinates of the center point of the patch, with range [patchWidth/2, frameWidth - patchWidth/2)x[patchHeight/2, frameHeight - patchHeight/2) for PC_CENTER
727  * @param pixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
728  * @param patchWidth Width of the calculated patch in pixel, with range [1, frameWidth - 1]
729  * @param patchHeight Height of the calculated patch in pixel, with range [1, frameHeight - 1]
730  * @return The resulting sum of the pixel intensities
731  */
732  static Scalar patchIntensitySum1Channel(const uint32_t* linedIntegralFrame, const unsigned int frameWidth, const unsigned int frameHeight, const unsigned int lineIntegralFramePaddingElements, const Vector2& center, const CV::PixelCenter pixelCenter, const unsigned int patchWidth, const unsigned int patchHeight);
733 
734  /**
735  * Checks whether the application of a given homography for a specified input frame and output frame covers the entire image information (which is necessary for the bilinear interpolation) or whether the homography relies on missing image information.
736  * @param inputWidth The width of the input frame in pixel, with range [1, infinity)
737  * @param inputHeight The height of the input frame in pixel, with range [1, infinity)
738  * @param outputWidth The width of the output frame in pixel, with range [1, infinity)
739  * @param outputHeight The height of the output frame in pixel, with range [1, infinity)
740  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
741  * @param outputOriginX The horizontal origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity), default is 0
742  * @param outputOriginY The vertical origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity), default is 0
743  * @return True, if the homography covers the entire input image information (if all output pixels will receive valid data from the input frame); False, otherwise
744  */
745  static bool coversHomographyInputFrame(const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int outputWidth, const unsigned int outputHeight, const SquareMatrix3& input_H_output, const int outputOriginX = 0, const int outputOriginY = 0);
746 
747  private:
748 
749  /**
750  * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
751  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
752  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
753  * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).
754  * @param input The input frame that will be transformed, must be valid
755  * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
756  * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
757  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
758  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
759  * @param output The output frame using the given homography, must be valid
760  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
761  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
762  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
763  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
764  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
765  * @param worker Optional worker object to distribute the computational load
766  * @tparam tChannels Number of channels of the frame
767  * @see homographyMask8BitPerChannel(), homographyWithCamera8BitPerChannel(), homography().
768  */
769  template <unsigned int tChannels>
770  static inline void homography8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const uint8_t* borderColor, uint8_t* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
771 
772  /**
773  * Rescales a given frame with 8 bit per data channel by using a bilinear interpolation with user-defined scaling factors.
774  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).<br>
775  * Information: This function is the equivalent of OpenCV's cv::resize().
776  * @param source The source frame buffer providing the image information to be resized, must be valid
777  * @param target The target frame buffer receiving the rescaled image information, must be valid
778  * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
779  * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
780  * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
781  * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
782  * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
783  * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
784  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
785  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
786  * @param worker Optional worker object to distribute the computation to several CPU cores
787  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
788  */
789  template <unsigned int tChannels>
790  static inline void scale8BitPerChannel(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
791 
792  /**
793  * Resizes a subset of a given frame with 8 bit per channel by a bilinear interpolation.
794  * @param source The image data of the source frame to be resized, must be valid
795  * @param target The target frame buffer receiving the interpolated (resized) source frame, must be valid
796  * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
797  * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
798  * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
799  * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
800  * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
801  * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
802  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
803  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
804  * @param firstTargetRow The first target row to be handled, with range [0, targetHeight)
805  * @param numberTargetRows The number of target rows to be handled, with range [1, targetHeight - firstTargetRow]
806  * @tparam tChannels Number of frame channels, with range [1, infinity)
807  */
808  template <unsigned int tChannels>
809  static void scale8BitPerChannelSubset(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
810 
811  /**
812  * Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
813  * This function uses interpolation factors with 7 bit precision and does not apply any SIMD instructions.
814  * @param extendedSourceRow The source row extended with a copy of the last pixel for which the interpolation will be applied, must be valid
815  * @param targetRow The target row receiving the interpolation result, must be valid
816  * @param targetWidth The width of the target row in pixel, with range [8, infinity)
817  * @param channels The number of frame channels, possible values are 1, 4
818  * @param interpolationLocations The successive locations within the source row defining the location of the left pixels to be interpolated (specified in elements !not! in pixels - e.g., interpolationLocations[0] = firstInterpolationPixel * channels), one for each target pixel, with range [0, (targetWidth - 1) * channels]
819  * @param interpolationFactors The two successive (left !and! right) interpolation factors for each left and right source pixel, with range [0, 128]
820  * @see interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON<tChannels>().
821  */
822  static void interpolateRowHorizontal8BitPerChannel7BitPrecision(const uint8_t* extendedSourceRow, uint8_t* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const uint8_t* interpolationFactors);
823 
824  /**
825  * Applies a (vertical) linear interpolation between two rows with arbitrary data types.
826  * This function does not apply any SIMD instructions.<br>
827  * The length of both source rows is identical with the length of the target row.
828  * @param sourceRowTop The top source row to be used for interpolation, must be valid
829  * @param sourceRowBottom The bottom source row to be used for interpolation, must be valid
830  * @param targetRow The target row receiving the interpolation result, must be valid
831  * @param elements The number of elements in the row (width * channels), with range [1, infinity)
832  * @param factorBottom The interpolation factor for all elements of the bottom row, with factorTop = 1 - factorBottom, with range [0, 1]
833  * @tparam T The data type of each element, should be 'float'
834  */
835  template <typename T>
836  static void interpolateRowVertical(const T* sourceRowTop, const T* sourceRowBottom, T* targetRow, const unsigned int elements, const float factorBottom);
837 
838  /**
839  * Applies a (horizontal) linear interpolation for one row with arbitrary data type.
840  * This function does not apply any SIMD instructions.
841  * @param extendedSourceRow The source row extended with a copy of the last pixel for which the interpolation will be applied, must be valid
842  * @param targetRow The target row receiving the interpolation result, must be valid
843  * @param targetWidth The width of the target row in pixel, with range [8, infinity)
844  * @param channels The number of frame channels, must be identical with 'tChannels', possible values are 1, 4
845  * @param interpolationLocations The successive locations within the source row defining the location of the left pixels to be interpolated (specified in elements !not! in pixels - e.g., interpolationLocations[0] = firstInterpolationPixel * channels), one for each target pixel, with range [0, (targetWidth - 1) * channels]
846  * @param interpolationFactorsRight The right interpolation factors for each right source pixel, with range [0, 1]
847  * @tparam T The data type of each element, should be 'float'
848  * @tparam tChannels The number of frame channels this function can handle, should be 1
849  * @see interpolateRowHorizontal8BitPerChannel7BitPrecision().
850  */
851  template <typename T, unsigned int tChannels>
852  static void interpolateRowHorizontal(const T* extendedSourceRow, T* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const float* interpolationFactorsRight);
853 
854 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
855 
856  /**
857  * Applies a (vertical) linear interpolation between two rows with 8 bit per channel.
858  * This function applies NEON instructions and uses interpolation factors with 7 bit precision.<br>
859  * The length of both source rows is identical with the length of the target row.
860  * @param sourceRowTop The top source row to be used for interpolation, must be valid
861  * @param sourceRowBottom The bottom source row to be used for interpolation, must be valid
862  * @param targetRow The target row receiving the interpolation result, must be valid
863  * @param elements The number of elements in the row (width * channels), with range [16, infinity)
864  * @param factorBottom The interpolation factor for all elements of the bottom row, with factorTop = 128 - factorBottom, with range [0, 128]
865  */
866  static void interpolateRowVertical8BitPerChannel7BitPrecisionNEON(const uint8_t* sourceRowTop, const uint8_t* sourceRowBottom, uint8_t* targetRow, const unsigned int elements, const unsigned int factorBottom);
867 
868  /**
869  * Applies a (vertical) linear interpolation between two rows with arbitrary data types.
870  * This function applies NEON instructions.<br>
871  * The length of both source rows is identical with the length of the target row.
872  * @param sourceRowTop The top source row to be used for interpolation, must be valid
873  * @param sourceRowBottom The bottom source row to be used for interpolation, must be valid
874  * @param targetRow The target row receiving the interpolation result, must be valid
875  * @param elements The number of elements in the row (width * channels), with range [16, infinity)
876  * @param factorBottom The interpolation factor for all elements of the bottom row, with factorTop = 1 - factorBottom, with range [0, 1]
877  * @tparam T The data type of each element, should be 'float'
878  */
879  template <typename T>
880  static void interpolateRowVerticalNEON(const T* sourceRowTop, const T* sourceRowBottom, T* targetRow, const unsigned int elements, const float factorBottom);
881 
882  /**
883  * Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
884  * This function applies NEON instructions and uses interpolation factors with 7 bit precision.
885  * @param extendedSourceRow The source row extended with a copy of the last pixel for which the interpolation will be applied, must be valid
886  * @param targetRow The target row receiving the interpolation result, must be valid
887  * @param targetWidth The width of the target row in pixel, with range [8, infinity)
888  * @param channels The number of frame channels, must be identical with 'tChannels', possible values are 1, 4
889  * @param interpolationLocations The successive locations within the source row defining the location of the left pixels to be interpolated (specified in elements !not! in pixels - e.g., interpolationLocations[0] = firstInterpolationPixel * channels), one for each target pixel, with range [0, (targetWidth - 1) * channels]
890  * @param interpolationFactors The two successive (left !and! right) interpolation factors for each left and right source pixel, with range [0, 128]
891  * @tparam tChannels The number of frame channels this function can handle, possible values are 1, 4
892  * @see interpolateRowHorizontal8BitPerChannel7BitPrecision().
893  */
894  template <unsigned int tChannels>
895  static void interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON(const uint8_t* extendedSourceRow, uint8_t* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const uint8_t* interpolationFactors);
896 
897  /**
898  * Applies a (horizontal) linear interpolation for one row with arbitrary data type.
899  * This function applies NEON instructions.
900  * @param extendedSourceRow The source row extended with a copy of the last pixel for which the interpolation will be applied, must be valid
901  * @param targetRow The target row receiving the interpolation result, must be valid
902  * @param targetWidth The width of the target row in pixel, with range [8, infinity)
903  * @param channels The number of frame channels, must be identical with 'tChannels', possible values are 1, 4
904  * @param interpolationLocations The successive locations within the source row defining the location of the left pixels to be interpolated (specified in elements !not! in pixels - e.g., interpolationLocations[0] = firstInterpolationPixel * channels), one for each target pixel, with range [0, (targetWidth - 1) * channels]
905  * @param interpolationFactorsRight The right interpolation factors for each right source pixel, with range [0, 1]
906  * @tparam T The data type of each element, should be 'float'
907  * @tparam tChannels The number of frame channels this function can handle, should be 1
908  * @see interpolateRowHorizontal8BitPerChannel7BitPrecision().
909  */
910  template <typename T, unsigned int tChannels>
911  static void interpolateRowHorizontalNEON(const T* extendedSourceRow, T* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const float* interpolationFactorsRight);
912 
913  /**
914  * Rescales a subset of a given frame with 8 bit per channel by a bilinear interpolation.
915  * This function applies NEON instructions and uses interpolation factors with 7 bit precision.
916  * @param source The image data of the source frame to be resized, must be valid
917  * @param target The target frame buffer receiving the interpolated (resized) source frame, must be valid
918  * @param sourceWidth Width of the source frame in pixel, with range [2, 65535]
919  * @param sourceHeight Height of the source frame in pixel, with range [1, 65535]
920  * @param targetWidth Width of the target frame in pixel, with range [tMinimalTargetWidth, 65535]
921  * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
922  * @param channels The number of channels both frames have, with range [1, infinity)
923  * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
924  * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
925  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
926  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
927  * @param firstTargetRow The first target row to be handled, with range [0, targetHeight)
928  * @param numberTargetRows The number of target rows to be handled, with range [1, targetHeight - firstTargetRow]
929  * @see interpolateRowVertical8BitPerChannel7BitPrecisionNEON(), interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON().
930  */
931  static void scale8BitPerChannelSubset7BitPrecisionNEON(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int channels, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
932 
933 #endif // OCEAN_HARDWARE_NEON_VERSION >= 10
934 
935  /**
936  * Resizes a subset of a given frame with arbitrary data type by a bilinear interpolation.
937  * @param source The image data of the source frame to be resized, must be valid
938  * @param target The target frame buffer receiving the interpolated (resized) source frame, must be valid
939  * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
940  * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
941  * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
942  * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
943  * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
944  * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
945  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
946  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
947  * @param firstTargetRow The first target row to be handled, with range [0, targetHeight)
948  * @param numberTargetRows The number of target rows to be handled, with range [1, targetHeight - firstTargetRow]
949  * @tparam T The data type of each pixel channel, e.g., float, double, int, short, ...
950  * @tparam TScale The data type of the internal scaling factors to be used, should be 'float' or 'double'
951  * @tparam tChannels Number of frame channels, with range [1, infinity)
952  */
953  template <typename T, typename TScale, unsigned int tChannels>
954  static void scaleSubset(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
955 
956  /**
957  * Rotates a subset of a given frame by a bilinear interpolation.
958  * @param source The source frame to be rotated, must be valid
959  * @param target The target frame which will receive the rotated image, with same frame type as the source frame, must be valid
960  * @param width The width of the source and target frame in pixel, with range [1, infinity)
961  * @param height The height of the source and target frame in pixel, with range [1, infinity)
962  * @param horizontalAnchorPosition Position of the rotation anchor in the horizontal direction, with range (-infinity, infinity)
963  * @param verticalAnchorPosition Position of the rotation anchor in the vertical direction, with range (-infinity, infinity)
964  * @param angle The counter clockwise rotation angle in radian, with range [0, 2PI)
965  * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use 0x00 for each channel
966  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
967  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
968  * @param firstTargetRow The first row of the target frame to be handled, with range [0, height)
969  * @param numberTargetRows The number of rows in the target frame to be handled, with range [1, height - firstTargetRow]
970  * @tparam tChannels Number of frame channels, with range [1, infinity)
971  */
972  template <unsigned int tChannels>
973  static void rotate8BitPerChannelSubset(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const uint8_t* borderColor, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
974 
975  /**
976  * Subset function to apply an affine transform to an N-channel, 8-bit unsigned image.
977  * The affine transform must be provided in the following form: `sourcePoint = source_A_target * targetPoint`
978  * This function does not apply SIMD instructions and can be used for any frame dimensions.
979  * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
980  * <pre>
981  * a c e
982  * b d f
983  * 0 0 1
984  * </pre>
985  * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
986  * @param source Input frame that will be transformed
987  * @param sourceWidth Width of the source image in pixel, with range [1, infinity)
988  * @param sourceHeight Height of the source image in pixel, with range [1, infinity)
989  * @param source_A_target Affine transformation which is applied to the source frame.
990  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
991  * @param target Output frame using the given affine transform
992  * @param targetWidth The width of the target image in pixel, with range [1, infinity)
993  * @param targetHeight The height of the target image in pixel, with range [1, infinity)
994  * @param firstTargetRow The first target row to be handled
995  * @param numberTargetRows Number of target rows to be handled
996  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
997  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
998  * @tparam tChannels Number of frame channels, with range [1, infinity)
999  * @see affine8BitPerChannelSSESubset(), affine8BitPerChannelNEONSubset()
1000  */
1001  template <unsigned int tChannels>
1002  static inline void affine8BitPerChannelSubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
1003 
1004  /**
1005  * Transforms an 8 bit per channel frame using the given homography.
1006  * The homography must provide the following transformation: inputPoint = homography * outputPoint
1007  * This function does not apply SIMD instructions and can be used for any frame dimensions.
1008  * @param input The input frame that will be transformed
1009  * @param inputWidth Width of the input image in pixel, with range [1, infinity)
1010  * @param inputHeight Height of the input image in pixel, with range [1, infinity)
1011  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1012  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1013  * @param output The output frame using the given homography
1014  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
1015  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1016  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
1017  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
1018  * @param firstOutputRow The first output row to be handled, with range [0, outputHeight - 1]
1019  * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
1020  * @tparam tChannels Number of frame channels, with range [1, infinity)
1021  * @see homography8BitPerChannelSSESubset(), homography8BitPerChannelNEONSubset()
1022  */
1023  template <unsigned int tChannels>
1024  static inline void homography8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1025 
1026  /**
1027  * Transforms a frame with (almost) arbitrary pixel format using the given homography.
1028  * This function does not apply SIMD instructions and can be used for any frame dimensions.
1029  * @param input The input frame that will be transformed
1030  * @param inputWidth Width of the input image in pixel, with range [1, infinity)
1031  * @param inputHeight Height of the input image in pixel, with range [1, infinity)
1032  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1033  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1034  * @param output The output frame using the given homography
1035  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
1036  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1037  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
1038  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
1039  * @param firstOutputRow The first output row to be handled, with range [0, outputHeight - 1]
1040  * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
1041  * @tparam T Data type of each pixel channel, e.g., float, double, int
1042  * @tparam tChannels Number of frame channels, with range [1, infinity)
1043  * @see homography8BitPerChannelSSESubset().
1044  */
1045  template <typename T, unsigned int tChannels>
1046  static inline void homographySubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1047 
1048 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1049 
1050  /**
1051  * Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using SSE).
1052  * This function applies SSE instructions.<br>
1053  * Beware: The target width 'targetWidth' must be >= 4, use affine8BitPerChannelSubset for small target frames
1054  * This function has the property: sourcePoint = source_A_target * targetPoint
1055  * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
1056  * <pre>
1057  * a c e
1058  * b d f
1059  * 0 0 1
1060  * </pre>
1061  * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
1062  * @param source Input frame that will be transformed
1063  * @param sourceWidth Width of the source image in pixel, with range [1, infinity)
1064  * @param sourceHeight Height of the source image in pixel, with range [1, infinity)
1065  * @param source_A_target Affine transformation which is applied to source frame.
1066  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1067  * @param target The target frame where the result of the transformation will be stored
1068  * @param targetWidth The width of the target image in pixel, with range [4, infinity)
1069  * @param targetHeight The height of the target image in pixel, with range [1, infinity)
1070  * @param firstTargetRow The first target row to be handled
1071  * @param numberTargetRows Number of target rows to be handled
1072  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
1073  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1074  * @tparam tChannels Number of frame channels
1075  * @see affine8BitPerChannelSubset(), affine8BitPerChannelNEONSubset().
1076  */
1077  template <unsigned int tChannels>
1078  static inline void affine8BitPerChannelSSESubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
1079 
1080  /**
1081  * Transforms an 8 bit per channel frame using the given homography.
1082  * This function applies SSE instructions.<br>
1083  * Beware: The output width 'outputWidth' must be >= 4, use homography8BitPerChannelSubset for small output frames
1084  * @param input The input frame that will be transformed, must be valid
1085  * @param inputWidth Width of the input image in pixel, with range [1, infinity)
1086  * @param inputHeight Height of the input image in pixel, with range [1, infinity)
1087  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1088  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1089  * @param output The output frame using the given homography, must be valid
1090  * @param outputWidth The width of the output image in pixel, with range [4, infinity)
1091  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1092  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
1093  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
1094  * @param firstOutputRow The first output row to be handled, with range [0, outputHeight - 1]
1095  * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
1096  * @tparam tChannels Number of frame channels, with range [1, infinity)
1097  * @see homography8BitPerChannelSubset().
1098  */
1099  template <unsigned int tChannels>
1100  static inline void homography8BitPerChannelSSESubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1101 
1102  /**
1103  * Interpolates 4 independent pixels concurrently based on already known locations (top-left, top-right, bottom-left, and bottom-right) and interpolation factors for the source pixels.
1104  * This function also supports to interpolate the pixel values for a subset of the four pixels only, valid pixels will be interpolated, invalid pixels receive a defined border color.
1105  * @param source The source image in which the four independent pixels are located, must be valid
1106  * @param offsetsTopLeft The four offsets within the source image for the four top-left pixels used for the interpolation, with range [0, (width * tChannels + sourcePaddingElements) * (height - 1) + width * tChannels), or the corresponding validPixel information is 0x00000000
1107  * @param offsetsTopRight The four offsets within the source image for the four top-right pixels used for the interpolation, with range [0, (width * tChannels + sourcePaddingElements) * (height - 1) + width * tChannels), or the corresponding validPixel information is 0x00000000
1108  * @param offsetsBottomLeft The four offsets within the source image for the four bottom-left pixels used for the interpolation, with range [0, (width * tChannels + sourcePaddingElements) * (height - 1) + width * tChannels), or the corresponding validPixel information is 0x00000000
1109  * @param offsetsBottomRight The four offsets within the source image for the four bottom-right pixels used for the interpolation, with range [0, (width * tChannels + sourcePaddingElements) * (height - 1) + width * tChannels), or the corresponding validPixel information is 0x00000000
1110  * @param validPixels Four boolean states specifying which of the given four pixels will be interpolated and which ones will receive the defined border color, 0x00000000 for invalid pixels, everything else for valid pixels (e.g., 0xFFFFFFFF)
1111  * @param borderColor The border color that will be assigned to each resulting pixel that is invalid (for which the corresponding validPixel information is 0x00000000)
1112  * @param m128_factorsRight The horizontal interpolation factors for right pixels, with range [0, 128], 128 to use the color information of the right pixels only, 0 to use the color information of the left pixels only
1113  * @param m128_factorsBottom The vertical interpolation factors for bottom pixels, with range [0, 128], 128 to use the color information of the bottom pixels only, 0 to use the color information of the top pixels only
1114  * @param targetPositionPixels The buffer that will receive the interpolated color values, must be valid
1115  * @tparam tChannels The number of frame channels, with range [1, infinity)
1116  */
1117  template <unsigned int tChannels>
1118  static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelSSE(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const typename DataType<uint8_t, tChannels>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, tChannels>::Type* targetPositionPixels);
1119 
1120  /**
1121  * Interpolates 4 independent pixels concurrently based on already known locations (top-left, top-right, bottom-left, and bottom-right) and interpolation factors for the source pixels.
1122  * In contrast to the offset-based overload, this function receives the pixel values and the per-corner interpolation factors directly; it takes neither a valid-pixel mask nor a border color.
1123  * @param m128_sourcesTopLeft The pixel values of the four top left pixels, starting at the first byte, may contain unused bytes at the end, e.g., RGBARGBARGBARGBA or YUVYUVYUVYUV----
1124  * @param m128_sourcesTopRight The pixel values of the four top right pixels, starting at the first byte, may contain unused bytes at the end
1125  * @param m128_sourcesBottomLeft The pixel values of the four bottom left pixels, starting at the first byte, may contain unused bytes at the end
1126  * @param m128_sourcesBottomRight The pixel values of the four bottom right pixels, starting at the first byte, may contain unused bytes at the end
1127  * @param m128_factorsTopLeft The four interpolation factors of the four top left pixels, with ranges [0, 128 * 128], so that (m128_factorsTopLeft + m128_factorsTopRight + m128_factorsBottomLeft + m128_factorsBottomRight) == (128 * 128)
1128  * @param m128_factorsTopRight The four interpolation factors of the four top right pixels, with ranges [0, 128 * 128], so that (m128_factorsTopLeft + m128_factorsTopRight + m128_factorsBottomLeft + m128_factorsBottomRight) == (128 * 128)
1129  * @param m128_factorsBottomLeft The four interpolation factors of the four bottom left pixels, with ranges [0, 128 * 128], so that (m128_factorsTopLeft + m128_factorsTopRight + m128_factorsBottomLeft + m128_factorsBottomRight) == (128 * 128)
1130  * @param m128_factorsBottomRight The four interpolation factors of the four bottom right pixels, with ranges [0, 128 * 128], so that (m128_factorsTopLeft + m128_factorsTopRight + m128_factorsBottomLeft + m128_factorsBottomRight) == (128 * 128)
1131  * @return The resulting interpolated pixel values, starting at the first byte, may contain unused bytes at the end, e.g., RGBARGBARGBARGBA or YUVYUVYUVYUV----
1132  * @tparam tChannels The number of frame channels, with range [3, 4]
1133  */
1134  template <unsigned int tChannels>
1135  static OCEAN_FORCE_INLINE __m128i interpolate4Pixels8BitPerChannelSSE(const __m128i& m128_sourcesTopLeft, const __m128i& m128_sourcesTopRight, const __m128i& m128_sourcesBottomLeft, const __m128i& m128_sourcesBottomRight, const __m128i& m128_factorsTopLeft, const __m128i& m128_factorsTopRight, const __m128i& m128_factorsBottomLeft, const __m128i& m128_factorsBottomRight);
1136 
1137 #endif // OCEAN_HARDWARE_SSE_VERSION
1138 
1139 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1140 
1141  /**
1142  * Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using NEON).
1143  * This function applies NEON instructions.<br>
1144  * This one has the property: sourcePoint = source_A_target * targetPoint
1145  * Beware: The target width 'targetWidth' must be >= 4, use affine8BitPerChannelSubset for small target frames
1146  * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
1147  * <pre>
1148  * a c e
1149  * b d f
1150  * 0 0 1
1151  * </pre>
1152  * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
1153  * @param source The source frame that will be transformed
1154  * @param sourceWidth Width of the source image in pixel, with range [1, infinity)
1155  * @param sourceHeight Height of the source image in pixel, with range [1, infinity)
1156  * @param source_A_target Affine transform used to transform the given source frame.
1157  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1158  * @param target The target frame using the given affine transform
1159  * @param targetWidth The width of the target image in pixel, with range [4, infinity)
1160  * @param targetHeight The height of the target image in pixel, with range [1, infinity)
1161  * @param firstTargetRow The first target row to be handled
1162  * @param numberTargetRows Number of target rows to be handled
1163  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
1164  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1165  * @tparam tChannels Number of frame channels, with range [1, infinity)
1166  * @see affine8BitPerChannelSubset(), affine8BitPerChannelSSESubset().
1167  */
1168  template <unsigned int tChannels>
1169  static inline void affine8BitPerChannelNEONSubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
1170 
1171  /**
1172  * Transforms an 8 bit per channel frame using the given homography.
1173  * This function applies NEON instructions.<br>
1174  * Beware: The output width 'outputWidth' must be >= 4, use homography8BitPerChannelSubset for small output frames.
1175  * @param input The input frame that will be transformed
1176  * @param inputWidth Width of the input image in pixel, with range [1, infinity)
1177  * @param inputHeight Height of the input image in pixel, with range [1, infinity)
1178  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1179  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1180  * @param output The output frame using the given homography
1181  * @param outputWidth The width of the output image in pixel, with range [4, infinity)
1182  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1183  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
1184  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
1185  * @param firstOutputRow The first output row to be handled, with range [0, outputHeight - 1]
1186  * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
1187  * @tparam tChannels Number of frame channels, with range [1, infinity)
1188  * @see homography8BitPerChannelSubset().
1189  */
1190  template <unsigned int tChannels>
1191  static inline void homography8BitPerChannelNEONSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1192 
1193  /**
1194  * Interpolates 4 independent pixels concurrently based on already known locations (top-left, top-right, bottom-left, and bottom-right) and interpolation factors for the source pixels.
1195  * This function also supports to interpolate the pixel values for a subset of the four pixels only, valid pixels will be interpolated, invalid pixels receive a defined border color.
1196  * @param source The source image in which the four independent pixels are located, must be valid
1197  * @param offsetsTopLeftElements The four offsets within the source image for the four top-left pixels used for the interpolation, in elements, with ranges [0, strideElements * height), or the corresponding validPixel information is 0x00000000
1198  * @param offsetsTopRightElements The four offsets within the source image for the four top-right pixels used for the interpolation, in elements, with ranges [0, strideElements * height), or the corresponding validPixel information is 0x00000000
1199  * @param offsetsBottomLeftElements The four offsets within the source image for the four bottom-left pixels used for the interpolation, in elements, with ranges [0, strideElements * height), or the corresponding validPixel information is 0x00000000
1200  * @param offsetsBottomRightElements The four offsets within the source image for the four bottom-right pixels used for the interpolation, in elements, with ranges [0, strideElements * height), or the corresponding validPixel information is 0x00000000
1201  * @param validPixels Four boolean states specifying which of the given four pixels will be interpolated and which ones will receive the defined border color, 0x00000000 for invalid pixels, everything else for valid pixels (e.g., 0xFFFFFFFF)
1202  * @param borderColor The border color that will be assigned to each resulting pixel that is invalid (for which the corresponding validPixel information is 0x00000000)
1203  * @param m128_factorsRight The horizontal interpolation factors for right pixels, with range [0, 128], 128 to use the color information of the right pixels only, 0 to use the color information of the left pixels only
1204  * @param m128_factorsBottom The vertical interpolation factors for bottom pixels, with range [0, 128], 128 to use the color information of the bottom pixels only, 0 to use the color information of the top pixels only
1205  * @param targetPositionPixels The buffer that will receive the interpolated color values, must be valid
1206  * @tparam tChannels The number of frame channels, with range [1, infinity)
1207  */
1208  template <unsigned int tChannels>
1209  static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelNEON(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const typename DataType<uint8_t, tChannels>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, tChannels>::Type* targetPositionPixels);
1210 
1211  /**
1212  * Interpolates 8 independent pixels of a 1 channel frame concurrently; the source pixel values (top-left, top-right, bottom-left, and bottom-right) and the interpolation factors must be known already.
1213  * @param topLeft_u_8x8 The 8 top left pixel values to be used for interpolation
1214  * @param topRight_u_8x8 The 8 top right pixel values to be used for interpolation
1215  * @param bottomLeft_u_8x8 The 8 bottom left pixel values to be used for interpolation
1216  * @param bottomRight_u_8x8 The 8 bottom right pixel values to be used for interpolation
1217  * @param factorsRight_factorsBottom_128_u_8x16 The eight horizontal interpolation factors for right pixels, and the eight vertical interpolation factors for the bottom pixels, with range [0, 128], 128 to use the color information of the right/bottom pixels only, 0 to use the color information of the left/top pixels only
1218  * @param targetPositionPixels The buffer that will receive the interpolated color values, must be valid
1219  */
1220  static OCEAN_FORCE_INLINE void interpolate8Pixels1Channel8BitNEON(const uint8x8_t& topLeft_u_8x8, const uint8x8_t& topRight_u_8x8, const uint8x8_t& bottomLeft_u_8x8, const uint8x8_t& bottomRight_u_8x8, const uint8x16_t& factorsRight_factorsBottom_128_u_8x16, uint8_t* targetPositionPixels);
1221 
1222 #endif // OCEAN_HARDWARE_NEON_VERSION
1223 
1224  /**
1225  * Transforms an 8 bit per channel frame using the given homographies.
1226  * @param input The input frame that will be transformed
1227  * @param inputWidth Width of the input image in pixel, with range [1, infinity)
1228  * @param inputHeight Height of the input image in pixel, with range [1, infinity)
1229  * @param homographies Homographies used to transform the given input frame
1230  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1231  * @param output The output frame using the given homographies
1232  * @param outputQuadrantCenterX The horizontal position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputWidth)
1233  * @param outputQuadrantCenterY The vertical position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputHeight)
1234  * @param outputOriginX The horizontal coordinate of the output frame's origin
1235  * @param outputOriginY The vertical coordinate of the output frame's origin
1236  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
1237  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1238  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1239  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1240  * @param firstOutputRow The first output row to be handled
1241  * @param numberOutputRows Number of output rows to be handled
1242  * @tparam tChannels Number of frame channels
1243  */
1244  template <unsigned int tChannels>
1245  static inline void homographies8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* homographies, const uint8_t* borderColor, uint8_t* output, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1246 
1247  /**
1248  * Transforms an 8 bit per channel frame using the given homography.
1249  * @param input The input frame that will be transformed, must be valid
1250  * @param inputWidth Width of the input image in pixel, with range [1, infinity)
1251  * @param inputHeight Height of the input image in pixel, with range [1, infinity)
1252  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1253  * @param output The output frame resulting by application of the given homography, must be valid
1254  * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
1255  * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
1256  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
1257  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1258  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1259  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1260  * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
1261  * @param firstOutputRow The first output row to be handled
1262  * @param numberOutputRows Number of output rows to be handled
1263  * @tparam tChannels Number of frame channels, with range [1, infinity)
1264  */
1265  template <unsigned int tChannels>
1266  static inline void homographyMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1267 
1268  /**
1269  * Transforms a subset of an 8 bit per channel frame using the given homographies and determines the corresponding output mask.
1270  * @param input The input frame that will be transformed
1271  * @param inputWidth The width of the input frame in pixel, with range [1, infinity)
1272  * @param inputHeight The height of the input frame in pixel, with range [1, infinity)
1273  * @param homographies Homographies used to transform the given input frame
1274  * @param output The output frame resulting by application of the given homographies
1275  * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
1276  * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
1277  * @param outputQuadrantCenterX The horizontal position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputWidth)
1278  * @param outputQuadrantCenterY The vertical position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputHeight)
1279  * @param outputOriginX The horizontal coordinate of the output frame's origin
1280  * @param outputOriginY The vertical coordinate of the output frame's origin
1281  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
1282  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1283  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1284  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1285  * @param outputMaskPaddingElements The number of padding elements at the end of each row of the output mask, in elements, with range [0, infinity)
1286  * @param firstOutputRow The first output row to be handled
1287  * @param numberOutputRows Number of output rows to be handled
1288  * @tparam tChannels Number of frame channels
1289  */
1290  template <unsigned int tChannels>
1291  static inline void homographiesMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* homographies, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1292 
1293  /**
1294  * Transforms a subset of an 8 bit per channel frame using the given normalized homography and the given camera profiles.
1295  * @param inputCamera The pinhole camera profile to be applied for the input frame
1296  * @param outputCamera The pinhole camera profile to be applied for the output frame
1297  * @param outputCameraDistortionLookup The distortion lookup table of the output camera
1298  * @param input The input frame that will be transformed
1299  * @param normalizedHomography The homography used to transform the given input frame specified in normalized camera coordinates
1300  * @param useDistortionParameters True, to apply the distortion parameters of the camera profile
1301  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1302  * @param output The output frame resulting by application of the given homography
1303  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1304  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1305  * @param firstRow The first row to be handled
1306  * @param numberRows Number of rows to be handled
1307  * @tparam tChannels Number of frame channels
1308  */
1309  template <unsigned int tChannels>
1310  static void homographyWithCamera8BitPerChannelSubset(const PinholeCamera* inputCamera, const PinholeCamera* outputCamera, const PinholeCamera::DistortionLookup* outputCameraDistortionLookup, const uint8_t* input, const SquareMatrix3* normalizedHomography, const bool useDistortionParameters, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1311 
1312  /**
1313  * Transforms a subset of an 8 bit per channel frame using the given normalized homography and the given camera profiles, and determines the corresponding output mask.
1314  * @param inputCamera The pinhole camera profile to be applied for the input frame
1315  * @param outputCamera The pinhole camera profile to be applied for the output frame
1316  * @param outputCameraDistortionLookup The distortion lookup table of the output camera
1317  * @param input The input frame that will be transformed, must be valid
1318  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1319  * @param normalizedHomography The homography used to transform the given input frame specified in normalized camera coordinates
1320  * @param output The output frame resulting by application of the given homography
1321  * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
1322  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1323  * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
1324  * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
1325  * @param firstRow The first row to be handled
1326  * @param numberRows Number of rows to be handled
1327  * @tparam tChannels Number of frame channels
1328  */
1329  template <unsigned int tChannels>
1330  static void homographyWithCameraMask8BitPerChannelSubset(const PinholeCamera* inputCamera, const PinholeCamera* outputCamera, const PinholeCamera::DistortionLookup* outputCameraDistortionLookup, const uint8_t* input, const unsigned int inputPaddingElements, const SquareMatrix3* normalizedHomography, uint8_t* output, uint8_t* outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows);
1331 
1332  /**
1333  * Transforms a subset of a given input frame with uint8_t as element type into an output frame by application of an interpolation lookup table.
1334  * The output frame must have the same pixel format and pixel origin as the input frame.<br>
1335  * @param input The input frame which will be transformed, must be valid
1336  * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
1337  * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
1338  * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
1339  * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
1340  * @param borderColor Color of undefined pixel positions, the size of the buffer must match the number of channels, nullptr to assign 0x00 to each channel
1341  * @param output Resulting output frame with frame dimension equal to the size of the given lookup table
1342  * @param inputPaddingElements Number of padding elements at the end of each input row, in elements, with range [0, infinity)
1343  * @param outputPaddingElements Number of padding elements at the end of each output row, in elements, with range [0, infinity)
1344  * @param firstRow First row to be handled, with range [0, input_LT_output->sizeY())
1345  * @param numberRows Number of rows to be handled, with range [1, input_LT_output->sizeY() - firstRow]
1346  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
1347  */
1348  template <unsigned int tChannels>
1349  static void lookup8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1350 
1351  /**
1352  * Transforms a subset of a given input frame with arbitrary element type into an output frame by application of an interpolation lookup table.
1353  * The output frame must have the same pixel format and pixel origin as the input frame.<br>
1354  * @param input The input frame which will be transformed, must be valid
1355  * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
1356  * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
1357  * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
1358  * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
1359  * @param borderColor Color of undefined pixel positions, the size of the buffer must match the number of channels, nullptr to assign T(0) to each channel
1360  * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
1361  * @param inputPaddingElements Number of padding elements at the end of each input row, in elements, with range [0, infinity)
1362  * @param outputPaddingElements Number of padding elements at the end of each output row, in elements, with range [0, infinity)
1363  * @param firstRow First row to be handled, with range [0, input_LT_output->sizeY())
1364  * @param numberRows Number of rows to be handled, with range [1, input_LT_output->sizeY() - firstRow]
1365  * @tparam T Data type of each pixel channel, must not be 'uint8_t'
1366  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
1367  */
1368  template <typename T, unsigned int tChannels>
1369  static void lookupSubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const T* borderColor, T* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1370 
1371 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1372 
1373  /**
1374  * Transforms a subset of a given input frame into an output frame by application of an interpolation lookup table and uses NEON instructions.
1375  * The output frame must have the same pixel format and pixel origin as the input frame.<br>
1376  * @param input The input frame which will be transformed, must be valid
1377  * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
1378  * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
1379  * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), with table width >= 4, must be valid
1380  * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
1381  * @param borderColor Color of undefined pixel positions, the size of the buffer must match the number of channels, nullptr to assign 0x00 to each channel
1382  * @param output Resulting output frame with frame dimension equal to the size of the given lookup table
1383  * @param inputPaddingElements Number of padding elements at the end of each input row, in elements, with range [0, infinity)
1384  * @param outputPaddingElements Number of padding elements at the end of each output row, in elements, with range [0, infinity)
1385  * @param firstRow First row to be handled, with range [0, input_LT_output->sizeY())
1386  * @param numberRows Number of rows to be handled, with range [1, input_LT_output->sizeY() - firstRow]
1387  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
1388  */
1389  template <unsigned int tChannels>
1390  static void lookup8BitPerChannelSubsetNEON(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1391 
1392 #endif // defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1393 
1394  /**
1395  * Transforms a subset of a given input frame into an output frame by application of an interpolation lookup table.
1396  * The output frame must have the same pixel format and pixel origin as the input frame.<br>
1397  * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
1398  * @param input The input frame which will be transformed
1399  * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
1400  * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
1401  * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
1402  * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
1403  * @param output Resulting output frame with frame dimension equal to the size of the given lookup table
1404  * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
1405  * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
1406  * @param inputPaddingElements The number of padding elements at the end of each row of `input`, in elements, with range [0, infinity)
1407  * @param outputPaddingElements The number of padding elements at the end of each row of `output`, in elements, with range [0, infinity)
1408  * @param outputMaskPaddingElements The number of padding elements at the end of each row of `outputMask`, in elements, with range [0, infinity)
1409  * @param firstRow First row to be handled
1410  * @param numberRows Number of rows to be handled
1411  * @tparam tChannels Number of channels of the frame
1412  */
1413  template <unsigned int tChannels>
1414  static void lookupMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1415 };
1416 
1417 inline bool FrameInterpolatorBilinear::Comfort::resize(Frame& frame, const unsigned int width, const unsigned int height, Worker* worker)
1418 {
1419  ocean_assert(frame.isValid());
1420  ocean_assert(width >= 1u && height >= 1u);
1421 
1422  Frame target(FrameType(frame, width, height));
1423 
1424  if (!resize(frame, target, worker))
1425  {
1426  return false;
1427  }
1428 
1429  frame = std::move(target);
1430  return true;
1431 }
1432 
1433 template <typename TScalar>
1434 bool FrameInterpolatorBilinear::Comfort::interpolatePixel8BitPerChannel(const uint8_t* frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2<TScalar>& position, uint8_t* result)
1435 {
1436  ocean_assert(frame != nullptr);
1437  ocean_assert(channels >= 1u && channels <= 8u);
1438 
1439  if (pixelCenter == PC_TOP_LEFT)
1440  {
1441  switch (channels)
1442  {
1443  case 1u:
1444  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<1u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1445  return true;
1446 
1447  case 2u:
1448  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<2u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1449  return true;
1450 
1451  case 3u:
1452  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<3u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1453  return true;
1454 
1455  case 4u:
1456  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<4u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1457  return true;
1458 
1459  case 5u:
1460  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<5u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1461  return true;
1462 
1463  case 6u:
1464  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<6u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1465  return true;
1466 
1467  case 7u:
1468  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<7u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1469  return true;
1470 
1471  case 8u:
1472  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<8u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1473  return true;
1474  }
1475  }
1476  else
1477  {
1478  ocean_assert(pixelCenter == PC_CENTER);
1479 
1480  switch (channels)
1481  {
1482  case 1u:
1483  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<1u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1484  return true;
1485 
1486  case 2u:
1487  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<2u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1488  return true;
1489 
1490  case 3u:
1491  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<3u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1492  return true;
1493 
1494  case 4u:
1495  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<4u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1496  return true;
1497 
1498  case 5u:
1499  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<5u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1500  return true;
1501 
1502  case 6u:
1503  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<6u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1504  return true;
1505 
1506  case 7u:
1507  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<7u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1508  return true;
1509 
1510  case 8u:
1511  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<8u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1512  return true;
1513  }
1514  }
1515 
1516  ocean_assert(false && "Invalid channel number");
1517  return false;
1518 }
1519 
1520 template <typename TSource, typename TTarget, typename TScalar, typename TIntermediate>
1521 bool FrameInterpolatorBilinear::Comfort::interpolatePixel(const TSource* frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2<TScalar>& position, TTarget* result, const TIntermediate& resultBias)
1522 {
1523  ocean_assert(frame != nullptr);
1524  ocean_assert(channels >= 1u && channels <= 8u);
1525 
1526  if (pixelCenter == PC_TOP_LEFT)
1527  {
1528  switch (channels)
1529  {
1530  case 1u:
1531  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 1u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1532  return true;
1533 
1534  case 2u:
1535  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 2u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1536  return true;
1537 
1538  case 3u:
1539  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 3u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1540  return true;
1541 
1542  case 4u:
1543  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 4u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1544  return true;
1545 
1546  case 5u:
1547  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 5u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1548  return true;
1549 
1550  case 6u:
1551  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 6u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1552  return true;
1553 
1554  case 7u:
1555  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 7u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1556  return true;
1557 
1558  case 8u:
1559  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 8u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1560  return true;
1561  }
1562  }
1563  else
1564  {
1565  ocean_assert(pixelCenter == PC_CENTER);
1566 
1567  switch (channels)
1568  {
1569  case 1u:
1570  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 1u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1571  return true;
1572 
1573  case 2u:
1574  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 2u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1575  return true;
1576 
1577  case 3u:
1578  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 3u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1579  return true;
1580 
1581  case 4u:
1582  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 4u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1583  return true;
1584 
1585  case 5u:
1586  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 5u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1587  return true;
1588 
1589  case 6u:
1590  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 6u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1591  return true;
1592 
1593  case 7u:
1594  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 7u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1595  return true;
1596 
1597  case 8u:
1598  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 8u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1599  return true;
1600  }
1601  }
1602 
1603  ocean_assert(false && "Invalid channel number");
1604  return false;
1605 }
1606 
1607 template <typename T, unsigned int tChannels>
1608 inline void FrameInterpolatorBilinear::resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
1609 {
1610  ocean_assert(source != nullptr && target != nullptr);
1611  ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
1612  ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
1613 
1614  const double sourceX_s_targetX = double(sourceWidth) / double(targetWidth);
1615  const double sourceY_s_targetY = double(sourceHeight) / double(targetHeight);
1616 
1617  scale<T, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, worker);
1618 }
1619 
1620 template <typename T, unsigned int tChannels>
1621 inline void FrameInterpolatorBilinear::scale(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
1622 {
1623  ocean_assert(source != nullptr && target != nullptr);
1624  ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
1625  ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
1626  ocean_assert(sourceX_s_targetX > 0.0);
1627  ocean_assert(sourceY_s_targetY > 0.0);
1628 
1629  if (sourceWidth == targetWidth && sourceHeight == targetHeight)
1630  {
1631  FrameConverter::subFrame<T>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, 0u, 0u, 0u, 0u, sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements);
1632  return;
1633  }
1634 
1635  if (std::is_same<T, uint8_t>::value)
1636  {
1637  // we have a SIMD-based optimized version for 'uint8_t' data types
1638 
1639  scale8BitPerChannel<tChannels>((const uint8_t*)source, (uint8_t*)target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, worker);
1640  }
1641  else
1642  {
1643  typedef typename FloatTyper<T>::Type TScale;
1644 
1645  if (worker)
1646  {
1647  worker->executeFunction(Worker::Function::createStatic(&scaleSubset<T, TScale, tChannels>, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
1648  }
1649  else
1650  {
1651  scaleSubset<T, TScale, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
1652  }
1653  }
1654 }
1655 
1656 template <unsigned int tChannels>
1657 inline void FrameInterpolatorBilinear::affine8BitPerChannel(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3& source_A_target, const uint8_t* borderColor, uint8_t* target, const CV::PixelPositionI& targetOrigin, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
1658 {
1659  // If applicable, apply an additional translation to the affine transformation.
1660  const SquareMatrix3 adjustedAffineTransform = source_A_target * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(targetOrigin.x()), Scalar(targetOrigin.y()), 1));
1661 
1662  if (worker)
1663  {
1664  if (targetWidth >= 4u)
1665  {
1666 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1667  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::affine8BitPerChannelSSESubset<tChannels>, source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, 0u, sourcePaddingElements, targetPaddingElements), 0, targetHeight, 8u, 9u, 20u);
1668  return;
1669 #elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1670  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::affine8BitPerChannelNEONSubset<tChannels>, source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, 0u, sourcePaddingElements, targetPaddingElements), 0, targetHeight, 8u, 9u, 20u);
1671  return;
1672 #endif
1673  }
1674 
1675  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::affine8BitPerChannelSubset<tChannels>, source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, 0u, sourcePaddingElements, targetPaddingElements), 0, targetHeight, 8u, 9u, 20u);
1676  }
1677  else
1678  {
1679  if (targetWidth >= 4u)
1680  {
1681 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1682  affine8BitPerChannelSSESubset<tChannels>(source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, targetHeight, sourcePaddingElements, targetPaddingElements);
1683  return;
1684 #elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1685  affine8BitPerChannelNEONSubset<tChannels>(source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, targetHeight, sourcePaddingElements, targetPaddingElements);
1686  return;
1687 #endif
1688  }
1689 
1690  affine8BitPerChannelSubset<tChannels>(source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, targetHeight, sourcePaddingElements, targetPaddingElements);
1691  }
1692 }
1693 
1694 template <unsigned int tChannels>
1695 inline void FrameInterpolatorBilinear::homography8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const uint8_t* borderColor, uint8_t* output, const CV::PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1696 {
1697  // we adjust the homography to address 'outputOrigin'
1698  const SquareMatrix3 input_H_shiftedOutput = input_H_output * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
1699 
1700  if (worker)
1701  {
1702  if (outputWidth >= 4u)
1703  {
1704 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1705  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homography8BitPerChannelSSESubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1706  return;
1707 #elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1708  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homography8BitPerChannelNEONSubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1709  return;
1710 #endif
1711  }
1712 
1713  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homography8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1714  }
1715  else
1716  {
1717  if (outputWidth >= 4u)
1718  {
1719 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1720  homography8BitPerChannelSSESubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1721  return;
1722 #elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1723  homography8BitPerChannelNEONSubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1724  return;
1725 #endif
1726  }
1727 
1728  homography8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1729  }
1730 }
1731 
1732 template <typename T, unsigned int tChannels>
1733 inline void FrameInterpolatorBilinear::homography(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const T* borderColor, T* output, const CV::PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1734 {
1735  if (std::is_same<T, uint8_t>::value)
1736  {
1737  homography8BitPerChannel<tChannels>((const uint8_t*)input, inputWidth, inputHeight, input_H_output, (const uint8_t*)borderColor, (uint8_t*)output, outputOrigin, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, worker);
1738  return;
1739  }
1740  else
1741  {
1742  // we adjust the homography to address 'outputOrigin'
1743  const SquareMatrix3 input_H_shiftedOutput = input_H_output * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
1744 
1745  if (worker)
1746  {
1747  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographySubset<T, tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1748  }
1749  else
1750  {
1751  homographySubset<T, tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1752  }
1753  }
1754 }
1755 
1756 template <unsigned int tChannels>
1757 inline void FrameInterpolatorBilinear::homographies8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], const uint8_t* borderColor, uint8_t* output, const Vector2& outputQuadrantCenter, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1758 {
1759  if (worker)
1760  {
1761  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographies8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, homographies, borderColor, output, outputQuadrantCenter.x(), outputQuadrantCenter.y(), outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 14u, 15u, 20u);
1762  }
1763  else
1764  {
1765  homographies8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, homographies, borderColor, output, outputQuadrantCenter.x(), outputQuadrantCenter.y(), outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1766  }
1767 }
1768 
1769 template <unsigned int tChannels>
1770 inline void FrameInterpolatorBilinear::homographyMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, uint8_t* output, uint8_t* outputMask, const CV::PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker)
1771 {
1772  // we adjust the homography to address 'outputOrigin'
1773  const SquareMatrix3 input_H_shiftedOutput = input_H_output * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
1774 
1775  if (worker)
1776  {
1777  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographyMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, output, outputMask, maskValue, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, 0u), 0, outputHeight, 12u, 13u, 20u);
1778  }
1779  else
1780  {
1781  homographyMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, output, outputMask, maskValue, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, outputHeight);
1782  }
1783 }
1784 
1785 template <unsigned int tChannels>
1786 inline void FrameInterpolatorBilinear::homographiesMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], uint8_t* output, uint8_t* outputMask, const Vector2& outputQuadrantCenter, const CV::PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker, const uint8_t maskValue)
1787 {
1788  if (worker)
1789  {
1790  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographiesMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, homographies, output, outputMask, maskValue, outputQuadrantCenter.x(), outputQuadrantCenter.y(), outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, 0u), 0, outputHeight);
1791  }
1792  else
1793  {
1794  homographiesMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, homographies, output, outputMask, maskValue, outputQuadrantCenter.x(), outputQuadrantCenter.y(), outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, outputHeight);
1795  }
1796 }
1797 
1798 template <unsigned int tChannels>
1799 inline void FrameInterpolatorBilinear::homographyWithCamera8BitPerChannel(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const uint8_t* input, const SquareMatrix3& homography, const bool useDistortionParameters, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1800 {
1801  const SquareMatrix3 normalizedHomography(inputCamera.invertedIntrinsic() * homography * outputCamera.intrinsic());
1802 
1803  const PinholeCamera::DistortionLookup outputCameraDistortionLookup(outputCamera, 10u);
1804 
1805  if (worker)
1806  {
1807  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographyWithCamera8BitPerChannelSubset<tChannels>, &inputCamera, &outputCamera, &outputCameraDistortionLookup, input, &normalizedHomography, useDistortionParameters, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputCamera.height());
1808  }
1809  else
1810  {
1811  homographyWithCamera8BitPerChannelSubset<tChannels>(&inputCamera, &outputCamera, &outputCameraDistortionLookup, input, &normalizedHomography, useDistortionParameters, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, outputCamera.height());
1812  }
1813 }
1814 
1815 template <unsigned int tChannels>
1816 inline void FrameInterpolatorBilinear::homographyWithCameraMask8BitPerChannel(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const uint8_t* input, const unsigned int inputPaddingElements, const SquareMatrix3& homography, uint8_t* output, uint8_t* outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker, const uint8_t maskValue)
1817 {
1818  const SquareMatrix3 normalizedHomography(inputCamera.invertedIntrinsic() * homography * outputCamera.intrinsic());
1819 
1820  const PinholeCamera::DistortionLookup outputCameraDistortionLookup(outputCamera, 10u);
1821 
1822  if (worker)
1823  {
1824  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographyWithCameraMask8BitPerChannelSubset<tChannels>, &inputCamera, &outputCamera, &outputCameraDistortionLookup, input, inputPaddingElements, &normalizedHomography, output, outputMask, outputPaddingElements, outputMaskPaddingElements, maskValue, 0u, 0u), 0, outputCamera.height(), 11u, 12u, 10u);
1825  }
1826  else
1827  {
1828  homographyWithCameraMask8BitPerChannelSubset<tChannels>(&inputCamera, &outputCamera, &outputCameraDistortionLookup, input, inputPaddingElements, &normalizedHomography, output, outputMask, outputPaddingElements, outputMaskPaddingElements, maskValue, 0u, outputCamera.height());
1829  }
1830 }
1831 
1832 template <typename T, unsigned int tChannels>
1833 inline void FrameInterpolatorBilinear::lookup(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& input_LT_output, const bool offset, const T* borderColor, T* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1834 {
1835  if constexpr (std::is_same<T, uint8_t>::value)
1836  {
1837 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1838  if ((tChannels >= 1u && input_LT_output.sizeX() >= 8) || (tChannels >= 2u && input_LT_output.sizeX() >= 4))
1839  {
1840  // NEON implementation for 1 channel: min width 8; for 2+ channels: min width 4
1841 
1842  if (worker)
1843  {
1844  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::lookup8BitPerChannelSubsetNEON<tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (unsigned int)(input_LT_output.sizeY()), 9u, 10u, 20u);
1845  }
1846  else
1847  {
1848  lookup8BitPerChannelSubsetNEON<tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (unsigned int)(input_LT_output.sizeY()));
1849  }
1850 
1851  return;
1852  }
1853 #endif // defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1854 
1855  if (worker)
1856  {
1857  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::lookup8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (unsigned int)input_LT_output.sizeY(), 9u, 10u, 20u);
1858  }
1859  else
1860  {
1861  lookup8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (unsigned int)(input_LT_output.sizeY()));
1862  }
1863  }
1864  else
1865  {
1866  ocean_assert((!std::is_same<T, uint8_t>::value));
1867 
1868  if (worker)
1869  {
1870  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::lookupSubset<T, tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (unsigned int)(input_LT_output.sizeY()), 9u, 10u, 20u);
1871  }
1872  else
1873  {
1874  lookupSubset<T, tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (unsigned int)(input_LT_output.sizeY()));
1875  }
1876  }
1877 }
1878 
1879 template <unsigned int tChannels>
1880 inline void FrameInterpolatorBilinear::lookupMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& input_LT_output, const bool offset, uint8_t* output, uint8_t* outputMask, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker, const uint8_t maskValue)
1881 {
1882  if (worker)
1883  {
1884  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::lookupMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, output, outputMask, maskValue, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, 0u), 0u, (unsigned int)(input_LT_output.sizeY()), 11u, 12u, 20u);
1885  }
1886  else
1887  {
1888  lookupMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, output, outputMask, maskValue, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, (unsigned int)(input_LT_output.sizeY()));
1889  }
1890 }
1891 
1892 template <typename T, unsigned int tChannels>
1893 void FrameInterpolatorBilinear::resampleCameraImage(const T* sourceFrame, const AnyCamera& sourceCamera, const SquareMatrix3& source_R_target, const AnyCamera& targetCamera, T* targetFrame, const unsigned int sourceFramePaddingElements, const unsigned int targetFramePaddingElements, LookupCorner2<Vector2>* source_OLT_target, Worker* worker, const unsigned int binSizeInPixel, const T* borderColor)
1894 {
1895  static_assert(tChannels >= 1u, "Invalid channel number!");
1896 
1897  ocean_assert(sourceFrame != nullptr);
1898  ocean_assert(sourceCamera.isValid());
1899  ocean_assert(source_R_target.isOrthonormal());
1900  ocean_assert(targetCamera.isValid());
1901  ocean_assert(targetFrame != nullptr);
1902  ocean_assert(binSizeInPixel >= 1u);
1903 
1904  const size_t binsX = std::max(1u, targetCamera.width() / binSizeInPixel);
1905  const size_t binsY = std::max(1u, targetCamera.height() / binSizeInPixel);
1906  CV::FrameInterpolatorBilinear::LookupTable lookupTable(targetCamera.width(), targetCamera.height(), binsX, binsY);
1907 
1908  for (size_t yBin = 0; yBin <= lookupTable.binsY(); ++yBin)
1909  {
1910  for (size_t xBin = 0; xBin <= lookupTable.binsX(); ++xBin)
1911  {
1912  const Vector2 cornerPosition = lookupTable.binTopLeftCornerPosition(xBin, yBin);
1913 
1914  constexpr bool makeUnitVector = false; // we don't need a unit/normalized vector as we project the vector into the camera again
1915 
1916  const Vector3 rayI = source_R_target * targetCamera.vector(cornerPosition, makeUnitVector);
1917  const Vector3 rayIF = Vector3(rayI.x(), -rayI.y(), -rayI.z());
1918 
1919  if (rayIF.z() > Numeric::eps())
1920  {
1921  const Vector2 projectedPoint = sourceCamera.projectToImageIF(rayIF);
1922 
1923  lookupTable.setBinTopLeftCornerValue(xBin, yBin, projectedPoint - cornerPosition);
1924  }
1925  else
1926  {
1927  // simply a coordinate far outside the input
1928  lookupTable.setBinTopLeftCornerValue(xBin, yBin, Vector2(Scalar(sourceCamera.width() * 10u), Scalar(sourceCamera.height() * 10u)));
1929  }
1930  }
1931  }
1932 
1933  lookup<T, tChannels>(sourceFrame, sourceCamera.width(), sourceCamera.height(), lookupTable, true /*offset*/, borderColor, targetFrame, sourceFramePaddingElements, targetFramePaddingElements, worker);
1934 
1935  if (source_OLT_target)
1936  {
1937  *source_OLT_target = std::move(lookupTable);
1938  }
1939 }
1940 
1941 template <unsigned int tChannels>
1942 void FrameInterpolatorBilinear::rotate8BitPerChannel(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker, const uint8_t* borderColor)
1943 {
1944  static_assert(tChannels != 0u, "Invalid channel number!");
1945 
1946  ocean_assert(source != nullptr && target != nullptr);
1947  ocean_assert(width >= 1u && height >= 1u);
1948 
1949  if (worker)
1950  {
1951  worker->executeFunction(Worker::Function::createStatic(&rotate8BitPerChannelSubset<tChannels>, source, target, width, height, horizontalAnchorPosition, verticalAnchorPosition, angle, borderColor, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height);
1952  }
1953  else
1954  {
1955  rotate8BitPerChannelSubset<tChannels>(source, target, width, height, horizontalAnchorPosition, verticalAnchorPosition, angle, borderColor, sourcePaddingElements, targetPaddingElements, 0u, height);
1956  }
1957 }
1958 
1959 template <unsigned int tChannels, PixelCenter tPixelCenter, typename TScalar>
1960 inline void FrameInterpolatorBilinear::interpolatePixel8BitPerChannel(const uint8_t* frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2<TScalar>& position, uint8_t* result)
1961 {
1962  static_assert(tChannels != 0u, "Invalid channel number!");
1963  static_assert(tPixelCenter == PC_TOP_LEFT || tPixelCenter == PC_CENTER, "Invalid pixel center!");
1964 
1965  ocean_assert(frame != nullptr && result != nullptr);
1966  ocean_assert(width != 0u && height != 0u);
1967 
1968  const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
1969 
1970  ocean_assert(position.x() >= TScalar(0));
1971  ocean_assert(position.y() >= TScalar(0));
1972 
1973  if constexpr (tPixelCenter == PC_TOP_LEFT)
1974  {
1975  ocean_assert(position.x() <= TScalar(width - 1u));
1976  ocean_assert(position.y() <= TScalar(height - 1u));
1977 
1978  const unsigned int left = (unsigned int)(position.x());
1979  const unsigned int top = (unsigned int)(position.y());
1980  ocean_assert(left < width && top < height);
1981 
1982  const TScalar tx = position.x() - TScalar(left);
1983  ocean_assert(tx >= 0 && tx <= 1);
1984  const unsigned int txi = (unsigned int)(tx * TScalar(128) + TScalar(0.5));
1985  const unsigned int txi_ = 128u - txi;
1986 
1987  const TScalar ty = position.y() - TScalar(top);
1988  ocean_assert(ty >= 0 && ty <= 1);
1989  const unsigned int tyi = (unsigned int)(ty * TScalar(128) + TScalar(0.5));
1990  const unsigned int tyi_ = 128u - tyi;
1991 
1992  const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
1993  const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
1994 
1995  const uint8_t* const topLeft = frame + top * frameStrideElements + tChannels * left;
1996 
1997  const unsigned int txty = txi * tyi;
1998  const unsigned int txty_ = txi * tyi_;
1999  const unsigned int tx_ty = txi_ * tyi;
2000  const unsigned int tx_ty_ = txi_ * tyi_;
2001 
2002  for (unsigned int n = 0u; n < tChannels; ++n)
2003  {
2004  result[n] = uint8_t((topLeft[n] * tx_ty_ + topLeft[rightOffset + n] * txty_ + topLeft[bottomOffset + n] * tx_ty + topLeft[bottomOffset + rightOffset + n] * txty + 8192u) >> 14u);
2005  }
2006  }
2007  else
2008  {
2009  ocean_assert(tPixelCenter == PC_CENTER);
2010 
2011  ocean_assert(position.x() <= TScalar(width));
2012  ocean_assert(position.y() <= TScalar(height));
2013 
2014  const TScalar xShifted = std::max(TScalar(0.0), position.x() - TScalar(0.5));
2015  const TScalar yShifted = std::max(TScalar(0.0), position.y() - TScalar(0.5));
2016 
2017  const unsigned int left = (unsigned int)(xShifted);
2018  const unsigned int top = (unsigned int)(yShifted);
2019 
2020  ocean_assert(left < width);
2021  ocean_assert(top < height);
2022 
2023  const TScalar tx = xShifted - TScalar(left);
2024  const TScalar ty = yShifted - TScalar(top);
2025 
2026  ocean_assert(tx >= 0 && tx <= 1);
2027  ocean_assert(ty >= 0 && ty <= 1);
2028 
2029  const unsigned int txi = (unsigned int)(tx * TScalar(128) + TScalar(0.5));
2030  const unsigned int txi_ = 128u - txi;
2031 
2032  const unsigned int tyi = (unsigned int)(ty * TScalar(128) + TScalar(0.5));
2033  const unsigned int tyi_ = 128u - tyi;
2034 
2035  const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2036  const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2037 
2038  const uint8_t* const topLeft = frame + top * frameStrideElements + left * tChannels;
2039 
2040  const unsigned int txty = txi * tyi;
2041  const unsigned int txty_ = txi * tyi_;
2042  const unsigned int tx_ty = txi_ * tyi;
2043  const unsigned int tx_ty_ = txi_ * tyi_;
2044 
2045  for (unsigned int n = 0u; n < tChannels; ++n)
2046  {
2047  result[n] = uint8_t((topLeft[n] * tx_ty_ + topLeft[rightOffset + n] * txty_ + topLeft[bottomOffset + n] * tx_ty + topLeft[bottomOffset + rightOffset + n] * txty + 8192u) >> 14u);
2048  }
2049  }
2050 }
2051 
2052 template <typename TSource, typename TTarget, unsigned int tChannels, PixelCenter tPixelCenter, typename TScalar, typename TIntermediate>
2053 inline void FrameInterpolatorBilinear::interpolatePixel(const TSource* frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2<TScalar>& position, TTarget* result, const TIntermediate& resultBias)
2054 {
2055  static_assert(tChannels != 0u, "Invalid channel number!");
2056  static_assert(tPixelCenter == PC_TOP_LEFT || tPixelCenter == PC_CENTER, "Invalid pixel center!");
2057 
2058  ocean_assert(frame != nullptr && result != nullptr);
2059  ocean_assert(width != 0u && height != 0u);
2060 
2061  const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
2062 
2063  ocean_assert(position.x() >= TScalar(0));
2064  ocean_assert(position.y() >= TScalar(0));
2065 
2066  if constexpr (tPixelCenter == PC_TOP_LEFT)
2067  {
2068  ocean_assert(position.x() <= TScalar(width - 1u));
2069  ocean_assert(position.y() <= TScalar(height - 1u));
2070 
2071  const unsigned int left = (unsigned int)(position.x());
2072  const unsigned int top = (unsigned int)(position.y());
2073 
2074  const TScalar tx = position.x() - TScalar(left);
2075  ocean_assert(tx >= 0 && tx <= 1);
2076 
2077  const TScalar ty = position.y() - TScalar(top);
2078  ocean_assert(ty >= 0 && ty <= 1);
2079 
2080  const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2081  const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2082 
2083  const TSource* const topLeft = frame + top * frameStrideElements + tChannels * left;
2084 
2085  const TIntermediate txty = TIntermediate(tx) * TIntermediate(ty);
2086  const TIntermediate txty_ = TIntermediate(tx) * (TIntermediate(1) - TIntermediate(ty));
2087  const TIntermediate tx_ty = (TIntermediate(1) - TIntermediate(tx)) * TIntermediate(ty);
2088  const TIntermediate tx_ty_ = (TIntermediate(1) - TIntermediate(tx)) * (TIntermediate(1) - TIntermediate(ty));
2089 
2090  ocean_assert_accuracy(NumericT<TIntermediate>::isEqual(txty + txty_ + tx_ty + tx_ty_, TIntermediate(1)));
2091 
2092  for (unsigned int n = 0u; n < tChannels; ++n)
2093  {
2094  result[n] = TTarget(TIntermediate(topLeft[n]) * tx_ty_ + TIntermediate(topLeft[rightOffset + n]) * txty_ + TIntermediate(topLeft[bottomOffset + n]) * tx_ty + TIntermediate(topLeft[bottomOffset + rightOffset + n]) * txty + resultBias);
2095  }
2096  }
2097  else
2098  {
2099  ocean_assert(tPixelCenter == PC_CENTER);
2100 
2101  ocean_assert(position.x() <= TScalar(width));
2102  ocean_assert(position.y() <= TScalar(height));
2103 
2104  const TScalar xShifted = std::max(TScalar(0.0), position.x() - TScalar(0.5));
2105  const TScalar yShifted = std::max(TScalar(0.0), position.y() - TScalar(0.5));
2106 
2107  const unsigned int left = (unsigned int)(xShifted);
2108  const unsigned int top = (unsigned int)(yShifted);
2109 
2110  ocean_assert(left < width);
2111  ocean_assert(top < height);
2112 
2113  const TScalar tx = xShifted - TScalar(left);
2114  const TScalar ty = yShifted - TScalar(top);
2115 
2116  ocean_assert(tx >= 0 && tx <= 1);
2117  ocean_assert(ty >= 0 && ty <= 1);
2118 
2119  const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2120  const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2121 
2122  const TSource* const topLeft = frame + top * frameStrideElements + tChannels * left;
2123 
2124  const TIntermediate txty = TIntermediate(tx) * TIntermediate(ty);
2125  const TIntermediate txty_ = TIntermediate(tx) * (TIntermediate(1) - TIntermediate(ty));
2126  const TIntermediate tx_ty = (TIntermediate(1) - TIntermediate(tx)) * TIntermediate(ty);
2127  const TIntermediate tx_ty_ = (TIntermediate(1) - TIntermediate(tx)) * (TIntermediate(1) - TIntermediate(ty));
2128 
2129  ocean_assert_accuracy(NumericT<TIntermediate>::isEqual(txty + txty_ + tx_ty + tx_ty_, TIntermediate(1)));
2130 
2131  for (unsigned int n = 0u; n < tChannels; ++n)
2132  {
2133  result[n] = TTarget(TIntermediate(topLeft[n]) * tx_ty_ + TIntermediate(topLeft[rightOffset + n]) * txty_ + TIntermediate(topLeft[bottomOffset + n]) * tx_ty + TIntermediate(topLeft[bottomOffset + rightOffset + n]) * txty + resultBias);
2134  }
2135  }
2136 }
2137 
2138 template <unsigned int tChannels, bool tAlphaAtFront, bool tTransparentIs0xFF>
2139 inline void FrameInterpolatorBilinear::interpolate1PixelFullAlphaBorder8BitPerChannel(const uint8_t* frame, const unsigned int width, const unsigned int height, const Vector2& position, uint8_t* result, const unsigned int framePaddingElements)
2140 {
2141  static_assert(tChannels != 0u, "Invalid channel number!");
2142 
2143  ocean_assert(frame && result);
2144 
2145  const Vector2 pos(position.x() - Scalar(0.5), position.y() - Scalar(0.5));
2146 
2147  // check whether the position is outside the frame and will therefore be 100% transparent
2148  if (pos.x() <= Scalar(-1) || pos.y() <= Scalar(-1) || pos.x() >= Scalar(width) || pos.y() >= Scalar(height))
2149  {
2150  for (unsigned int n = 0u; n < tChannels - 1u; ++n)
2151  {
2153  }
2154 
2155  result[FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] = FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2156 
2157  return;
2158  }
2159 
2160  const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
2161 
2162  const int left = int(Numeric::floor(pos.x()));
2163  const int top = int(Numeric::floor(pos.y()));
2164 
2165  ocean_assert(left >= -1 && left < int(width));
2166  ocean_assert(top >= -1 && top < int(height));
2167 
2168  if ((unsigned int)left < width - 1u && (unsigned int)top < height - 1u)
2169  {
2170  // we have a valid pixel position for the left, top, right and bottom pixel
2171 
2172  const unsigned int txi = (unsigned int)((pos.x() - Scalar(left)) * Scalar(128) + Scalar(0.5));
2173  const unsigned int txi_ = 128u - txi;
2174 
2175  const unsigned int tyi = (unsigned int)((pos.y() - Scalar(top)) * Scalar(128) + Scalar(0.5));
2176  const unsigned int tyi_ = 128u - tyi;
2177 
2178  const uint8_t* const topLeft = frame + top * frameStrideElements + left * tChannels;
2179 
2180  const unsigned int txty = txi * tyi;
2181  const unsigned int txty_ = txi * tyi_;
2182  const unsigned int tx_ty = txi_ * tyi;
2183  const unsigned int tx_ty_ = txi_ * tyi_;
2184 
2185  for (unsigned int n = 0u; n < tChannels; ++n)
2186  {
2187  result[n] = (topLeft[n] * tx_ty_ + topLeft[tChannels + n] * txty_
2188  + topLeft[frameStrideElements + n] * tx_ty + topLeft[frameStrideElements + tChannels + n] * txty + 8192u) >> 14u;
2189  }
2190  }
2191  else
2192  {
2193  // we do not have a valid pixel for all 4-neighborhood pixels
2194 
2195  const unsigned int txi = (unsigned int)((pos.x() - Scalar(left)) * Scalar(128) + Scalar(0.5));
2196  const unsigned int txi_ = 128u - txi;
2197 
2198  const unsigned int tyi = (unsigned int)((pos.y() - Scalar(top)) * Scalar(128) + Scalar(0.5));
2199  const unsigned int tyi_ = 128u - tyi;
2200 
2201  const unsigned int rightOffset = (left >= 0 && left + 1u < width) ? tChannels : 0u;
2202  const unsigned int bottomOffset = (top >= 0 && top + 1u < height) ? frameStrideElements : 0u;
2203 
2204  ocean_assert(left < int(width) && top < int(height));
2205  const uint8_t* const topLeft = frame + max(0, top) * frameStrideElements + max(0, left) * tChannels;
2206 
2207  const unsigned int txty = txi * tyi;
2208  const unsigned int txty_ = txi * tyi_;
2209  const unsigned int tx_ty = txi_ * tyi;
2210  const unsigned int tx_ty_ = txi_ * tyi_;
2211 
2212  for (unsigned int n = FrameBlender::SourceOffset<tAlphaAtFront>::data(); n < tChannels + FrameBlender::SourceOffset<tAlphaAtFront>::data() - 1u; ++n)
2213  {
2214  result[n] = (topLeft[n] * tx_ty_ + topLeft[rightOffset + n] * txty_
2215  + topLeft[bottomOffset + n] * tx_ty + topLeft[bottomOffset + rightOffset + n] * txty + 8192u) >> 14u;
2216  }
2217 
2218  const uint8_t alphaTopLeft = (left >= 0 && top >= 0) ? topLeft[FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] : FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2219  const uint8_t alphaTopRight = (left + 1u < width && top >= 0) ? topLeft[rightOffset + FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] : FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2220  const uint8_t alphaBottomLeft = (left >= 0 && top + 1u < height) ? topLeft[bottomOffset + FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] : FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2221  const uint8_t alphaBottomRight = (left + 1u < width && top + 1u < height) ? topLeft[bottomOffset + rightOffset + FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] : FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2222 
2223  result[FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] = (alphaTopLeft * tx_ty_ + alphaTopRight * txty_ + alphaBottomLeft * tx_ty + alphaBottomRight * txty + 8192u) >> 14u;
2224  }
2225 }
2226 
2227 template <unsigned int tChannels>
2228 void FrameInterpolatorBilinear::affine8BitPerChannelSubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberOutputRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
2229 {
2230  static_assert(tChannels >= 1u, "Invalid channel number!");
2231 
2232  ocean_assert(source != nullptr && target != nullptr);
2233  ocean_assert(sourceWidth > 0u && sourceHeight > 0u);
2234  ocean_assert_and_suppress_unused(targetWidth > 0u && targetHeight > 0u, targetHeight);
2235  ocean_assert(source_A_target);
2236  ocean_assert(!source_A_target->isNull() && Numeric::isEqualEps((*source_A_target)[2]) && Numeric::isEqualEps((*source_A_target)[5]));
2237 
2238  ocean_assert(firstTargetRow + numberOutputRows <= targetHeight);
2239 
2240  const unsigned int targetStrideElements = tChannels * targetWidth + targetPaddingElements;
2241 
2242  const Scalar scalarSourceWidth_1 = Scalar(sourceWidth - 1u);
2243  const Scalar scalarSourceHeight_1 = Scalar(sourceHeight - 1u);
2244 
2245  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
2246 
2247  uint8_t zeroColor[tChannels] = {uint8_t(0)};
2248  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
2249 
2250  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberOutputRows; ++y)
2251  {
2252  PixelType* targetRow = (PixelType*)(target + y * targetStrideElements);
2253 
2254  /*
2255  * We can slightly optimize the 3x3 matrix multiplication:
2256  *
2257  * | X0 Y0 Z0 | | x |
2258  * | X1 Y1 Z1 | * | y |
2259  * | 0 0 1 | | 1 |
2260  *
2261  * | xx | | X0 * x | | Y0 * y + Z0 |
2262  * | yy | = | X1 * x | + | Y1 * y + Z1 |
2263  *
2264  * As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
2265  *
2266  * C0 = Y0 * y + Z0
2267  * C1 = Y1 * y + Z1
2268  *
2269  * So the computation becomes:
2270  *
2271  * | x' | | X0 * x | | C0 |
2272  * | y' | = | X1 * x | + | C1 |
2273  */
2274 
2275  const Vector2 X(source_A_target->data() + 0);
2276  const Vector2 c(Vector2(source_A_target->data() + 3) * Scalar(y) + Vector2(source_A_target->data() + 6));
2277 
2278  for (unsigned int x = 0u; x < targetWidth; ++x)
2279  {
2280  const Vector2 sourcePosition = X * Scalar(x) + c;
2281 
2282 #ifdef OCEAN_DEBUG
2283  const Scalar debugSourceX = (*source_A_target)[0] * Scalar(x) + (*source_A_target)[3] * Scalar(y) + (*source_A_target)[6];
2284  const Scalar debugSourceY = (*source_A_target)[1] * Scalar(x) + (*source_A_target)[4] * Scalar(y) + (*source_A_target)[7];
2285  ocean_assert(sourcePosition.isEqual(Vector2(debugSourceX, debugSourceY), Scalar(0.01)));
2286 #endif
2287 
2288  if (sourcePosition.x() < Scalar(0) || sourcePosition.x() > scalarSourceWidth_1 || sourcePosition.y() < Scalar(0) || sourcePosition.y() > scalarSourceHeight_1)
2289  {
2290  *targetRow = *bColor;
2291  }
2292  else
2293  {
2294  interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(source, sourceWidth, sourceHeight, sourcePaddingElements, sourcePosition, (uint8_t*)(targetRow));
2295  }
2296 
2297  targetRow++;
2298  }
2299  }
2300 }
2301 
2302 template <unsigned int tChannels>
2303 void FrameInterpolatorBilinear::homography8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
2304 {
2305  static_assert(tChannels >= 1u, "Invalid channel number!");
2306 
2307  ocean_assert(input != nullptr && output != nullptr);
2308  ocean_assert(inputWidth > 0u && inputHeight > 0u);
2309  ocean_assert(outputWidth > 0u && outputHeight > 0u);
2310  ocean_assert(input_H_output != nullptr);
2311 
2312  ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
2313 
2314  const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
2315 
2316  const Scalar scalarInputWidth_1 = Scalar(inputWidth - 1u);
2317  const Scalar scalarInputHeight_1 = Scalar(inputHeight - 1u);
2318 
2319  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
2320 
2321  uint8_t zeroColor[tChannels] = {uint8_t(0)};
2322  const PixelType bColor = borderColor ? *(PixelType*)borderColor : *(PixelType*)zeroColor;
2323 
2324  for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
2325  {
2326  /*
2327  * We can slightly optimize the 3x3 matrix multiplication:
2328  *
2329  * | X0 Y0 Z0 | | x |
2330  * | X1 Y1 Z1 | * | y |
2331  * | X2 Y2 Z2 | | 1 |
2332  *
2333  * | xx | | X0 * x | | Y0 * y + Z0 |
2334  * | yy | = | X1 * x | + | Y1 * y + Z1 |
2335  * | zz | | X2 * x | | Y2 * y + Z2 |
2336  *
2337  * | xx | | X0 * x | | C0 |
2338  * | yy | = | X1 * x | + | C1 |
2339  * | zz | | X2 * x | | C2 |
2340  *
2341  * As y is constant within the inner loop, we can pre-calculate the following terms:
2342  *
2343  * | x' | | (X0 * x + C0) / (X2 * x + C2) |
2344  * | y' | = | (X1 * x + C1) / (X2 * x + C2) |
2345  */
2346 
2347  const Vector2 X(input_H_output->data() + 0);
2348  const Vector2 c(Vector2(input_H_output->data() + 3) * Scalar(y) + Vector2(input_H_output->data() + 6));
2349 
2350  const Scalar X2 = (*input_H_output)(2, 0);
2351  const Scalar constValue2 = (*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2);
2352 
2353  PixelType* outputRowPixel = (PixelType*)(output + y * outputStrideElements);
2354 
2355  for (unsigned int x = 0u; x < outputWidth; ++x)
2356  {
2357  ocean_assert_accuracy(Numeric::isNotEqualEps((X2 * Scalar(x) + constValue2)));
2358  const Vector2 inputPosition((X * Scalar(x) + c) / (X2 * Scalar(x) + constValue2));
2359 
2360 #ifdef OCEAN_DEBUG
2361  const Vector2 debugInputPosition(*input_H_output * Vector2(Scalar(x), Scalar(y)));
2362  ocean_assert(inputPosition.isEqual(debugInputPosition, Scalar(0.01)));
2363 #endif
2364 
2365  if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
2366  {
2367  *outputRowPixel = bColor;
2368  }
2369  else
2370  {
2371  interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputRowPixel));
2372  }
2373 
2374  ++outputRowPixel;
2375  }
2376  }
2377 }
2378 
2379 template <typename T, unsigned int tChannels>
2380 void FrameInterpolatorBilinear::homographySubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
2381 {
2382  static_assert(tChannels >= 1u, "Invalid channel number!");
2383 
2384  ocean_assert(input != nullptr && output != nullptr);
2385  ocean_assert(inputWidth > 0u && inputHeight > 0u);
2386  ocean_assert_and_suppress_unused(outputWidth > 0u && outputHeight > 0u, outputHeight);
2387  ocean_assert(input_H_output != nullptr);
2388 
2389  ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
2390 
2391  const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
2392 
2393  const Scalar scalarInputWidth1 = Scalar(inputWidth - 1u);
2394  const Scalar scalarInputHeight1 = Scalar(inputHeight - 1u);
2395 
2396  // we need to find a best matching floating point data type for the intermediate interpolation results
2397  typedef typename FloatTyper<T>::Type TIntermediate;
2398 
2399  typedef typename DataType<T, tChannels>::Type PixelType;
2400 
2401  constexpr T zeroColor[tChannels] = {T(0)};
2402  const PixelType* const bColor = borderColor ? (PixelType*)(borderColor) : (PixelType*)(zeroColor);
2403 
2404  constexpr TIntermediate bias = TIntermediate(0);
2405 
2406  for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
2407  {
2408  /*
2409  * We can slightly optimize the 3x3 matrix multiplication:
2410  *
2411  * | X0 Y0 Z0 | | x |
2412  * | X1 Y1 Z1 | * | y |
2413  * | X2 Y2 Z2 | | 1 |
2414  *
2415  * | xx | | X0 * x | | Y0 * y + Z0 |
2416  * | yy | = | X1 * x | + | Y1 * y + Z1 |
2417  * | zz | | X2 * x | | Y2 * y + Z2 |
2418  *
2419  * | xx | | X0 * x | | C0 |
2420  * | yy | = | X1 * x | + | C1 |
2421  * | zz | | X2 * x | | C3 |
2422  *
2423  * As y is constant within the inner loop, we can pre-calculate the following terms:
2424  *
2425  * | x' | | (X0 * x + C0) / (X2 * x + C2) |
2426  * | y' | = | (X1 * x + C1) / (X2 * x + C2) |
2427  */
2428 
2429  const Vector2 X(input_H_output->data() + 0);
2430  const Vector2 c(Vector2(input_H_output->data() + 3) * Scalar(y) + Vector2(input_H_output->data() + 6));
2431 
2432  const Scalar X2 = (*input_H_output)(2, 0);
2433  const Scalar constValue2 = (*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2);
2434 
2435  PixelType* outputRowPixel = (PixelType*)(output + y * outputStrideElements);
2436 
2437  for (unsigned int x = 0u; x < outputWidth; ++x)
2438  {
2439  ocean_assert_accuracy(Numeric::isNotEqualEps((X2 * Scalar(x) + constValue2)));
2440  const Vector2 inputPosition((X * Scalar(x) + c) / (X2 * Scalar(x) + constValue2));
2441 
2442 #ifdef OCEAN_DEBUG
2443  const Vector2 debugInputPosition(*input_H_output * Vector2(Scalar(x), Scalar(y)));
2444  ocean_assert((std::is_same<float, Scalar>::value) || inputPosition.isEqual(debugInputPosition, Scalar(0.01)));
2445 #endif
2446 
2447  if (inputPosition.x() >= Scalar(0) && inputPosition.x() <= scalarInputWidth1 && inputPosition.y() >= Scalar(0) && inputPosition.y() <= scalarInputHeight1)
2448  {
2449  interpolatePixel<T, T, tChannels, CV::PC_TOP_LEFT, Scalar, TIntermediate>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (T*)(outputRowPixel), bias);
2450  }
2451  else
2452  {
2453  *outputRowPixel = *bColor;
2454  }
2455 
2456  ++outputRowPixel;
2457  }
2458  }
2459 }
2460 
2461 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
2462 
2463 template <unsigned int tChannels>
2464 inline void FrameInterpolatorBilinear::affine8BitPerChannelSSESubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
2465 {
2466  static_assert(tChannels >= 1u, "Invalid channel number!");
2467 
2468  ocean_assert(source && target);
2469  ocean_assert(sourceWidth > 0u && sourceHeight > 0u);
2470  ocean_assert(targetWidth >= 4u && targetHeight > 0u);
2471  ocean_assert(source_A_target);
2472  ocean_assert(!source_A_target->isNull() && Numeric::isEqualEps((*source_A_target)[2]) && Numeric::isEqualEps((*source_A_target)[5]));
2473 
2474  ocean_assert_and_suppress_unused(firstTargetRow + numberTargetRows <= targetHeight, targetHeight);
2475 
2476  const unsigned int sourceStrideElements = tChannels * sourceWidth + sourcePaddingElements;
2477  const unsigned int targetStrideElements = tChannels * targetWidth + targetPaddingElements;
2478 
2479  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
2480 
2481  uint8_t zeroColor[tChannels] = {uint8_t(0)};
2482  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
2483 
2484  OCEAN_ALIGN_DATA(16) unsigned int validPixels[4];
2485 
2486  OCEAN_ALIGN_DATA(16) unsigned int topLeftOffsets[4];
2487  OCEAN_ALIGN_DATA(16) unsigned int topRightOffsets[4];
2488  OCEAN_ALIGN_DATA(16) unsigned int bottomLeftOffsets[4];
2489  OCEAN_ALIGN_DATA(16) unsigned int bottomRightOffsets[4];
2490 
2491  // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
2492  const __m128 m128_f_X0 = _mm_set_ps1(float((*source_A_target)(0, 0)));
2493  const __m128 m128_f_X1 = _mm_set_ps1(float((*source_A_target)(1, 0)));
2494 
2495  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
2496  {
2497  PixelType* targetRow = (PixelType*)(target + y * targetStrideElements);
2498 
2499  /*
2500  * We can slightly optimize the 3x3 matrix multiplication:
2501  *
2502  * | X0 Y0 Z0 | | x |
2503  * | X1 Y1 Z1 | * | y |
2504  * | 0 0 1 | | 1 |
2505  *
2506  * | xx | | X0 * x | | Y0 * y + Z0 |
2507  * | yy | = | X1 * x | + | Y1 * y + Z1 |
2508  *
2509  * As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
2510  *
2511  * C0 = Y0 * y + Z0
2512  * C1 = Y1 * y + Z1
2513  *
2514  * So the computation becomes:
2515  *
2516  * | x' | | X0 * x | | C0 |
2517  * | y' | = | X1 * x | + | C1 |
2518  */
2519 
2520  // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
2521  const __m128 m128_f_C0 = _mm_set_ps1(float((*source_A_target)(0, 1) * Scalar(y) + (*source_A_target)(0, 2)));
2522  const __m128 m128_f_C1 = _mm_set_ps1(float((*source_A_target)(1, 1) * Scalar(y) + (*source_A_target)(1, 2)));
2523 
2524  // we store 4 floats: [0.0f, 0.0f, 0.0f, 0.0f]
2525  const __m128 m128_f_zero = _mm_setzero_ps();
2526 
2527  // we store 4 integers: [tChannels, tChannels, tChannels, tChannels]
2528  const __m128i m128_i_channels = _mm_set1_epi32(tChannels);
2529 
2530  // we store 4 integers: [sourceStrideElements, sourceStrideElements, sourceStrideElements, sourceStrideElements]
2531  const __m128i m128_i_sourceStrideElements = _mm_set1_epi32(sourceStrideElements);
2532 
2533  // we store 4 integers: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
2534  const __m128i m128_i_sourceWidth_1 = _mm_set1_epi32(int(sourceWidth) - 1);
2535  const __m128i m128_i_sourceHeight_1 = _mm_set1_epi32(int(sourceHeight) - 1);
2536 
2537  // we store 4 floats: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
2538  const __m128 m128_f_sourceWidth_1 = _mm_set_ps1(float(sourceWidth - 1u));
2539  const __m128 m128_f_sourceHeight_1 = _mm_set_ps1(float(sourceHeight - 1u));
2540 
2541  for (unsigned int x = 0u; x < targetWidth; x += 4u)
2542  {
2543  if (x + 4u > targetWidth)
2544  {
2545  // the last iteration will not fit into the output frame,
2546  // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
2547 
2548  ocean_assert(x >= 4u && targetWidth > 4u);
2549  const unsigned int newX = targetWidth - 4u;
2550 
2551  ocean_assert(x > newX);
2552  targetRow -= x - newX;
2553 
2554  x = newX;
2555 
2556  // the for loop will stop after this iteration
2557  ocean_assert(!(x + 4u < targetWidth));
2558  }
2559 
2560 
2561  // we need four successive x coordinate floats:
2562  // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
2563  const __m128 m128_f_x_0123 = _mm_set_ps(float(x + 3u), float(x + 2u), float(x + 1u), float(x + 0u));
2564 
2565  // we calculate xx and yy for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
2566  const __m128 m128_f_sourceX = _mm_add_ps(_mm_mul_ps(m128_f_X0, m128_f_x_0123), m128_f_C0);
2567  const __m128 m128_f_sourceY = _mm_add_ps(_mm_mul_ps(m128_f_X1, m128_f_x_0123), m128_f_C1);
2568 
2569  // now we check whether we are inside the input frame
2570  const __m128 m128_f_validPixelX = _mm_and_ps(_mm_cmple_ps(m128_f_sourceX, m128_f_sourceWidth_1), _mm_cmpge_ps(m128_f_sourceX, m128_f_zero)); // inputPosition.x() <= (inputWidth - 1) && inputPosition.x() >= 0 ? 0xFFFFFFFF : 0x00000000
2571  const __m128 m128_f_validPixelY = _mm_and_ps(_mm_cmple_ps(m128_f_sourceY, m128_f_sourceHeight_1), _mm_cmpge_ps(m128_f_sourceY, m128_f_zero)); // inputPosition.y() <= (inputHeight - 1) && inputPosition.y() >= 0 ? 0xFFFFFFFF : 0x00000000
2572 
2573  const __m128i m128_i_validPixel = _mm_castps_si128(_mm_and_ps(m128_f_validPixelX, m128_f_validPixelY)); // is_inside_input_frame(inputPosition) ? 0xFFFFFFFF : 0x00000000
2574 
2575  // we can stop here if all pixels are invalid
2576  if (_mm_test_all_zeros(m128_i_validPixel, _mm_set1_epi32(0xFFFFFFFF)))
2577  {
2578 #ifdef OCEAN_DEBUG
2579  OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
2580  _mm_store_si128((__m128i*)debugValidPixels, m128_i_validPixel);
2581  ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
2582 #endif
2583 
2584  targetRow[0] = *bColor;
2585  targetRow[1] = *bColor;
2586  targetRow[2] = *bColor;
2587  targetRow[3] = *bColor;
2588 
2589  targetRow += 4;
2590 
2591  continue;
2592  }
2593 
2594  // we store the result
2595  _mm_store_si128((__m128i*)validPixels, m128_i_validPixel);
2596  ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
2597 
2598 
2599  // now we determine the left, top, right and bottom pixel used for the interpolation
2600  const __m128 m128_f_tx_floor = _mm_floor_ps(m128_f_sourceX);
2601  const __m128 m128_f_ty_floor = _mm_floor_ps(m128_f_sourceY);
2602 
2603  // left = floor(x); top = floor(y)
2604  const __m128i m128_i_left = _mm_cvtps_epi32(m128_f_tx_floor);
2605  const __m128i m128_i_top = _mm_cvtps_epi32(m128_f_ty_floor);
2606 
2607  // right = min(left + 1, width - 1); bottom = min(top + 1; height - 1)
2608  const __m128i m128_i_right = _mm_min_epu32(_mm_add_epi32(m128_i_left, _mm_set1_epi32(1)), m128_i_sourceWidth_1);
2609  const __m128i m128_i_bottom = _mm_min_epu32(_mm_add_epi32(m128_i_top, _mm_set1_epi32(1)), m128_i_sourceHeight_1);
2610 
2611  // offset = (y * sourceStrideElements + tChannels * x)
2612  const __m128i m128_i_topLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left)); // topleftOffset = (top * sourceStrideElements + tChannels * left)
2613  const __m128i m128_i_topRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right)); // toprightOffset = (top * sourceStrideElements + tChannels * right)
2614  const __m128i m128_i_bottomLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left)); // ...
2615  const __m128i m128_i_bottomRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right));
2616 
2617  // we store the offsets
2618  _mm_store_si128((__m128i*)topLeftOffsets, m128_i_topLeftOffset);
2619  _mm_store_si128((__m128i*)topRightOffsets, m128_i_topRightOffset);
2620  _mm_store_si128((__m128i*)bottomLeftOffsets, m128_i_bottomLeftOffset);
2621  _mm_store_si128((__m128i*)bottomRightOffsets, m128_i_bottomRightOffset);
2622 
2623 
2624  // now we need to determine the interpolation factors tx, tx_ and ty, ty_: (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
2625 
2626  // we determine the fractional portions of the x' and y':
2627  // e.g., [43.1231, -12.5543, -34.123, 99.2]
2628  // [ 0.1231, 0.4457, 0.877, 0.2] // note the result for negative value - but we will not process negative values anyway due to 'validPixel'
2629  __m128 m128_f_tx = _mm_sub_ps(m128_f_sourceX, m128_f_tx_floor);
2630  __m128 m128_f_ty = _mm_sub_ps(m128_f_sourceY, m128_f_ty_floor);
2631 
2632  // we use integer interpolation [0.0, 1.0] -> [0, 128]
2633  m128_f_tx = _mm_mul_ps(m128_f_tx, _mm_set_ps1(128.0f));
2634  m128_f_ty = _mm_mul_ps(m128_f_ty, _mm_set_ps1(128.0f));
2635 
2636  m128_f_tx = _mm_round_ps(m128_f_tx, _MM_FROUND_TO_NEAREST_INT);
2637  m128_f_ty = _mm_round_ps(m128_f_ty, _MM_FROUND_TO_NEAREST_INT);
2638 
2639  const __m128i m128_i_tx = _mm_cvtps_epi32(m128_f_tx);
2640  const __m128i m128_i_ty = _mm_cvtps_epi32(m128_f_ty);
2641 
2642  interpolate4Pixels8BitPerChannelSSE<tChannels>(source, topLeftOffsets, topRightOffsets, bottomLeftOffsets, bottomRightOffsets, validPixels, *bColor, m128_i_tx, m128_i_ty, targetRow);
2643  targetRow += 4;
2644  }
2645  }
2646 }
2647 
2648 template <unsigned int tChannels>
2649 inline void FrameInterpolatorBilinear::homography8BitPerChannelSSESubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
2650 {
2651  static_assert(tChannels >= 1u, "Invalid channel number!");
2652 
2653  ocean_assert(input != nullptr && output != nullptr);
2654  ocean_assert(inputWidth > 0u && inputHeight > 0u);
2655  ocean_assert(outputWidth >= 4u && outputHeight > 0u);
2656  ocean_assert(input_H_output != nullptr);
2657 
2658  ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
2659 
2660  const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
2661  const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
2662 
2663  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
2664 
2665  uint8_t zeroColor[tChannels] = {uint8_t(0)};
2666  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
2667 
2668  OCEAN_ALIGN_DATA(16) unsigned int validPixels[4];
2669 
2670  OCEAN_ALIGN_DATA(16) unsigned int topLeftOffsets[4];
2671  OCEAN_ALIGN_DATA(16) unsigned int topRightOffsets[4];
2672  OCEAN_ALIGN_DATA(16) unsigned int bottomLeftOffsets[4];
2673  OCEAN_ALIGN_DATA(16) unsigned int bottomRightOffsets[4];
2674 
2675  // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
2676  const __m128 m128_f_X0 = _mm_set_ps1(float((*input_H_output)(0, 0)));
2677  const __m128 m128_f_X1 = _mm_set_ps1(float((*input_H_output)(1, 0)));
2678  const __m128 m128_f_X2 = _mm_set_ps1(float((*input_H_output)(2, 0)));
2679 
2680  // we store 4 floats: [0.0f, 0.0f, 0.0f, 0.0f]
2681  const __m128 m128_f_zero = _mm_setzero_ps();
2682 
2683  // we store 4 integers: [tChannels, tChannels, tChannels, tChannels]
2684  const __m128i m128_i_channels = _mm_set1_epi32(tChannels);
2685 
2686  // we store 4 integers: [inputStrideElements, inputStrideElements, inputStrideElements, inputStrideElements]
2687  const __m128i m128_i_inputStrideElements = _mm_set1_epi32(inputStrideElements);
2688 
2689  // we store 4 integers: [inputWidth - 1, inputWidth - 1, inputWidth -1, inputWidth -1], and same with inputHeight
2690  const __m128i m128_i_inputWidth_1 = _mm_set1_epi32(int(inputWidth) - 1);
2691  const __m128i m128_i_inputHeight_1 = _mm_set1_epi32(int(inputHeight) - 1);
2692 
2693  // we store 4 floats: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
2694  const __m128 m128_f_inputWidth_1 = _mm_set_ps1(float(inputWidth - 1u));
2695  const __m128 m128_f_inputHeight_1 = _mm_set_ps1(float(inputHeight - 1u));
2696 
2697  for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
2698  {
2699  PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
2700 
2701  /*
2702  * We can slightly optimize the 3x3 matrix multiplication:
2703  *
2704  * | X0 Y0 Z0 | | x |
2705  * | X1 Y1 Z1 | * | y |
2706  * | X2 Y2 Z2 | | 1 |
2707  *
2708  * | xx | | X0 * x | | Y0 * y + Z0 |
2709  * | yy | = | X1 * x | + | Y1 * y + Z1 |
2710  * | zz | | X2 * x | | Y2 * y + Z2 |
2711  *
2712  * | xx | | X0 * x | | C0 |
2713  * | yy | = | X1 * x | + | C1 |
2714  * | zz | | X2 * x | | C2 |
2715  *
2716  * As y is constant within the inner loop, we can pre-calculate the following terms:
2717  *
2718  * | x' | | (X0 * x + C0) / (X2 * x + C2) |
2719  * | y' | = | (X1 * x + C1) / (X2 * x + C2) |
2720  */
2721 
2722  // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
2723  const __m128 m128_f_C0 = _mm_set_ps1(float((*input_H_output)(0, 1) * Scalar(y) + (*input_H_output)(0, 2)));
2724  const __m128 m128_f_C1 = _mm_set_ps1(float((*input_H_output)(1, 1) * Scalar(y) + (*input_H_output)(1, 2)));
2725  const __m128 m128_f_C2 = _mm_set_ps1(float((*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2)));
2726 
2727  for (unsigned int x = 0u; x < outputWidth; x += 4u)
2728  {
2729  if (x + 4u > outputWidth)
2730  {
2731  // the last iteration will not fit into the output frame,
2732  // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
2733 
2734  ocean_assert(x >= 4u && outputWidth > 4u);
2735  const unsigned int newX = outputWidth - 4u;
2736 
2737  ocean_assert(x > newX);
2738  outputPixelData -= x - newX;
2739 
2740  x = newX;
2741 
2742  // the for loop will stop after this iteration
2743  ocean_assert(!(x + 4u < outputWidth));
2744  }
2745 
2746 
2747  // we need four successive x coordinate floats:
2748  // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
2749  const __m128 m128_f_x_0123 = _mm_set_ps(float(x + 3u), float(x + 2u), float(x + 1u), float(x + 0u));
2750 
2751  // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
2752  const __m128 m128_f_xx = _mm_add_ps(_mm_mul_ps(m128_f_X0, m128_f_x_0123), m128_f_C0);
2753  const __m128 m128_f_yy = _mm_add_ps(_mm_mul_ps(m128_f_X1, m128_f_x_0123), m128_f_C1);
2754  const __m128 m128_f_zz = _mm_add_ps(_mm_mul_ps(m128_f_X2, m128_f_x_0123), m128_f_C2);
2755 
2756 #ifdef USE_APPROXIMATED_INVERSE_OF_ZZ // (not defined by default)
2757 
2758  // we calculate the (approximated) inverse of zz,
2759  // the overall performance will be approx. 5% better while the accuracy will be slightly worse:
2760  // [1/zz3, 1/zz2, 1/zz1, 1/zz0]
2761  const __m128 inv_zz_128 = _mm_rcp_ps(m128_f_zz);
2762 
2763  // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
2764  const __m128 m128_f_inputX = _mm_mul_ps(m128_f_xx, inv_zz_128);
2765  const __m128 m128_f_inputY = _mm_mul_ps(m128_f_yy, inv_zz_128);
2766 
2767 #else
2768 
2769  // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
2770  const __m128 m128_f_inputX = _mm_div_ps(m128_f_xx, m128_f_zz);
2771  const __m128 m128_f_inputY = _mm_div_ps(m128_f_yy, m128_f_zz);
2772 
2773 #endif // USE_APPROXIMATED_INVERSE_OF_ZZ
2774 
2775 
2776  // now we check whether we are inside the input frame
2777  const __m128 m128_f_validPixelX = _mm_and_ps(_mm_cmple_ps (m128_f_inputX, m128_f_inputWidth_1), _mm_cmpge_ps(m128_f_inputX, m128_f_zero)); // inputPosition.x() <= (inputWidth-1) && inputPosition.x() >= 0 ? 0xFFFFFF : 0x000000
2778  const __m128 m128_f_validPixelY = _mm_and_ps(_mm_cmple_ps (m128_f_inputY, m128_f_inputHeight_1), _mm_cmpge_ps(m128_f_inputY, m128_f_zero)); // inputPosition.y() <= (inputHeight-1) && inputPosition.y() >= 0 ? 0xFFFFFF : 0x000000
2779 
2780  const __m128i m128_i_validPixel = _mm_castps_si128(_mm_and_ps(m128_f_validPixelX, m128_f_validPixelY)); // is_inside_input_frame(inputPosition) ? 0xFFFFFF : 0x000000
2781 
2782  // we can stop here if all pixels are invalid
2783  if (_mm_test_all_zeros(m128_i_validPixel, _mm_set1_epi32(0xFFFFFFFF)))
2784  {
2785 #ifdef OCEAN_DEBUG
2786  OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
2787  _mm_store_si128((__m128i*)debugValidPixels, m128_i_validPixel);
2788  ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
2789 #endif
2790 
2791  outputPixelData[0] = *bColor;
2792  outputPixelData[1] = *bColor;
2793  outputPixelData[2] = *bColor;
2794  outputPixelData[3] = *bColor;
2795 
2796  outputPixelData += 4;
2797 
2798  continue;
2799  }
2800 
2801  // we store the result
2802  _mm_store_si128((__m128i*)validPixels, m128_i_validPixel);
2803  ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
2804 
2805 
2806  // now we determine the left, top, right and bottom pixel used for the interpolation
2807  const __m128 m128_f_tx_floor = _mm_floor_ps(m128_f_inputX);
2808  const __m128 m128_f_ty_floor = _mm_floor_ps(m128_f_inputY);
2809 
2810  // left = floor(x); top = floor(y)
2811  const __m128i m128_i_left = _mm_cvtps_epi32(m128_f_tx_floor);
2812  const __m128i m128_i_top = _mm_cvtps_epi32(m128_f_ty_floor);
2813 
2814  // right = min(left + 1, width - 1); bottom = min(top + 1; height - 1)
2815  const __m128i m128_i_right = _mm_min_epu32(_mm_add_epi32(m128_i_left, _mm_set1_epi32(1)), m128_i_inputWidth_1);
2816  const __m128i m128_i_bottom = _mm_min_epu32(_mm_add_epi32(m128_i_top, _mm_set1_epi32(1)), m128_i_inputHeight_1);
2817 
2818  // offset = (y * inputStrideElements + tChannels * x)
2819  const __m128i m128_i_topLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left)); // topleftOffset = (top * inputStrideElements + tChannels * left)
2820  const __m128i m128_i_topRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right)); // toprightOffset = (top * inputStrideElements + tChannels * right)
2821  const __m128i m128_i_bottomLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left)); // ...
2822  const __m128i m128_i_bottomRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right));
2823 
2824  // we store the offsets
2825  _mm_store_si128((__m128i*)topLeftOffsets, m128_i_topLeftOffset);
2826  _mm_store_si128((__m128i*)topRightOffsets, m128_i_topRightOffset);
2827  _mm_store_si128((__m128i*)bottomLeftOffsets, m128_i_bottomLeftOffset);
2828  _mm_store_si128((__m128i*)bottomRightOffsets, m128_i_bottomRightOffset);
2829 
2830 
2831  // now we need to determine the interpolation factors tx, tx_ and ty, ty_: (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
2832 
2833  // we determine the fractional portions of the x' and y':
2834  // e.g., [43.1231, -12.5543, -34.123, 99.2]
2835  // [ 0.1231, 0.4457, 0.877, 0.2] // note the result for negative value - but we will not process negative values anyway due to 'validPixel'
2836  __m128 m128_f_tx = _mm_sub_ps(m128_f_inputX, m128_f_tx_floor);
2837  __m128 m128_f_ty = _mm_sub_ps(m128_f_inputY, m128_f_ty_floor);
2838 
2839  // we use integer interpolation [0.0, 1.0] -> [0, 128]
2840  m128_f_tx = _mm_mul_ps(m128_f_tx, _mm_set_ps1(128.0f));
2841  m128_f_ty = _mm_mul_ps(m128_f_ty, _mm_set_ps1(128.0f));
2842 
2843  m128_f_tx = _mm_round_ps(m128_f_tx, _MM_FROUND_TO_NEAREST_INT);
2844  m128_f_ty = _mm_round_ps(m128_f_ty, _MM_FROUND_TO_NEAREST_INT);
2845 
2846  const __m128i m128_i_tx = _mm_cvtps_epi32(m128_f_tx);
2847  const __m128i m128_i_ty = _mm_cvtps_epi32(m128_f_ty);
2848 
2849  interpolate4Pixels8BitPerChannelSSE<tChannels>(input, topLeftOffsets, topRightOffsets, bottomLeftOffsets, bottomRightOffsets, validPixels, *bColor, m128_i_tx, m128_i_ty, outputPixelData);
2850  outputPixelData += 4;
2851  }
2852  }
2853 }
2854 
2855 template <>
2856 OCEAN_FORCE_INLINE __m128i FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<3u>(const __m128i& sourcesTopLeft, const __m128i& sourcesTopRight, const __m128i& sourcesBottomLeft, const __m128i& sourcesBottomRight, const __m128i& factorsTopLeft, const __m128i& factorsTopRight, const __m128i& factorsBottomLeft, const __m128i& factorsBottomRight)
2857 {
2858  // sourcesTopLeft stores the three color values of 4 (independent) pixels (the upper left pixels):
2859  // FEDC BA98 7654 3210
2860  // ---- VUYV UYVU YVUY
2861  // sourcesTopRight, sourcesBottomLeft, sourcesBottomRight have the same pattern
2862 
2863  // factorsTopLeft stores the 32 bit interpolation values for 4 pixels:
2864  // FEDC BA98 7654 3210
2865  // 3 2 1 0 (32 bit interpolation values, fitting into 16 bit)
2866 
2867 
2868  // we will simply extract each channel from the source pixels,
2869  // each extracted channel will be multiplied by the corresponding interpolation factor
2870  // and all interpolation results will be accumulated afterwards
2871 
2872  // FEDC BA98 7654 3210
2873  const __m128i mask32_Channel0 = SSE::set128i(0xFFFFFF09FFFFFF06ull, 0xFFFFFF03FFFFFF00ull); // ---9 ---6 ---3 ---0
2874  const __m128i mask32_Channel1 = SSE::set128i(0xFFFFFF0AFFFFFF07ull, 0xFFFFFF04FFFFFF01ull); // ---A ---7 ---4 ---1
2875  const __m128i mask32_Channel2 = SSE::set128i(0xFFFFFF0BFFFFFF08ull, 0xFFFFFF05FFFFFF02ull); // ---B ---8 ---5 ---2
2876 
2877 
2878  // we extract the first channel from the top left pixel values and multiply the channel with the interpolation factors
2879  // FEDC BA98 7654 3210
2880  // ---9 ---6 ---3 ---0
2881  // *
2882  // FTL3 FTL2 FTL1 FTL0
2883  __m128i multiplication_channel0 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel0));
2884 
2885  // we the same multiplication for the second channel
2886  __m128i multiplication_channel1 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel1));
2887 
2888  // and third channel
2889  __m128i multiplication_channel2 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel2));
2890 
2891 
2892  // now we repeat the process for the top right pixel values
2893  multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel0)));
2894  multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel1)));
2895  multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel2)));
2896 
2897 
2898  // and for the bottom left pixel values
2899  multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel0)));
2900  multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel1)));
2901  multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel2)));
2902 
2903 
2904  // and for the bottom right pixel values
2905  multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel0)));
2906  multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel1)));
2907  multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel2)));
2908 
2909 
2910  const __m128i m128_i_8192 = _mm_set1_epi32(8192);
2911 
2912  // we add 8192 for rounding and shift the result by 14 bits (division by 128*128) // TODO if using 256 we should be able to avoid the shifting by 14 bits (simply by using shuffle operations)
2913 
2914  // in addition to rounding and shifting, we need to move the interpolation results to the correct channel:
2915  // target data: ---9 ---6 ---3 ---0
2916  // shufflet target: ---- --9- -6-- 3--0
2917  // mask location: ---C ---8 ---4 ---0
2918  // mask: ---- --C- -8-- 4--0
2919  __m128i interpolation_channel0 = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication_channel0, m128_i_8192), 14), SSE::set128i(0xFFFFFFFFFFFF0CFFull, 0xFF08FFFF04FFFF00ull));
2920 
2921  // target data: ---A ---7 ---4 ---1
2922  // shufflet target: ---- -A-- 7--4 --1-
2923  // mask location: ---C ---8 ---4 ---0
2924  // mask: ---- -C-- 8--4 --0-
2925  __m128i interpolation_channel1 = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication_channel1, m128_i_8192), 14), SSE::set128i(0xFFFFFFFFFF0CFFFFull, 0x08FFFF04FFFF00FFull));
2926 
2927  // target data: ---B ---8 ---5 ---2
2928  // shufflet target: ---- B--8 --5- -2--
2929  // mask location: ---C ---8 ---4 ---0
2930  // mask: ---- C--8 --4- -0--
2931  __m128i interpolation_channel2 = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication_channel2, m128_i_8192), 14), SSE::set128i(0xFFFFFFFF0CFFFF08ull, 0xFFFF04FFFF00FFFFull));
2932 
2933 
2934  // finally, we simply blend all interpolation results together
2935 
2936  return _mm_or_si128(_mm_or_si128(interpolation_channel0, interpolation_channel1), interpolation_channel2);
2937 }
2938 
2939 template <>
2940 OCEAN_FORCE_INLINE __m128i FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<4u>(const __m128i& sourcesTopLeft, const __m128i& sourcesTopRight, const __m128i& sourcesBottomLeft, const __m128i& sourcesBottomRight, const __m128i& factorsTopLeft, const __m128i& factorsTopRight, const __m128i& factorsBottomLeft, const __m128i& factorsBottomRight)
2941 {
2942  // sourcesTopLeft stores the four color values of 4 (independent) pixels (the upper left pixels):
2943  // FEDC BA98 7654 3210
2944  // AVUY AVUY AVUY AVUY
2945  // sourcesTopRight, sourcesBottomLeft, sourcesBottomRight have the same pattern
2946 
2947  // factorsTopLeft stores the 32 bit interpolation values for 4 pixels:
2948  // FEDC BA98 7654 3210
2949  // 3 2 1 0 (32 bit interpolation values, fitting into 16 bit)
2950 
2951 
2952  // we will simply extract each channel from the source pixels,
2953  // each extracted channel will be multiplied by the corresponding interpolation factor
2954  // and all interpolation results will be accumulated afterwards
2955 
2956  // FEDC BA98 7654 3210
2957  const __m128i mask32_Channel0 = SSE::set128i(0xA0A0A00CA0A0A008ull, 0xA0A0A004A0A0A000ull); // ---C ---8 ---4 ---0
2958  const __m128i mask32_Channel1 = SSE::set128i(0xA0A0A00DA0A0A009ull, 0xA0A0A005A0A0A001ull); // ---D ---9 ---5 ---1
2959  const __m128i mask32_Channel2 = SSE::set128i(0xA0A0A00EA0A0A00Aull, 0xA0A0A006A0A0A002ull); // ---E ---A ---6 ---2
2960  const __m128i mask32_Channel3 = SSE::set128i(0xA0A0A00FA0A0A00Bull, 0xA0A0A007A0A0A003ull); // ---F ---B ---7 ---3
2961 
2962 
2963  // we extract the first channel from the top left pixel values and multiply the channel with the interpolation factors
2964  // FEDC BA98 7654 3210
2965  // ---C ---8 ---4 ---0
2966  // *
2967  // FTL3 FTL2 FTL1 FTL0
2968  __m128i multiplication_channel0 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel0));
2969 
2970  // we the same multiplication for the second channel
2971  __m128i multiplication_channel1 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel1));
2972 
2973  // and third channel
2974  __m128i multiplication_channel2 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel2));
2975 
2976  // and last channel
2977  __m128i multiplication_channel3 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel3));
2978 
2979 
2980  // now we repeat the process for the top right pixel values
2981  multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel0)));
2982  multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel1)));
2983  multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel2)));
2984  multiplication_channel3 = _mm_add_epi32(multiplication_channel3, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel3)));
2985 
2986 
2987  // and for the bottom left pixel values
2988  multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel0)));
2989  multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel1)));
2990  multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel2)));
2991  multiplication_channel3 = _mm_add_epi32(multiplication_channel3, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel3)));
2992 
2993 
2994  // and for the bottom right pixel values
2995  multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel0)));
2996  multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel1)));
2997  multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel2)));
2998  multiplication_channel3 = _mm_add_epi32(multiplication_channel3, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel3)));
2999 
3000 
3001  const __m128i m128_i_8192 = _mm_set1_epi32(8192);
3002 
3003  // we add 8192 for rounding and shift the result by 14 bits (division by 128*128)
3004 
3005  // in addition to rounding and shifting, we need to move the interpolation results to the correct channel:
3006  // ---C ---8 ---4 ---0
3007  // ---C ---9 ---4 ---0
3008  __m128i interpolation_channel0 = _mm_srli_epi32(_mm_add_epi32(multiplication_channel0, m128_i_8192), 14);
3009 
3010  // in addition to rounding and shifting, we need to move the interpolation results to the correct channel:
3011  // ---D ---9 ---5 ---1
3012  // --D- --9- --5- --1-
3013  __m128i interpolation_channel1 = _mm_slli_epi32(_mm_srli_epi32(_mm_add_epi32(multiplication_channel1, m128_i_8192), 14), 8);
3014 
3015  // ---E ---A ---6 ---2
3016  // -E-- -A-- -6-- -2--
3017  __m128i interpolation_channel2 = _mm_slli_epi32(_mm_srli_epi32(_mm_add_epi32(multiplication_channel2, m128_i_8192), 14), 16);
3018 
3019  // ---F ---B ---7 ---3
3020  // F--- B--- 7--- 3---
3021  __m128i interpolation_channel3 = _mm_slli_epi32(_mm_srli_epi32(_mm_add_epi32(multiplication_channel3, m128_i_8192), 14), 24);
3022 
3023 
3024  // finally, we simply blend all interpolation results together
3025 
3026  return _mm_or_si128(_mm_or_si128(interpolation_channel0, interpolation_channel1), _mm_or_si128(interpolation_channel2, interpolation_channel3));
3027 }
3028 
3029 #ifdef OCEAN_COMPILER_MSC
3030 
3031 // we see a significant performance decrease with non-VS compilers/platforms,
3032 // so the 1-channel and 3-channel specializations below are only compiled with the Microsoft compiler
3033 
3034 template <>
3035 OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<1u>(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const DataType<uint8_t, 1u>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, 1u>::Type* targetPositionPixels)
3036 {
3037  ocean_assert(source != nullptr);
3038  ocean_assert(targetPositionPixels != nullptr);
3039 
3040  typedef typename DataType<uint8_t, 1u>::Type PixelType;
3041 
3042  // as we do not initialize the following intermediate data,
3043  // we hopefully will not allocate memory on the stack each time this function is called
3044  OCEAN_ALIGN_DATA(16) PixelType pixels[16];
3045 
3046  // we gather the individual source pixel values from the source image,
3047  // based on the calculated pixel locations
3048  for (unsigned int i = 0u; i < 4u; ++i)
3049  {
3050  if (validPixels[i])
3051  {
3052  pixels[i * 4u + 0u] = *((PixelType*)(source + offsetsTopLeft[i]));
3053  pixels[i * 4u + 1u] = *((PixelType*)(source + offsetsTopRight[i]));
3054  pixels[i * 4u + 2u] = *((PixelType*)(source + offsetsBottomLeft[i]));
3055  pixels[i * 4u + 3u] = *((PixelType*)(source + offsetsBottomRight[i]));
3056  }
3057  else
3058  {
3059  pixels[i * 4u + 0u] = borderColor;
3060  pixels[i * 4u + 1u] = borderColor;
3061  pixels[i * 4u + 2u] = borderColor;
3062  pixels[i * 4u + 3u] = borderColor;
3063  }
3064  }
3065 
3066  static_assert(sizeof(__m128i) == sizeof(pixels), "Invalid data type!");
3067 
3068  const __m128i m128_pixels = _mm_load_si128((const __m128i*)pixels);
3069 
3070 
3071  // factorLeft = 128 - factorRight
3072  // factorTop = 128 - factorBottom
3073 
3074  const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3075  const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3076 
3077  // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3078  // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3079 
3080  const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3081  const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3082  const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3083  const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3084 
3085  // pixels stores the four interpolation grascale pixel values (top left, top right, bottom left, bottom right) for 4 (independent) pixels:
3086  // F E D C B A 9 8 7 6 5 4 3 2 1 0
3087  // BR BL TR TL BR BL TR TL BR BL TR TL BR BL TR TL
3088 
3089  // factorsTopLeft stores the 32 bit interpolation values for 4 pixels:
3090  // FEDC BA98 7654 3210
3091  // 3 2 1 0 (32 bit interpolation values, fitting into 16 bit)
3092 
3093 
3094  // we will simply extract each channel from the source pixels,
3095  // each extracted channel will be multiplied by the corresponding interpolation factor
3096  // and all interpolation results will be accumulated afterwards
3097 
3098  // FEDC BA98 7654 3210
3099  const __m128i mask32_topLeft = SSE::set128i(0xFFFFFF0CFFFFFF08ull, 0xFFFFFF04FFFFFF00ull); // ---C ---8 ---4 ---0
3100  const __m128i mask32_topRight = SSE::set128i(0xFFFFFF0DFFFFFF09ull, 0xFFFFFF05FFFFFF01ull); // ---D ---9 ---5 ---1
3101  const __m128i mask32_bottomLeft = SSE::set128i(0xFFFFFF0EFFFFFF0Aull, 0xFFFFFF06FFFFFF02ull); // ---E ---A ---6 ---2
3102  const __m128i mask32_bottomRight = SSE::set128i(0xFFFFFF0FFFFFFF0Bull, 0xFFFFFF07FFFFFF03ull); // ---F ---B ---7 ---3
3103 
3104 
3105  // we extract the top left values and multiply them with the interpolation factors
3106  // FEDC BA98 7654 3210
3107  // ---C ---8 ---4 ---0
3108  // *
3109  // FTL3 FTL2 FTL1 FTL0
3110  __m128i multiplicationA = _mm_mullo_epi32(m128_factorsTopLeft, _mm_shuffle_epi8(m128_pixels, mask32_topLeft));
3111  __m128i multiplicationB = _mm_mullo_epi32(m128_factorsTopRight, _mm_shuffle_epi8(m128_pixels, mask32_topRight));
3112 
3113  multiplicationA = _mm_add_epi32(multiplicationA, _mm_mullo_epi32(m128_factorsBottomLeft, _mm_shuffle_epi8(m128_pixels, mask32_bottomLeft)));
3114  multiplicationB = _mm_add_epi32(multiplicationB, _mm_mullo_epi32(m128_factorsBottomRight, _mm_shuffle_epi8(m128_pixels, mask32_bottomRight)));
3115 
3116  __m128i multiplication = _mm_add_epi32(multiplicationA, multiplicationB);
3117 
3118  const __m128i m128_i_8192 = _mm_set1_epi32(8192);
3119 
3120  // we add 8192 for rounding and shift the result by 14 bits (division by 128*128) // TODO if using 256 we should be able to avoid the shifting by 14 bits (simply by using shuffle operations)
3121  // additionally, we shuffle the individual results together
3122 
3123  const __m128i result = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication, m128_i_8192), 14), SSE::set128i(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFF0C080400ull));
3124 
3125  *((unsigned int*)targetPositionPixels) = _mm_extract_epi32(result, 0);
3126 }
3127 
3128 template <>
3129 OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<3u>(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const DataType<uint8_t, 3u>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, 3u>::Type* targetPositionPixels)
3130 {
3131  ocean_assert(source != nullptr);
3132  ocean_assert(targetPositionPixels != nullptr);
3133 
3134  typedef typename DataType<uint8_t, 3u>::Type PixelType;
3135 
3136  // as we do not initialize the following intermediate data,
3137  // we hopefully will not allocate memory on the stack each time this function is called
3138  OCEAN_ALIGN_DATA(16) PixelType topLeftPixels[6];
3139  OCEAN_ALIGN_DATA(16) PixelType topRightPixels[6];
3140  OCEAN_ALIGN_DATA(16) PixelType bottomLeftPixels[6];
3141  OCEAN_ALIGN_DATA(16) PixelType bottomRightPixels[6];
3142 
3143  // we gather the individual source pixel values from the source image,
3144  // based on the calculated pixel locations
3145  for (unsigned int i = 0u; i < 4u; ++i)
3146  {
3147  if (validPixels[i])
3148  {
3149  topLeftPixels[i] = *((PixelType*)(source + offsetsTopLeft[i]));
3150  topRightPixels[i] = *((PixelType*)(source + offsetsTopRight[i]));
3151  bottomLeftPixels[i] = *((PixelType*)(source + offsetsBottomLeft[i]));
3152  bottomRightPixels[i] = *((PixelType*)(source + offsetsBottomRight[i]));
3153  }
3154  else
3155  {
3156  topLeftPixels[i] = borderColor;
3157  topRightPixels[i] = borderColor;
3158  bottomLeftPixels[i] = borderColor;
3159  bottomRightPixels[i] = borderColor;
3160  }
3161  }
3162 
3163  static_assert(sizeof(__m128i) <= sizeof(topLeftPixels), "Invalid data type!");
3164 
3165  const __m128i m128_topLeftPixels = _mm_load_si128((const __m128i*)topLeftPixels);
3166  const __m128i m128_topRightPixels = _mm_load_si128((const __m128i*)topRightPixels);
3167  const __m128i m128_bottomLeftPixels = _mm_load_si128((const __m128i*)bottomLeftPixels);
3168  const __m128i m128_bottomRightPixels = _mm_load_si128((const __m128i*)bottomRightPixels);
3169 
3170 
3171  // factorLeft = 128 - factorRight
3172  // factorTop = 128 - factorBottom
3173 
3174  const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3175  const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3176 
3177  // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3178  // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3179 
3180  const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3181  const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3182  const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3183  const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3184 
3185 
3186  const __m128i m128_interpolationResult = interpolate4Pixels8BitPerChannelSSE<3u>(m128_topLeftPixels, m128_topRightPixels, m128_bottomLeftPixels, m128_bottomRightPixels, m128_factorsTopLeft, m128_factorsTopRight, m128_factorsBottomLeft, m128_factorsBottomRight);
3187 
3188  // we copy the first 12 bytes
3189  memcpy(targetPositionPixels, &m128_interpolationResult, 12u);
3190 }
3191 
3192 #endif // OCEAN_COMPILER_MSC
3193 
3194 template <>
3195 OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<4u>(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const DataType<uint8_t, 4u>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, 4u>::Type* targetPositionPixels)
3196 {
3197  ocean_assert(source != nullptr);
3198  ocean_assert(targetPositionPixels != nullptr);
3199 
3200  typedef typename DataType<uint8_t, 4u>::Type PixelType;
3201 
3202  // as we do not initialize the following intermediate data,
3203  // we hopefully will not allocate memory on the stack each time this function is called
3204  OCEAN_ALIGN_DATA(16) PixelType topLeftPixels[4];
3205  OCEAN_ALIGN_DATA(16) PixelType topRightPixels[4];
3206  OCEAN_ALIGN_DATA(16) PixelType bottomLeftPixels[4];
3207  OCEAN_ALIGN_DATA(16) PixelType bottomRightPixels[4];
3208 
3209  // we gather the individual source pixel values from the source image,
3210  // based on the calculated pixel locations
3211 
3212  for (unsigned int i = 0u; i < 4u; ++i)
3213  {
3214  if (validPixels[i])
3215  {
3216  topLeftPixels[i] = *((PixelType*)(source + offsetsTopLeft[i]));
3217  topRightPixels[i] = *((PixelType*)(source + offsetsTopRight[i]));
3218  bottomLeftPixels[i] = *((PixelType*)(source + offsetsBottomLeft[i]));
3219  bottomRightPixels[i] = *((PixelType*)(source + offsetsBottomRight[i]));
3220  }
3221  else
3222  {
3223  topLeftPixels[i] = borderColor;
3224  topRightPixels[i] = borderColor;
3225  bottomLeftPixels[i] = borderColor;
3226  bottomRightPixels[i] = borderColor;
3227  }
3228  }
3229 
3230  static_assert(sizeof(__m128i) == sizeof(topLeftPixels), "Invalid data type!");
3231 
3232  const __m128i m128_topLeftPixels = _mm_load_si128((const __m128i*)topLeftPixels);
3233  const __m128i m128_topRightPixels = _mm_load_si128((const __m128i*)topRightPixels);
3234  const __m128i m128_bottomLeftPixels = _mm_load_si128((const __m128i*)bottomLeftPixels);
3235  const __m128i m128_bottomRightPixels = _mm_load_si128((const __m128i*)bottomRightPixels);
3236 
3237 
3238  // factorLeft = 128 - factorRight
3239  // factorTop = 128 - factorBottom
3240 
3241  const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3242  const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3243 
3244  // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3245  // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3246 
3247  const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3248  const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3249  const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3250  const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3251 
3252 
3253  const __m128i m128_interpolationResult = interpolate4Pixels8BitPerChannelSSE<4u>(m128_topLeftPixels, m128_topRightPixels, m128_bottomLeftPixels, m128_bottomRightPixels, m128_factorsTopLeft, m128_factorsTopRight, m128_factorsBottomLeft, m128_factorsBottomRight);
3254 
3255  _mm_storeu_si128((__m128i*)targetPositionPixels, m128_interpolationResult);
3256 }
3257 
3258 template <unsigned int tChannels>
3259 OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const typename DataType<uint8_t, tChannels>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, tChannels>::Type* targetPositionPixels)
3260 {
3261  ocean_assert(source != nullptr);
3262  ocean_assert(targetPositionPixels != nullptr);
3263 
3264  // as we do not initialize the following intermediate data,
3265  // we hopefully will not allocate memory on the stack each time this function is called
3266  OCEAN_ALIGN_DATA(16) unsigned int factorsTopLeft[4];
3267  OCEAN_ALIGN_DATA(16) unsigned int factorsTopRight[4];
3268  OCEAN_ALIGN_DATA(16) unsigned int factorsBottomLeft[4];
3269  OCEAN_ALIGN_DATA(16) unsigned int factorsBottomRight[4];
3270 
3271 
3272  // factorLeft = 128 - factorRight
3273  // factorTop = 128 - factorBottom
3274 
3275  const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3276  const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3277 
3278  // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3279  // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3280 
3281  const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3282  const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3283  const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3284  const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3285 
3286 
3287  // we store the interpolation factors
3288  _mm_store_si128((__m128i*)factorsTopLeft, m128_factorsTopLeft);
3289  _mm_store_si128((__m128i*)factorsTopRight, m128_factorsTopRight);
3290  _mm_store_si128((__m128i*)factorsBottomLeft, m128_factorsBottomLeft);
3291  _mm_store_si128((__m128i*)factorsBottomRight, m128_factorsBottomRight);
3292 
3293  for (unsigned int i = 0u; i < 4u; ++i)
3294  {
3295  if (validPixels[i])
3296  {
3297  const uint8_t* topLeft = source + offsetsTopLeft[i];
3298  const uint8_t* topRight = source + offsetsTopRight[i];
3299 
3300  const uint8_t* bottomLeft = source + offsetsBottomLeft[i];
3301  const uint8_t* bottomRight = source + offsetsBottomRight[i];
3302 
3303  const unsigned int& factorTopLeft = factorsTopLeft[i];
3304  const unsigned int& factorTopRight = factorsTopRight[i];
3305  const unsigned int& factorBottomLeft = factorsBottomLeft[i];
3306  const unsigned int& factorBottomRight = factorsBottomRight[i];
3307 
3308  for (unsigned int n = 0u; n < tChannels; ++n)
3309  {
3310  ((uint8_t*)targetPositionPixels)[n] = (uint8_t)((topLeft[n] * factorTopLeft + topRight[n] * factorTopRight + bottomLeft[n] * factorBottomLeft + bottomRight[n] * factorBottomRight + 8192u) >> 14u);
3311  }
3312  }
3313  else
3314  {
3315  *targetPositionPixels = borderColor;
3316  }
3317 
3318  targetPositionPixels++;
3319  }
3320 }
3321 
3322 #endif // OCEAN_HARDWARE_SSE_VERSION
3323 
3324 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3325 
3326 template <unsigned int tChannels>
3327 void FrameInterpolatorBilinear::affine8BitPerChannelNEONSubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
3328 {
3329  static_assert(tChannels >= 1u, "Invalid channel number!");
3330 
3331  ocean_assert(source && target);
3332  ocean_assert(sourceWidth > 0u && sourceHeight > 0u);
3333  ocean_assert(targetWidth >= 4u && targetHeight > 0u);
3334  ocean_assert(source_A_target);
3335  ocean_assert(!source_A_target->isNull() && Numeric::isEqualEps((*source_A_target)[2]) && Numeric::isEqualEps((*source_A_target)[5]));
3336 
3337  ocean_assert(firstTargetRow + numberTargetRows <= targetHeight);
3338 
3339  const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
3340  const unsigned int targetStrideElements = targetWidth * tChannels + targetPaddingElements;
3341 
3342  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
3343 
3344  uint8_t zeroColor[tChannels] = {uint8_t(0)};
3345  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
3346 
3347  unsigned int validPixels[4];
3348 
3349  unsigned int topLeftOffsetsElements[4];
3350  unsigned int topRightOffsetsElements[4];
3351  unsigned int bottomLeftOffsetsElements[4];
3352  unsigned int bottomRightOffsetsElements[4];
3353 
3354  const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(tChannels);
3355 
3356  // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
3357  const float32x4_t m128_f_X0 = vdupq_n_f32(float((*source_A_target)(0, 0)));
3358  const float32x4_t m128_f_X1 = vdupq_n_f32(float((*source_A_target)(1, 0)));
3359 
3360  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
3361  {
3362  PixelType* targetRow = (PixelType*)(target + y * targetStrideElements);
3363 
3364  /*
3365  * We can slightly optimize the 3x3 matrix multiplication:
3366  *
3367  * | X0 Y0 Z0 | | x |
3368  * | X1 Y1 Z1 | * | y |
3369  * | 0 0 1 | | 1 |
3370  *
3371  * | xx | | X0 * x | | Y0 * y + Z0 |
3372  * | yy | = | X1 * x | + | Y1 * y + Z1 |
3373  *
3374  * As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
3375  *
3376  * C0 = Y0 * y + Z0
3377  * C1 = Y1 * y + Z1
3378  *
3379  * So the computation becomes:
3380  *
3381  * | x' | | X0 * x | | C0 |
3382  * | y' | = | X1 * x | + | C1 |
3383  */
3384 
3385  // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
3386  const float32x4_t m128_f_C0 = vdupq_n_f32(float((*source_A_target)(0, 1) * Scalar(y) + (*source_A_target)(0, 2)));
3387  const float32x4_t m128_f_C1 = vdupq_n_f32(float((*source_A_target)(1, 1) * Scalar(y) + (*source_A_target)(1, 2)));
3388 
3389  // we store 4 floats: [0.0f, 0.0f, 0.0f, 0.0f]
3390  const float32x4_t m128_f_zero = vdupq_n_f32(0.0f);
3391 
3392  // we store 4 integers: [sourceStrideElements, sourceStrideElements, sourceStrideElements, sourceStrideElements]
3393  const uint32x4_t m128_u_sourceStrideElements = vdupq_n_u32(sourceStrideElements);
3394 
3395  // we store 4 integers: [sourceWidth - 1, sourceWidth - 1, sourceWidth - 1, sourceWidth - 1], and same with sourceHeight
3396  const uint32x4_t m128_u_sourceWidth_1 = vdupq_n_u32(sourceWidth - 1u);
3397  const uint32x4_t m128_u_sourceHeight_1 = vdupq_n_u32(sourceHeight - 1u);
3398 
3399  // we store 4 floats: [sourceWidth - 1, sourceWidth - 1, sourceWidth - 1, sourceWidth - 1], and same with sourceHeight
3400  const float32x4_t m128_f_sourceWidth_1 = vdupq_n_f32(float(sourceWidth - 1u));
3401  const float32x4_t m128_f_sourceHeight_1 = vdupq_n_f32(float(sourceHeight - 1u));
3402 
3403  for (unsigned int x = 0u; x < targetWidth; x += 4u)
3404  {
3405  if (x + 4u > targetWidth)
3406  {
3407  // the last iteration will not fit into the target frame,
3408  // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
3409 
3410  ocean_assert(x >= 4u && targetWidth > 4u);
3411  const unsigned int newX = targetWidth - 4u;
3412 
3413  ocean_assert(x > newX);
3414  targetRow -= x - newX;
3415 
3416  x = newX;
3417 
3418  // the for loop will stop after this iteration
3419  ocean_assert(!(x + 4u < targetWidth));
3420  }
3421 
3422 
3423  // we need four successive x coordinate floats:
3424  // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
3425  float x_0123[4] = {float(x + 0u), float(x + 1u), float(x + 2u), float(x + 3u)};
3426  const float32x4_t m128_f_x_0123 = vld1q_f32(x_0123);
3427 
3428  // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
3429  const float32x4_t m128_f_sourceX = vmlaq_f32(m128_f_C0, m128_f_X0, m128_f_x_0123);
3430  const float32x4_t m128_f_sourceY = vmlaq_f32(m128_f_C1, m128_f_X1, m128_f_x_0123);
3431 
3432 
3433  // now we check whether we are inside the source frame
3434  const uint32x4_t m128_u_validPixelX = vandq_u32(vcleq_f32(m128_f_sourceX, m128_f_sourceWidth_1), vcgeq_f32(m128_f_sourceX, m128_f_zero)); // sourcePosition.x() <= (sourceWidth - 1) && sourcePosition.x() >= 0 ? 0xFFFFFFFF : 0x00000000
3435  const uint32x4_t m128_u_validPixelY = vandq_u32(vcleq_f32(m128_f_sourceY, m128_f_sourceHeight_1), vcgeq_f32(m128_f_sourceY, m128_f_zero)); // sourcePosition.y() <= (sourceHeight - 1) && sourcePosition.y() >= 0 ? 0xFFFFFFFF : 0x00000000
3436 
3437  const uint32x4_t m128_u_validPixel = vandq_u32(m128_u_validPixelX, m128_u_validPixelY); // is_inside_source_frame(sourcePosition) ? 0xFFFFFFFF : 0x00000000
3438 
3439 
3440  // we can stop here if all pixels are invalid
3441  const uint32x2_t m64_u_validPixel = vorr_u32(vget_low_u32(m128_u_validPixel), vget_high_u32(m128_u_validPixel));
3442  if ((vget_lane_u32(m64_u_validPixel, 0) | vget_lane_u32(m64_u_validPixel, 1)) == 0x00000000u)
3443  {
3444 #ifdef OCEAN_DEBUG
3445  OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
3446  vst1q_u32(debugValidPixels, m128_u_validPixel);
3447  ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
3448 #endif
3449 
3450  targetRow[0] = *bColor;
3451  targetRow[1] = *bColor;
3452  targetRow[2] = *bColor;
3453  targetRow[3] = *bColor;
3454 
3455  targetRow += 4;
3456 
3457  continue;
3458  }
3459 
3460 
3461  // we store the result
3462  vst1q_u32(validPixels, m128_u_validPixel);
3463  ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
3464 
3465 
3466  // now we determine the left, top, right and bottom pixel used for the interpolation
3467  // left = floor(x); top = floor(y)
3468  const uint32x4_t m128_u_left = vcvtq_u32_f32(m128_f_sourceX);
3469  const uint32x4_t m128_u_top = vcvtq_u32_f32(m128_f_sourceY);
3470 
3471  // right = min(left + 1, width - 1); bottom = min(top + 1; height - 1)
3472  const uint32x4_t m128_u_right = vminq_u32(vaddq_u32(m128_u_left, vdupq_n_u32(1u)), m128_u_sourceWidth_1);
3473  const uint32x4_t m128_u_bottom = vminq_u32(vaddq_u32(m128_u_top, vdupq_n_u32(1u)), m128_u_sourceHeight_1);
3474 
3475  // offset = y * stride + x * channels
3476  const uint32x4_t m128_u_topLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_top, m128_u_sourceStrideElements); // topLeftOffset = top * strideElements + left * channels
3477  const uint32x4_t m128_u_topRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_top, m128_u_sourceStrideElements); // topRightOffset = top * strideElements + right * channels
3478  const uint32x4_t m128_u_bottomLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_bottom, m128_u_sourceStrideElements); // ...
3479  const uint32x4_t m128_u_bottomRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_bottom, m128_u_sourceStrideElements);
3480 
3481  // we store the offsets
3482  vst1q_u32(topLeftOffsetsElements, m128_u_topLeftOffsetElements);
3483  vst1q_u32(topRightOffsetsElements, m128_u_topRightOffsetElements);
3484  vst1q_u32(bottomLeftOffsetsElements, m128_u_bottomLeftOffsetElements);
3485  vst1q_u32(bottomRightOffsetsElements, m128_u_bottomRightOffsetElements);
3486 
3487 
3488  // now we need to determine the interpolation factors tx, tx_ and ty, ty_: (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
3489 
3490  // we determine the fractional portions of the x' and y':
3491  float32x4_t m128_f_tx = vsubq_f32(m128_f_sourceX, vcvtq_f32_u32(m128_u_left));
3492  float32x4_t m128_f_ty = vsubq_f32(m128_f_sourceY, vcvtq_f32_u32(m128_u_top));
3493 
3494  // we use integer interpolation [0.0, 1.0] -> [0, 128]
3495  m128_f_tx = vmulq_f32(m128_f_tx, vdupq_n_f32(128.0f));
3496  m128_f_ty = vmulq_f32(m128_f_ty, vdupq_n_f32(128.0f));
3497 
3498  const uint32x4_t m128_u_tx = vcvtq_u32_f32(vaddq_f32(m128_f_tx, vdupq_n_f32(0.5)));
3499  const uint32x4_t m128_u_ty = vcvtq_u32_f32(vaddq_f32(m128_f_ty, vdupq_n_f32(0.5)));
3500 
3501  if constexpr (tChannels > 4u)
3502  {
3503  // normally we would simply call instead of copying the code of the function to this location
3504  // however, if calling the function instead of applying the code here directly
3505  // clang ends with code approx. 20% slower
3506  // thus we make a copy of the code and keep the function for demonstration purposes
3507 
3508  //interpolate4Pixels8BitPerChannelNEON<tChannels>(source, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, targetPixelData);
3509  //targetPixelData += 4;
3510 
3511  const uint32x4_t m128_u_tx_ = vsubq_u32(vdupq_n_u32(128u), m128_u_tx);
3512  const uint32x4_t m128_u_ty_ = vsubq_u32(vdupq_n_u32(128u), m128_u_ty);
3513 
3514  // (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
3515  // == top_left * tx_ty_ + top_right * txty_ + bottom_left * tx_ty + bottom_right * txty
3516  const uint32x4_t m128_u_tx_ty_ = vmulq_u32(m128_u_tx_, m128_u_ty_);
3517  const uint32x4_t m128_u_txty_ = vmulq_u32(m128_u_tx, m128_u_ty_);
3518  const uint32x4_t m128_u_tx_ty = vmulq_u32(m128_u_tx_, m128_u_ty);
3519  const uint32x4_t m128_u_txty = vmulq_u32(m128_u_tx, m128_u_ty);
3520 
3521  unsigned int tx_ty_s[4];
3522  unsigned int txty_s[4];
3523  unsigned int tx_tys[4];
3524  unsigned int txtys[4];
3525 
3526  // we store the interpolation factors
3527  vst1q_u32(tx_ty_s, m128_u_tx_ty_);
3528  vst1q_u32(txty_s, m128_u_txty_);
3529  vst1q_u32(tx_tys, m128_u_tx_ty);
3530  vst1q_u32(txtys, m128_u_txty);
3531 
3532  for (unsigned int i = 0u; i < 4u; ++i)
3533  {
3534  if (validPixels[i])
3535  {
3536  ocean_assert(topLeftOffsetsElements[i] < sourceStrideElements * sourceHeight);
3537  ocean_assert(topRightOffsetsElements[i] < sourceStrideElements * sourceHeight);
3538  ocean_assert(bottomLeftOffsetsElements[i] < sourceStrideElements * sourceHeight);
3539  ocean_assert(bottomRightOffsetsElements[i] < sourceStrideElements * sourceHeight);
3540 
3541  const uint8_t* topLeft = source + topLeftOffsetsElements[i];
3542  const uint8_t* topRight = source + topRightOffsetsElements[i];
3543 
3544  const uint8_t* bottomLeft = source + bottomLeftOffsetsElements[i];
3545  const uint8_t* bottomRight = source + bottomRightOffsetsElements[i];
3546 
3547  const unsigned int tx_ty_ = tx_ty_s[i];
3548  const unsigned int txty_ = txty_s[i];
3549  const unsigned int tx_ty = tx_tys[i];
3550  const unsigned int txty = txtys[i];
3551 
3552  ocean_assert(tx_ty_ + txty_ + tx_ty + txty == 128u * 128u);
3553 
3554  for (unsigned int n = 0u; n < tChannels; ++n)
3555  {
3556  ((uint8_t*)targetRow)[n] = uint8_t((topLeft[n] * tx_ty_ + topRight[n] * txty_ + bottomLeft[n] * tx_ty + bottomRight[n] * txty + 8192u) >> 14u);
3557  }
3558  }
3559  else
3560  {
3561  *targetRow = *bColor;
3562  }
3563 
3564  targetRow++;
3565  }
3566  }
3567  else
3568  {
3569  interpolate4Pixels8BitPerChannelNEON<tChannels>(source, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, targetRow);
3570  targetRow += 4;
3571  }
3572  }
3573  }
3574 }
3575 
3576 template <unsigned int tChannels>
3577 void FrameInterpolatorBilinear::homography8BitPerChannelNEONSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
3578 {
3579  static_assert(tChannels >= 1u, "Invalid channel number!");
3580 
3581  ocean_assert(input != nullptr && output != nullptr);
3582  ocean_assert(inputWidth > 0u && inputHeight > 0u);
3583  ocean_assert(outputWidth >= 4u && outputHeight > 0u);
3584  ocean_assert(input_H_output != nullptr);
3585 
3586  ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
3587 
3588  const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
3589  const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
3590 
3591  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
3592 
3593  uint8_t zeroColor[tChannels] = {uint8_t(0)};
3594  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
3595 
3596  unsigned int validPixels[4];
3597 
3598  unsigned int topLeftOffsetsElements[4];
3599  unsigned int topRightOffsetsElements[4];
3600  unsigned int bottomLeftOffsetsElements[4];
3601  unsigned int bottomRightOffsetsElements[4];
3602 
3603  const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(tChannels);
3604 
3605  // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
3606  const float32x4_t m128_f_X0 = vdupq_n_f32(float((*input_H_output)(0, 0)));
3607  const float32x4_t m128_f_X1 = vdupq_n_f32(float((*input_H_output)(1, 0)));
3608  const float32x4_t m128_f_X2 = vdupq_n_f32(float((*input_H_output)(2, 0)));
3609 
3610  for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
3611  {
3612  PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
3613 
3614  /*
3615  * We can slightly optimize the 3x3 matrix multiplication:
3616  *
3617  * | X0 Y0 Z0 | | x |
3618  * | X1 Y1 Z1 | * | y |
3619  * | X2 Y2 Z2 | | 1 |
3620  *
3621  * | xx | | X0 * x | | Y0 * y + Z0 |
3622  * | yy | = | X1 * x | + | Y1 * y + Z1 |
3623  * | zz | | X2 * x | | Y2 * y + Z2 |
3624  *
3625  * | xx | | X0 * x | | C0 |
3626  * | yy | = | X1 * x | + | C1 |
3627  * | zz | | X2 * x | | C3 |
3628  *
3629  * As y is constant within the inner loop, we can pre-calculate the following terms:
3630  *
3631  * | x' | | (X0 * x + C0) / (X2 * x + C2) |
3632  * | y' | = | (X1 * x + C1) / (X2 * x + C2) |
3633  */
3634 
3635  // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
3636  const float32x4_t m128_f_C0 = vdupq_n_f32(float((*input_H_output)(0, 1) * Scalar(y) + (*input_H_output)(0, 2)));
3637  const float32x4_t m128_f_C1 = vdupq_n_f32(float((*input_H_output)(1, 1) * Scalar(y) + (*input_H_output)(1, 2)));
3638  const float32x4_t m128_f_C2 = vdupq_n_f32(float((*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2)));
3639 
3640  // we store 4 floats: [0.0f, 0.0f, 0.0f, 0.0f]
3641  const float32x4_t m128_f_zero = vdupq_n_f32(0.0f);
3642 
3643  // we store 4 integers: [inputStrideElements, inputStrideElements, inputStrideElements, inputStrideElements]
3644  const uint32x4_t m128_u_inputStrideElements = vdupq_n_u32(inputStrideElements);
3645 
3646  // we store 4 integers: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
3647  const uint32x4_t m128_u_inputWidth_1 = vdupq_n_u32(inputWidth - 1u);
3648  const uint32x4_t m128_u_inputHeight_1 = vdupq_n_u32(inputHeight - 1u);
3649 
3650  // we store 4 floats: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
3651  const float32x4_t m128_f_inputWidth_1 = vdupq_n_f32(float(inputWidth - 1u));
3652  const float32x4_t m128_f_inputHeight_1 = vdupq_n_f32(float(inputHeight - 1u));
3653 
3654  for (unsigned int x = 0u; x < outputWidth; x += 4u)
3655  {
3656  if (x + 4u > outputWidth)
3657  {
3658  // the last iteration will not fit into the output frame,
3659  // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
3660 
3661  ocean_assert(x >= 4u && outputWidth > 4u);
3662  const unsigned int newX = outputWidth - 4u;
3663 
3664  ocean_assert(x > newX);
3665  outputPixelData -= x - newX;
3666 
3667  x = newX;
3668 
3669  // the for loop will stop after this iteration
3670  ocean_assert(!(x + 4u < outputWidth));
3671  }
3672 
3673 
3674  // we need four successive x coordinate floats:
3675  // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
3676  float x_0123[4] = {float(x + 0u), float(x + 1u), float(x + 2u), float(x + 3u)};
3677  const float32x4_t m128_f_x_0123 = vld1q_f32(x_0123);
3678 
3679  // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
3680  const float32x4_t m128_f_xx = vmlaq_f32(m128_f_C0, m128_f_X0, m128_f_x_0123);
3681  const float32x4_t m128_f_yy = vmlaq_f32(m128_f_C1, m128_f_X1, m128_f_x_0123);
3682  const float32x4_t m128_f_zz = vmlaq_f32(m128_f_C2, m128_f_X2, m128_f_x_0123);
3683 
3684 #ifdef USE_DIVISION_ARM64_ARCHITECTURE
3685 
3686  // using the division available from ARM64 is more precise
3687  const float32x4_t m128_f_inputX = vdivq_f32(m128_f_xx, m128_f_zz);
3688  const float32x4_t m128_f_inputY = vdivq_f32(m128_f_yy, m128_f_zz);
3689 
3690 #else
3691 
3692  // we calculate the (approximated) inverse of zz
3693  // [1/zz3, 1/zz2, 1/zz1, 1/zz0]
3694  float32x4_t inv_zz_128 = vrecpeq_f32(m128_f_zz);
3695  inv_zz_128 = vmulq_f32(vrecpsq_f32(m128_f_zz, inv_zz_128), inv_zz_128); // improving the accuracy of the approx. inverse by Newton/Raphson
3696 
3697  // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
3698  const float32x4_t m128_f_inputX = vmulq_f32(m128_f_xx, inv_zz_128);
3699  const float32x4_t m128_f_inputY = vmulq_f32(m128_f_yy, inv_zz_128);
3700 
3701 #endif // USE_DIVISION_ARM64_ARCHITECTURE
3702 
3703 
3704  // now we check whether we are inside the input frame
3705  const uint32x4_t m128_u_validPixelX = vandq_u32(vcleq_f32(m128_f_inputX, m128_f_inputWidth_1), vcgeq_f32(m128_f_inputX, m128_f_zero)); // inputPosition.x() >= 0 && inputPosition.x() <= (inputWidth - 1) ? 0xFFFFFF : 0x000000
3706  const uint32x4_t m128_u_validPixelY = vandq_u32(vcleq_f32(m128_f_inputY, m128_f_inputHeight_1), vcgeq_f32(m128_f_inputY, m128_f_zero)); // inputPosition.y() >= 0 && inputPosition.y() <= (inputHeight - 1) ? 0xFFFFFF : 0x000000
3707 
3708  const uint32x4_t m128_u_validPixel = vandq_u32(m128_u_validPixelX, m128_u_validPixelY); // is_inside_input_frame(inputPosition) ? 0xFFFFFF : 0x000000
3709 
3710 
3711  // we can stop here if all pixels are invalid
3712  const uint32x2_t m64_u_validPixel = vorr_u32(vget_low_u32(m128_u_validPixel), vget_high_u32(m128_u_validPixel));
3713  if ((vget_lane_u32(m64_u_validPixel, 0) | vget_lane_u32(m64_u_validPixel, 1)) == 0x00000000u)
3714  {
3715 #ifdef OCEAN_DEBUG
3716  OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
3717  vst1q_u32(debugValidPixels, m128_u_validPixel);
3718  ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
3719 #endif
3720 
3721  outputPixelData[0] = *bColor;
3722  outputPixelData[1] = *bColor;
3723  outputPixelData[2] = *bColor;
3724  outputPixelData[3] = *bColor;
3725 
3726  outputPixelData += 4;
3727 
3728  continue;
3729  }
3730 
3731 
3732  // we store the result
3733  vst1q_u32(validPixels, m128_u_validPixel);
3734  ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
3735 
3736 
3737  // now we determine the left, top, right and bottom pixel used for the interpolation
3738  // left = floor(x); top = floor(y)
3739  const uint32x4_t m128_u_left = vcvtq_u32_f32(m128_f_inputX);
3740  const uint32x4_t m128_u_top = vcvtq_u32_f32(m128_f_inputY);
3741 
3742  // right = min(left + 1, width - 1); bottom = min(top + 1; height - 1)
3743  const uint32x4_t m128_u_right = vminq_u32(vaddq_u32(m128_u_left, vdupq_n_u32(1u)), m128_u_inputWidth_1);
3744  const uint32x4_t m128_u_bottom = vminq_u32(vaddq_u32(m128_u_top, vdupq_n_u32(1u)), m128_u_inputHeight_1);
3745 
3746  // offset = y * stride + x * channels
3747  const uint32x4_t m128_u_topLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_top, m128_u_inputStrideElements); // topLeftOffset = top * strideElements + left * channels
3748  const uint32x4_t m128_u_topRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_top, m128_u_inputStrideElements); // topRightOffset = top * strideElements + right * channels
3749  const uint32x4_t m128_u_bottomLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_bottom, m128_u_inputStrideElements); // ...
3750  const uint32x4_t m128_u_bottomRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_bottom, m128_u_inputStrideElements);
3751 
3752  // we store the offsets
3753  vst1q_u32(topLeftOffsetsElements, m128_u_topLeftOffsetElements);
3754  vst1q_u32(topRightOffsetsElements, m128_u_topRightOffsetElements);
3755  vst1q_u32(bottomLeftOffsetsElements, m128_u_bottomLeftOffsetElements);
3756  vst1q_u32(bottomRightOffsetsElements, m128_u_bottomRightOffsetElements);
3757 
3758 
3759  // now we need to determine the interpolation factors tx, tx_ and ty, ty_: (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
3760 
3761  // we determine the fractional portions of the x' and y':
3762  float32x4_t m128_f_tx = vsubq_f32(m128_f_inputX, vcvtq_f32_u32(m128_u_left));
3763  float32x4_t m128_f_ty = vsubq_f32(m128_f_inputY, vcvtq_f32_u32(m128_u_top));
3764 
3765  // we use integer interpolation [0.0, 1.0] -> [0, 128]
3766  m128_f_tx = vmulq_f32(m128_f_tx, vdupq_n_f32(128.0f));
3767  m128_f_ty = vmulq_f32(m128_f_ty, vdupq_n_f32(128.0f));
3768 
3769  const uint32x4_t m128_u_tx = vcvtq_u32_f32(vaddq_f32(m128_f_tx, vdupq_n_f32(0.5)));
3770  const uint32x4_t m128_u_ty = vcvtq_u32_f32(vaddq_f32(m128_f_ty, vdupq_n_f32(0.5)));
3771 
3772  if constexpr (tChannels > 4u)
3773  {
3774  // normally we would simply call instead of copying the code of the function to this location
3775  // however, if calling the function instead of applying the code here directly
3776  // clang ends with code approx. 20% slower
3777  // thus we make a copy of the code and keep the function for demonstration purposes
3778 
3779  //interpolate4Pixels8BitPerChannelNEON<tChannels>(input, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, outputPixelData);
3780  //outputPixelData += 4;
3781 
3782  const uint32x4_t m128_u_tx_ = vsubq_u32(vdupq_n_u32(128u), m128_u_tx);
3783  const uint32x4_t m128_u_ty_ = vsubq_u32(vdupq_n_u32(128u), m128_u_ty);
3784 
3785  // (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
3786  // == top_left * tx_ty_ + top_right * txty_ + bottom_left * tx_ty + bottom_right * txty
3787  const uint32x4_t m128_u_tx_ty_ = vmulq_u32(m128_u_tx_, m128_u_ty_);
3788  const uint32x4_t m128_u_txty_ = vmulq_u32(m128_u_tx, m128_u_ty_);
3789  const uint32x4_t m128_u_tx_ty = vmulq_u32(m128_u_tx_, m128_u_ty);
3790  const uint32x4_t m128_u_txty = vmulq_u32(m128_u_tx, m128_u_ty);
3791 
3792  unsigned int tx_ty_s[4];
3793  unsigned int txty_s[4];
3794  unsigned int tx_tys[4];
3795  unsigned int txtys[4];
3796 
3797  // we store the interpolation factors
3798  vst1q_u32(tx_ty_s, m128_u_tx_ty_);
3799  vst1q_u32(txty_s, m128_u_txty_);
3800  vst1q_u32(tx_tys, m128_u_tx_ty);
3801  vst1q_u32(txtys, m128_u_txty);
3802 
3803  for (unsigned int i = 0u; i < 4u; ++i)
3804  {
3805  if (validPixels[i])
3806  {
3807  ocean_assert(topLeftOffsetsElements[i] < inputStrideElements * inputHeight);
3808  ocean_assert(topRightOffsetsElements[i] < inputStrideElements * inputHeight);
3809  ocean_assert(bottomLeftOffsetsElements[i] < inputStrideElements * inputHeight);
3810  ocean_assert(bottomRightOffsetsElements[i] < inputStrideElements * inputHeight);
3811 
3812  const uint8_t* topLeft = input + topLeftOffsetsElements[i];
3813  const uint8_t* topRight = input + topRightOffsetsElements[i];
3814 
3815  const uint8_t* bottomLeft = input + bottomLeftOffsetsElements[i];
3816  const uint8_t* bottomRight = input + bottomRightOffsetsElements[i];
3817 
3818  const unsigned int tx_ty_ = tx_ty_s[i];
3819  const unsigned int txty_ = txty_s[i];
3820  const unsigned int tx_ty = tx_tys[i];
3821  const unsigned int txty = txtys[i];
3822 
3823  ocean_assert(tx_ty_ + txty_ + tx_ty + txty == 128u * 128u);
3824 
3825  for (unsigned int n = 0u; n < tChannels; ++n)
3826  {
3827  ((uint8_t*)outputPixelData)[n] = uint8_t((topLeft[n] * tx_ty_ + topRight[n] * txty_ + bottomLeft[n] * tx_ty + bottomRight[n] * txty + 8192u) >> 14u);
3828  }
3829  }
3830  else
3831  {
3832  *outputPixelData = *bColor;
3833  }
3834 
3835  outputPixelData++;
3836  }
3837  }
3838  else
3839  {
3840  interpolate4Pixels8BitPerChannelNEON<tChannels>(input, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, outputPixelData);
3841  outputPixelData += 4;
3842  }
3843  }
3844  }
3845 }
3846 
3847 template <>
3848 OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<1u>(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const DataType<uint8_t, 1u>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, 1u>::Type* targetPositionPixels)
3849 {
3850  ocean_assert(source != nullptr);
3851  ocean_assert(targetPositionPixels != nullptr);
3852 
3853  // as we do not initialize the following intermediate data,
3854  // we hopefully will not allocate memory on the stack each time this function is called
3855  DataType<uint8_t, 1u>::Type pixels[16];
3856 
3857  // we will store the pixel information in the following pattern:
3858  // F E D C B A 9 8 7 6 5 4 3 2 1 0
3859  // BR3 BL3 TR3 TL3 BR2 BL2 TR2 TL2 BR1 BL1 TR1 TL1 BR0 BL0 TR0 TL0
3860 
3861  // we gather the individual source pixel values from the source image,
3862  // based on the calculated pixel locations
3863  for (unsigned int i = 0u; i < 4u; ++i)
3864  {
3865  if (validPixels[i])
3866  {
3867  pixels[i * 4u + 0u] = *((const DataType<uint8_t, 1u>::Type*)(source + offsetsTopLeftElements[i]));
3868  pixels[i * 4u + 1u] = *((const DataType<uint8_t, 1u>::Type*)(source + offsetsTopRightElements[i]));
3869  pixels[i * 4u + 2u] = *((const DataType<uint8_t, 1u>::Type*)(source + offsetsBottomLeftElements[i]));
3870  pixels[i * 4u + 3u] = *((const DataType<uint8_t, 1u>::Type*)(source + offsetsBottomRightElements[i]));
3871  }
3872  else
3873  {
3874  pixels[i * 4u + 0u] = borderColor;
3875  pixels[i * 4u + 1u] = borderColor;
3876  pixels[i * 4u + 2u] = borderColor;
3877  pixels[i * 4u + 3u] = borderColor;
3878  }
3879  }
3880 
3881  static_assert(sizeof(uint8x16_t) == sizeof(pixels), "Invalid data type!");
3882 
3883  const uint8x16_t m128_pixels = vld1q_u8((const uint8_t*)pixels);
3884 
3885 
3886  // factorLeft = 128 - factorRight
3887  // factorTop = 128 - factorBottom
3888 
3889  const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
3890  const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
3891 
3892  // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3893  // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3894 
3895  const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
3896  const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
3897  const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
3898  const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
3899 
3900  // pixels stores the four interpolation grascale pixel values (top left, top right, bottom left, bottom right) for 4 (independent) pixels:
3901  // F E D C B A 9 8 7 6 5 4 3 2 1 0
3902  // BR BL TR TL BR BL TR TL BR BL TR TL BR BL TR TL
3903 
3904  // factorsTopLeft stores the 32 bit interpolation values for 4 pixels:
3905  // FEDC BA98 7654 3210
3906  // 3 2 1 0 (32 bit interpolation values, fitting into 16 bit)
3907 
3908 
3909  // we will simply extract each channel from the source pixels,
3910  // each extracted channel will be multiplied by the corresponding interpolation factor
3911  // and all interpolation results will be accumulated afterwards
3912 
3913  const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
3914 
3915  const uint32x4_t m128_muliplicationA = vmulq_u32(vandq_u32(vreinterpretq_u32_u8(m128_pixels), m128_maskFirstByte), m128_factorsTopLeft);
3916  const uint32x4_t m128_muliplicationB = vmulq_u32(vandq_u32(vshrq_n_u32(vreinterpretq_u32_u8(m128_pixels), 8), m128_maskFirstByte), m128_factorsTopRight);
3917  const uint32x4_t m128_muliplicationC = vmulq_u32(vandq_u32(vshrq_n_u32(vreinterpretq_u32_u8(m128_pixels), 16), m128_maskFirstByte), m128_factorsBottomLeft);
3918  const uint32x4_t m128_muliplicationD = vmulq_u32(vandq_u32(vshrq_n_u32(vreinterpretq_u32_u8(m128_pixels), 24), m128_maskFirstByte), m128_factorsBottomRight);
3919 
3920  const uint32x4_t m128_multiplication = vaddq_u32(vaddq_u32(m128_muliplicationA, m128_muliplicationB), vaddq_u32(m128_muliplicationC, m128_muliplicationD));
3921 
3922  // we add 8192 and shift by 14 bits
3923 
3924  const uint8x16_t m128_interpolation = vreinterpretq_u8_u32(vshrq_n_u32(vaddq_u32(m128_multiplication, vdupq_n_u32(8192u)), 14));
3925 
3926  // finally we have the following result:
3927  // ---C ---8 ---4 ---0
3928  // and we need to extract the four pixel values:
3929  //
3930  // NOTE: Because of a possible bug in Clang affecting ARMv7, vget_lane_u32()
3931  // seems to assume 32-bit memory alignment for output location, which cannot
3932  // be guaranteed. This results in bus errors and crashes the application.
3933  // ARM64 is not affected.
3934 #if defined(__aarch64__)
3935 
3936  const uint8x8_t m64_mask0 = {0, 4, 1, 1, 1, 1, 1, 1};
3937  const uint8x8_t m64_mask1 = {1, 1, 0, 4, 1, 1, 1, 1};
3938 
3939  const uint8x8_t m64_interpolation01 = vtbl1_u8(vget_low_u8(m128_interpolation), m64_mask0);
3940  const uint8x8_t m64_interpolation23 = vtbl1_u8(vget_high_u8(m128_interpolation), m64_mask1);
3941 
3942  const uint8x8_t m64_interpolation0123 = vorr_u8(m64_interpolation01, m64_interpolation23);
3943 
3944  const uint32_t result = vget_lane_u32(vreinterpret_u32_u8(m64_interpolation0123), 0);
3945  memcpy(targetPositionPixels, &result, sizeof(uint32_t));
3946 
3947 #else
3948 
3949  *((uint8_t*)targetPositionPixels + 0) = vgetq_lane_u8(m128_interpolation, 0);
3950  *((uint8_t*)targetPositionPixels + 1) = vgetq_lane_u8(m128_interpolation, 4);
3951  *((uint8_t*)targetPositionPixels + 2) = vgetq_lane_u8(m128_interpolation, 8);
3952  *((uint8_t*)targetPositionPixels + 3) = vgetq_lane_u8(m128_interpolation, 12);
3953 
3954 #endif
3955 }
3956 
3957 OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate8Pixels1Channel8BitNEON(const uint8x8_t& topLeft_u_8x8, const uint8x8_t& topRight_u_8x8, const uint8x8_t& bottomLeft_u_8x8, const uint8x8_t& bottomRight_u_8x8, const uint8x16_t& factorsRight_factorsBottom_128_u_8x16, uint8_t* targetPositionPixels)
3958 {
3959  const uint8x16_t factorsLeft_factorsTop_128_u_8x16 = vsubq_u8(vdupq_n_u8(128u), factorsRight_factorsBottom_128_u_8x16); // factorLeft = 128 - factorRight, factorTop = 128 - factorBottomv
3960 
3961  const uint8x8_t factorsRight_u_8x8 = vget_low_u8(factorsRight_factorsBottom_128_u_8x16);
3962  const uint16x8_t factorsBottom_u_16x8 = vmovl_u8(vget_high_u8(factorsRight_factorsBottom_128_u_8x16));
3963 
3964  const uint8x8_t factorsLeft_u_8x8 = vget_low_u8(factorsLeft_factorsTop_128_u_8x16);
3965  const uint16x8_t factorsTop_u_16x8 = vmovl_u8(vget_high_u8(factorsLeft_factorsTop_128_u_8x16));
3966 
3967  const uint16x8_t intermediateTop_u_16x8 = vmlal_u8(vmull_u8(topLeft_u_8x8, factorsLeft_u_8x8), topRight_u_8x8, factorsRight_u_8x8); // intermediateTop = topLeft * factorLeft + topRight * factorRight
3968  const uint16x8_t intermediateBottom_u_16x8 = vmlal_u8(vmull_u8(bottomLeft_u_8x8, factorsLeft_u_8x8), bottomRight_u_8x8, factorsRight_u_8x8); // intermediateBottom = bottomLeft * factorLeft + bottomRight * factorRight
3969 
3970  const uint32x4_t resultA_32x4 = vmlal_u16(vmull_u16(vget_low_u16(intermediateTop_u_16x8), vget_low_u16(factorsTop_u_16x8)), vget_low_u16(intermediateBottom_u_16x8), vget_low_u16(factorsBottom_u_16x8)); // result = intermediateTop * factorTop + intermediateBottom + factorBottom
3971  const uint32x4_t resultB_32x4 = vmlal_u16(vmull_u16(vget_high_u16(intermediateTop_u_16x8), vget_high_u16(factorsTop_u_16x8)), vget_high_u16(intermediateBottom_u_16x8), vget_high_u16(factorsBottom_u_16x8));
3972 
3973  const uint16x8_t result_16x8 = vcombine_u16(vrshrn_n_u32(resultA_32x4, 14), vrshrn_n_u32(resultB_32x4, 14)); // round(result / 16384.0)
3974 
3975  const uint8x8_t result_8x8 = vmovn_u16(result_16x8);
3976 
3977  vst1_u8(targetPositionPixels, result_8x8);
3978 }
3979 
3980 template <>
3981 OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<2u>(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const DataType<uint8_t, 2u>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, 2u>::Type* targetPositionPixels)
3982 {
3983  ocean_assert(source != nullptr);
3984  ocean_assert(targetPositionPixels != nullptr);
3985 
3986  typedef typename DataType<uint8_t, 2u>::Type PixelType;
3987 
3988  // as we do not initialize the following intermediate data,
3989  // we hopefully will not allocate memory on the stack each time this function is called
3990  PixelType topPixels[8];
3991  PixelType bottomPixels[8];
3992 
3993  // we will store the pixel information in the following pattern (here for YA):
3994  // FE DC BA 98 76 54 32 10
3995  // YA YA YA YA YA YA YA YA
3996  // TR TL TR TL TR TL TR TL
3997 
3998  // we gather the individual source pixel values from the source image,
3999  // based on the calculated pixel locations
4000  for (unsigned int i = 0u; i < 4u; ++i)
4001  {
4002  if (validPixels[i])
4003  {
4004  *(topPixels + i * 2u + 0u) = *((const PixelType*)(source + offsetsTopLeftElements[i]));
4005  *(topPixels + i * 2u + 1u) = *((const PixelType*)(source + offsetsTopRightElements[i]));
4006  *(bottomPixels + i * 2u + 0u) = *((const PixelType*)(source + offsetsBottomLeftElements[i]));
4007  *(bottomPixels + i * 2u + 1u) = *((const PixelType*)(source + offsetsBottomRightElements[i]));
4008  }
4009  else
4010  {
4011  *(topPixels + i * 2u + 0u) = borderColor;
4012  *(topPixels + i * 2u + 1u) = borderColor;
4013  *(bottomPixels + i * 2u + 0u) = borderColor;
4014  *(bottomPixels + i * 2u + 1u) = borderColor;
4015  }
4016  }
4017 
4018  static_assert(sizeof(uint32x4_t) == sizeof(topPixels), "Invalid data type!");
4019 
4020  const uint32x4_t m128_topPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)topPixels));
4021  const uint32x4_t m128_bottomPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)bottomPixels));
4022 
4023 
4024  // factorLeft = 128 - factorRight
4025  // factorTop = 128 - factorBottom
4026 
4027  const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4028  const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
4029 
4030  // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
4031  // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
4032 
4033  const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4034  const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4035  const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4036  const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
4037 
4038 
4039  const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
4040 
4041  uint32x4_t m128_muliplicationChannel0 = vmulq_u32(vandq_u32(m128_topPixels, m128_maskFirstByte), m128_factorsTopLeft);
4042  uint32x4_t m128_muliplicationChannel1 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topPixels, 8), m128_maskFirstByte), m128_factorsTopLeft);
4043 
4044  m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topPixels, 16), m128_maskFirstByte), m128_factorsTopRight));
4045  m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topPixels, 24), m128_maskFirstByte), m128_factorsTopRight));
4046 
4047  m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomPixels, m128_maskFirstByte), m128_factorsBottomLeft));
4048  m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomPixels, 8), m128_maskFirstByte), m128_factorsBottomLeft));
4049 
4050  m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomPixels, 16), m128_maskFirstByte), m128_factorsBottomRight));
4051  m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomPixels, 24), m128_maskFirstByte), m128_factorsBottomRight));
4052 
4053 
4054  // we add 8192 and shift by 14 bits
4055 
4056  const uint32x4_t m128_interpolation0 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel0, vdupq_n_u32(8192u)), 14);
4057  const uint32x4_t m128_interpolation1 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel1, vdupq_n_u32(8192u)), 14);
4058 
4059  // finaly we blend the interpolation results together to get the following pattern:
4060  // FE DC BA 98 76 54 32 10
4061  // 00 YA 00 YA 00 YA 00 YA
4062 
4063  const uint32x4_t m128_interpolation = vorrq_u32(m128_interpolation0, vshlq_n_u32(m128_interpolation1, 8));
4064 
4065  // we shuffle the 128 bit register to a 64 bit register:
4066 
4067  const uint8x8_t m64_mask0 = {0, 1, 4, 5, 2, 2, 2, 2};
4068  const uint8x8_t m64_mask1 = {2, 2, 2, 2, 0, 1, 4, 5};
4069 
4070  const uint8x8_t m64_interpolation_low = vtbl1_u8(vget_low_u8(vreinterpretq_u8_u32(m128_interpolation)), m64_mask0);
4071  const uint8x8_t m64_interpolation_high = vtbl1_u8(vget_high_u8(vreinterpretq_u8_u32(m128_interpolation)), m64_mask1);
4072 
4073  const uint8x8_t m64_interpolation = vorr_u8(m64_interpolation_low, m64_interpolation_high);
4074 
4075  // no we can store the following pattern as one block:
4076 
4077  // 76 54 32 10
4078  // YA YA YA YA
4079 
4080  vst1_u8((uint8_t*)targetPositionPixels, m64_interpolation);
4081 }
4082 
4083 template <>
4084 OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<3u>(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const DataType<uint8_t, 3u>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, 3u>::Type* targetPositionPixels)
4085 {
4086  ocean_assert(source != nullptr);
4087  ocean_assert(targetPositionPixels != nullptr);
4088 
4089  // as we do not initialize the following intermediate data,
4090  // we hopefully will not allocate memory on the stack each time this function is called
4091  uint32_t topLeftPixels[4];
4092  uint32_t topRightPixels[4];
4093  uint32_t bottomLeftPixels[4];
4094  uint32_t bottomRightPixels[4];
4095 
4096  // we will store the pixel information in the following pattern, note the padding byte after each pixel (here for RGB):
4097  // FEDCBA9876543210
4098  // BGR BGR BGR BGR
4099 
4100  // we gather the individual source pixel values from the source image,
4101  // based on the calculated pixel locations
4102  for (unsigned int i = 0u; i < 4u; ++i)
4103  {
4104  if (validPixels[i])
4105  {
4106  memcpy(topLeftPixels + i, source + offsetsTopLeftElements[i], sizeof(uint8_t) * 3);
4107  memcpy(topRightPixels + i, source + offsetsTopRightElements[i], sizeof(uint8_t) * 3);
4108  memcpy(bottomLeftPixels + i, source + offsetsBottomLeftElements[i], sizeof(uint8_t) * 3);
4109  memcpy(bottomRightPixels + i, source + offsetsBottomRightElements[i], sizeof(uint8_t) * 3);
4110  }
4111  else
4112  {
4113  memcpy(topLeftPixels + i, &borderColor, sizeof(uint8_t) * 3);
4114  memcpy(topRightPixels + i, &borderColor, sizeof(uint8_t) * 3);
4115  memcpy(bottomLeftPixels + i, &borderColor, sizeof(uint8_t) * 3);
4116  memcpy(bottomRightPixels + i, &borderColor, sizeof(uint8_t) * 3);
4117  }
4118  }
4119 
4120  static_assert(sizeof(uint32x4_t) == sizeof(topLeftPixels), "Invalid data type!");
4121 
4122  const uint32x4_t m128_topLeftPixels = vld1q_u32(topLeftPixels);
4123  const uint32x4_t m128_topRightPixels = vld1q_u32(topRightPixels);
4124  const uint32x4_t m128_bottomLeftPixels = vld1q_u32(bottomLeftPixels);
4125  const uint32x4_t m128_bottomRightPixels = vld1q_u32(bottomRightPixels);
4126 
4127 
4128  // factorLeft = 128 - factorRight
4129  // factorTop = 128 - factorBottom
4130 
4131  const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4132  const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
4133 
4134  // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
4135  // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
4136 
4137  const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4138  const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4139  const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4140  const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
4141 
4142 
4143  const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
4144 
4145  uint32x4_t m128_muliplicationChannel0 = vmulq_u32(vandq_u32(m128_topLeftPixels, m128_maskFirstByte), m128_factorsTopLeft);
4146  uint32x4_t m128_muliplicationChannel1 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 8), m128_maskFirstByte), m128_factorsTopLeft);
4147  uint32x4_t m128_muliplicationChannel2 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 16), m128_maskFirstByte), m128_factorsTopLeft);
4148 
4149  m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_topRightPixels, m128_maskFirstByte), m128_factorsTopRight));
4150  m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 8), m128_maskFirstByte), m128_factorsTopRight));
4151  m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 16), m128_maskFirstByte), m128_factorsTopRight));
4152 
4153  m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomLeftPixels, m128_maskFirstByte), m128_factorsBottomLeft));
4154  m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 8), m128_maskFirstByte), m128_factorsBottomLeft));
4155  m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 16), m128_maskFirstByte), m128_factorsBottomLeft));
4156 
4157  m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomRightPixels, m128_maskFirstByte), m128_factorsBottomRight));
4158  m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 8), m128_maskFirstByte), m128_factorsBottomRight));
4159  m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 16), m128_maskFirstByte), m128_factorsBottomRight));
4160 
4161 
4162  // we add 8192 and shift by 14 bits
4163 
4164  const uint32x4_t m128_interpolation0 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel0, vdupq_n_u32(8192u)), 14);
4165  const uint32x4_t m128_interpolation1 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel1, vdupq_n_u32(8192u)), 14);
4166  const uint32x4_t m128_interpolation2 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel2, vdupq_n_u32(8192u)), 14);
4167 
4168  // finaly we blend the interpolation results together
4169 
4170  const uint32x4_t m128_interpolation = vorrq_u32(vorrq_u32(m128_interpolation0, vshlq_n_u32(m128_interpolation1, 8)), vshlq_n_u32(m128_interpolation2, 16));
4171 
4172  // we have to extract the get rid of the padding byte:
4173  // FEDCBA9876543210
4174  // BGR BGR BGR BGR
4175 
4176  uint32_t intermediateBuffer[4];
4177  vst1q_u32(intermediateBuffer, m128_interpolation);
4178 
4179  for (unsigned int i = 0u; i < 4u; ++i)
4180  {
4181  memcpy(targetPositionPixels + i, intermediateBuffer + i, sizeof(uint8_t) * 3);
4182  }
4183 }
4184 
4185 template <>
4186 OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<4u>(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const DataType<uint8_t, 4u>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, 4u>::Type* targetPositionPixels)
4187 {
4188  ocean_assert(source != nullptr);
4189  ocean_assert(targetPositionPixels != nullptr);
4190 
4191  typedef typename DataType<uint8_t, 4u>::Type PixelType;
4192 
4193  // as we do not initialize the following intermediate data,
4194  // we hopefully will not allocate memory on the stack each time this function is called
4195  PixelType topLeftPixels[4];
4196  PixelType topRightPixels[4];
4197  PixelType bottomLeftPixels[4];
4198  PixelType bottomRightPixels[4];
4199 
4200  // we will store the pixel information in the following pattern (here for RGBA):
4201  // FEDC BA98 7654 3210
4202  // ABGR ABGR ABGR ABGR
4203 
4204  // we gather the individual source pixel values from the source image,
4205  // based on the calculated pixel locations
4206  for (unsigned int i = 0u; i < 4u; ++i)
4207  {
4208  if (validPixels[i])
4209  {
4210  *(topLeftPixels + i) = *((const PixelType*)(source + offsetsTopLeftElements[i]));
4211  *(topRightPixels + i) = *((const PixelType*)(source + offsetsTopRightElements[i]));
4212  *(bottomLeftPixels + i) = *((const PixelType*)(source + offsetsBottomLeftElements[i]));
4213  *(bottomRightPixels + i) = *((const PixelType*)(source + offsetsBottomRightElements[i]));
4214  }
4215  else
4216  {
4217  *(topLeftPixels + i) = borderColor;
4218  *(topRightPixels + i) = borderColor;
4219  *(bottomLeftPixels + i) = borderColor;
4220  *(bottomRightPixels + i) = borderColor;
4221  }
4222  }
4223 
4224  static_assert(sizeof(uint32x4_t) == sizeof(topLeftPixels), "Invalid data type!");
4225 
4226  const uint32x4_t m128_topLeftPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)topLeftPixels));
4227  const uint32x4_t m128_topRightPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)topRightPixels));
4228  const uint32x4_t m128_bottomLeftPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)bottomLeftPixels));
4229  const uint32x4_t m128_bottomRightPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)bottomRightPixels));
4230 
4231 
4232  // factorLeft = 128 - factorRight
4233  // factorTop = 128 - factorBottom
4234 
4235  const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4236  const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
4237 
4238  // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
4239  // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
4240 
4241  const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4242  const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4243  const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4244  const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
4245 
4246 
4247  const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
4248 
4249  uint32x4_t m128_muliplicationChannel0 = vmulq_u32(vandq_u32(m128_topLeftPixels, m128_maskFirstByte), m128_factorsTopLeft);
4250  uint32x4_t m128_muliplicationChannel1 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 8), m128_maskFirstByte), m128_factorsTopLeft);
4251  uint32x4_t m128_muliplicationChannel2 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 16), m128_maskFirstByte), m128_factorsTopLeft);
4252  uint32x4_t m128_muliplicationChannel3 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 24), m128_maskFirstByte), m128_factorsTopLeft);
4253 
4254  m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_topRightPixels, m128_maskFirstByte), m128_factorsTopRight));
4255  m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 8), m128_maskFirstByte), m128_factorsTopRight));
4256  m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 16), m128_maskFirstByte), m128_factorsTopRight));
4257  m128_muliplicationChannel3 = vaddq_u32(m128_muliplicationChannel3, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 24), m128_maskFirstByte), m128_factorsTopRight));
4258 
4259  m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomLeftPixels, m128_maskFirstByte), m128_factorsBottomLeft));
4260  m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 8), m128_maskFirstByte), m128_factorsBottomLeft));
4261  m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 16), m128_maskFirstByte), m128_factorsBottomLeft));
4262  m128_muliplicationChannel3 = vaddq_u32(m128_muliplicationChannel3, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 24), m128_maskFirstByte), m128_factorsBottomLeft));
4263 
4264  m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomRightPixels, m128_maskFirstByte), m128_factorsBottomRight));
4265  m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 8), m128_maskFirstByte), m128_factorsBottomRight));
4266  m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 16), m128_maskFirstByte), m128_factorsBottomRight));
4267  m128_muliplicationChannel3 = vaddq_u32(m128_muliplicationChannel3, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 24), m128_maskFirstByte), m128_factorsBottomRight));
4268 
4269 
4270  // we add 8192 and shift by 14 bits
4271 
4272  const uint32x4_t m128_interpolation0 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel0, vdupq_n_u32(8192u)), 14);
4273  const uint32x4_t m128_interpolation1 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel1, vdupq_n_u32(8192u)), 14);
4274  const uint32x4_t m128_interpolation2 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel2, vdupq_n_u32(8192u)), 14);
4275  const uint32x4_t m128_interpolation3 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel3, vdupq_n_u32(8192u)), 14);
4276 
4277  // finaly we blend the interpolation results together
4278 
4279  const uint32x4_t m128_interpolation = vorrq_u32(vorrq_u32(m128_interpolation0, vshlq_n_u32(m128_interpolation1, 8)), vorrq_u32(vshlq_n_u32(m128_interpolation2, 16), vshlq_n_u32(m128_interpolation3, 24)));
4280 
4281  vst1q_u8((uint8_t*)targetPositionPixels, vreinterpretq_u8_u32(m128_interpolation));
4282 }
4283 
4284 template <unsigned int tChannels>
4285 OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const typename DataType<uint8_t, tChannels>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, tChannels>::Type* targetPositionPixels)
4286 {
4287  ocean_assert(source != nullptr);
4288  ocean_assert(targetPositionPixels != nullptr);
4289 
4290  // as we do not initialize the following intermediate data,
4291  // we hopefully will not allocate memory on the stack each time this function is called
4292  unsigned int factorsTopLeft[4];
4293  unsigned int factorsTopRight[4];
4294  unsigned int factorsBottomLeft[4];
4295  unsigned int factorsBottomRight[4];
4296 
4297 
4298  // factorLeft = 128 - factorRight
4299  // factorTop = 128 - factorBottom
4300 
4301  const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4302  const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
4303 
4304  // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
4305  // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
4306 
4307  const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4308  const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4309  const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4310  const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
4311 
4312 
4313  // we store the interpolation factors
4314  vst1q_u32(factorsTopLeft, m128_factorsTopLeft);
4315  vst1q_u32(factorsTopRight, m128_factorsTopRight);
4316  vst1q_u32(factorsBottomLeft, m128_factorsBottomLeft);
4317  vst1q_u32(factorsBottomRight, m128_factorsBottomRight);
4318 
4319  for (unsigned int i = 0u; i < 4u; ++i)
4320  {
4321  if (validPixels[i])
4322  {
4323  const uint8_t* topLeft = source + offsetsTopLeftElements[i];
4324  const uint8_t* topRight = source + offsetsTopRightElements[i];
4325 
4326  const uint8_t* bottomLeft = source + offsetsBottomLeftElements[i];
4327  const uint8_t* bottomRight = source + offsetsBottomRightElements[i];
4328 
4329  const unsigned int& factorTopLeft = factorsTopLeft[i];
4330  const unsigned int& factorTopRight = factorsTopRight[i];
4331  const unsigned int& factorBottomLeft = factorsBottomLeft[i];
4332  const unsigned int& factorBottomRight = factorsBottomRight[i];
4333 
4334  for (unsigned int n = 0u; n < tChannels; ++n)
4335  {
4336  ((uint8_t*)targetPositionPixels)[n] = (topLeft[n] * factorTopLeft + topRight[n] * factorTopRight + bottomLeft[n] * factorBottomLeft + bottomRight[n] * factorBottomRight + 8192u) >> 14u;
4337  }
4338  }
4339  else
4340  {
4341  *targetPositionPixels = borderColor;
4342  }
4343 
4344  targetPositionPixels++;
4345  }
4346 }
4347 
4348 #endif // OCEAN_HARDWARE_NEON_VERSION
4349 
4350 template <unsigned int tChannels>
4351 inline void FrameInterpolatorBilinear::homographies8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* homographies, const uint8_t* borderColor, uint8_t* output, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
4352 {
4353  static_assert(tChannels >= 1u, "Invalid channel number!");
4354 
4355  ocean_assert(input && output);
4356  ocean_assert(inputWidth > 0u && inputHeight > 0u);
4357  ocean_assert(outputWidth > 0u && outputHeight > 0u);
4358 
4359  ocean_assert(outputQuadrantCenterX >= 0 && outputQuadrantCenterX < Scalar(outputWidth));
4360  ocean_assert(outputQuadrantCenterY >= 0 && outputQuadrantCenterY < Scalar(outputHeight));
4361  ocean_assert(homographies);
4362 
4363  const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
4364 
4365  const Scalar scalarInputWidth_1 = Scalar(inputWidth - 1u);
4366  const Scalar scalarInputHeight_1 = Scalar(inputHeight - 1u);
4367 
4368  constexpr uint8_t zeroColor[tChannels] = {uint8_t(0)};
4369  const uint8_t* const bColor = borderColor ? borderColor : zeroColor;
4370 
4371  uint8_t* outputData = output + firstOutputRow * outputStrideElements;
4372 
4373  const Scalar left = Scalar(outputQuadrantCenterX) * Scalar(0.5);
4374  const Scalar right = (Scalar(outputWidth) + Scalar(outputQuadrantCenterX)) * Scalar(0.5);
4375 
4376  const Scalar top = Scalar(outputQuadrantCenterY) * Scalar(0.5);
4377  const Scalar bottom = (Scalar(outputHeight) + Scalar(outputQuadrantCenterY)) * Scalar(0.5);
4378 
4379  ocean_assert(right - left > Numeric::eps());
4380  ocean_assert(bottom - top > Numeric::eps());
4381 
4382  const Scalar invWidth = Scalar(1) / Scalar(right - left);
4383  const Scalar invHeight = Scalar(1) / Scalar(bottom - top);
4384 
4385  for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
4386  {
4387  for (unsigned int x = 0; x < outputWidth; ++x)
4388  {
4389  Vector2 outputPosition = Vector2(Scalar(int(x)), Scalar(int(y)));
4390 
4391  const Scalar _tx = minmax<Scalar>(0, (outputPosition.x() - left) * invWidth, 1);
4392  const Scalar _ty = minmax<Scalar>(0, (outputPosition.y() - top) * invHeight, 1);
4393 
4394  outputPosition += Vector2(Scalar(outputOriginX), Scalar(outputOriginY));
4395 
4396  const Scalar tx = 1 - _tx;
4397  const Scalar ty = 1 - _ty;
4398 
4399  const Vector2 inputPositionTopLeft(homographies[0] * outputPosition);
4400  const Vector2 inputPositionTopRight(homographies[1] * outputPosition);
4401  const Vector2 inputPositionBottomLeft(homographies[2] * outputPosition);
4402  const Vector2 inputPositionBottomRight(homographies[3] * outputPosition);
4403 
4404  const Scalar tTopLeft = tx * ty;
4405  const Scalar tTopRight = _tx * ty;
4406  const Scalar tBottomLeft = tx * _ty;
4407  const Scalar tBottomRight = _tx * _ty;
4408 
4409  const Vector2 inputPosition = inputPositionTopLeft * tTopLeft + inputPositionTopRight * tTopRight
4410  + inputPositionBottomLeft * tBottomLeft + inputPositionBottomRight * tBottomRight;
4411 
4412  if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4413  {
4414  for (unsigned int c = 0u; c < tChannels; ++c)
4415  {
4416  outputData[c] = bColor[c];
4417  }
4418  }
4419  else
4420  {
4421  interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, outputData);
4422  }
4423 
4424  outputData += tChannels;
4425  }
4426 
4427  outputData += outputPaddingElements;
4428  }
4429 }
4430 
4431 template <unsigned int tChannels>
4432 void FrameInterpolatorBilinear::homographyMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, unsigned int firstOutputRow, const unsigned int numberOutputRows)
4433 {
4434  static_assert(tChannels >= 1u, "Invalid channel number!");
4435 
4436  ocean_assert(input != nullptr && output != nullptr);
4437  ocean_assert(inputWidth > 0u && inputHeight > 0u);
4438  ocean_assert(outputWidth > 0u && outputHeight > 0u);
4439  ocean_assert(input_H_output != nullptr);
4440 
4441  ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
4442 
4443  const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
4444  const unsigned int outputMaskStrideElements = outputWidth + outputMaskPaddingElements;
4445 
4446  const Scalar scalarInputWidth_1 = Scalar(inputWidth - 1u);
4447  const Scalar scalarInputHeight_1 = Scalar(inputHeight - 1u);
4448 
4449  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
4450 
4451  for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
4452  {
4453  PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
4454  uint8_t* outputMaskData = outputMask + y * outputMaskStrideElements;
4455 
4456  /*
4457  * We can slightly optimize the 3x3 matrix multiplication:
4458  *
4459  * | X0 Y0 Z0 | | x |
4460  * | X1 Y1 Z1 | * | y |
4461  * | X2 Y2 Z2 | | 1 |
4462  *
4463  * | x' | | X0 * x | | Y0 * y + Z0 |
4464  * | y' | = | X1 * x | + | Y1 * y + Z1 |
4465  * | z' | | X2 * x | | Y2 * y + Z2 |
4466  *
4467  * As y is constant within the inner loop, we can pre-calculate the following terms:
4468  *
4469  * | x' | | (X0 * x + constValue0) / (X2 * x + constValue2) |
4470  * | y' | = | (X1 * x + constValue1) / (X2 * x + constValue2) |
4471  *
4472  * | p | = | (X * x + c) / (X2 * x + constValue2) |
4473  */
4474 
4475  const Vector2 X(input_H_output->data() + 0);
4476  const Vector2 c(Vector2(input_H_output->data() + 3) * Scalar(y) + Vector2(input_H_output->data() + 6));
4477 
4478  const Scalar X2 = (*input_H_output)(2, 0);
4479  const Scalar constValue2 = (*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2);
4480 
4481  for (unsigned int x = 0; x < outputWidth; ++x)
4482  {
4483  const Vector2 inputPosition((X * Scalar(x) + c) / (X2 * Scalar(x) + constValue2));
4484 
4485 #ifdef OCEAN_DEBUG
4486  const Vector2 debugInputPosition(*input_H_output * Vector2(Scalar(x), Scalar(y)));
4487  ocean_assert(inputPosition.isEqual(debugInputPosition, Scalar(0.01)));
4488 #endif
4489 
4490  if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4491  {
4492  *outputMaskData = 0xFF - maskValue;
4493  }
4494  else
4495  {
4496  interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputData));
4497  *outputMaskData = maskValue;
4498  }
4499 
4500  outputData++;
4501  outputMaskData++;
4502  }
4503  }
4504 }
4505 
4506 template <unsigned int tChannels>
4507 inline void FrameInterpolatorBilinear::homographiesMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* homographies, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
4508 {
4509  static_assert(tChannels >= 1u, "Invalid channel number!");
4510 
4511  ocean_assert(input && output);
4512  ocean_assert(inputWidth > 0u && inputHeight > 0u);
4513  ocean_assert(outputWidth > 0u && outputHeight > 0u);
4514 
4515  ocean_assert(outputQuadrantCenterX >= 0 && outputQuadrantCenterX < Scalar(outputWidth));
4516  ocean_assert(outputQuadrantCenterY >= 0 && outputQuadrantCenterY < Scalar(outputHeight));
4517  ocean_assert(homographies);
4518 
4519  const unsigned int outputStrideElements = tChannels * outputWidth + outputPaddingElements;
4520  const unsigned int outputMaskStrideElements = outputWidth + outputMaskPaddingElements;
4521 
4522  const Scalar scalarInputWidth_1 = Scalar(inputWidth - 1u);
4523  const Scalar scalarInputHeight_1 = Scalar(inputHeight - 1u);
4524 
4525  uint8_t* outputData = output + firstOutputRow * outputStrideElements;
4526  outputMask += firstOutputRow * outputMaskStrideElements;
4527 
4528  const Scalar left = Scalar(outputQuadrantCenterX) * Scalar(0.5);
4529  const Scalar right = (Scalar(outputWidth) + Scalar(outputQuadrantCenterX)) * Scalar(0.5);
4530 
4531  const Scalar top = Scalar(outputQuadrantCenterY) * Scalar(0.5);
4532  const Scalar bottom = (Scalar(outputHeight) + Scalar(outputQuadrantCenterY)) * Scalar(0.5);
4533 
4534  ocean_assert(right - left > Numeric::eps());
4535  ocean_assert(bottom - top > Numeric::eps());
4536 
4537  const Scalar invWidth = Scalar(1) / Scalar(right - left);
4538  const Scalar invHeight = Scalar(1) / Scalar(bottom - top);
4539 
4540  for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
4541  {
4542  for (unsigned int x = 0u; x < outputWidth; ++x)
4543  {
4544  Vector2 outputPosition = Vector2(Scalar(int(x)), Scalar(int(y)));
4545 
4546  const Scalar _tx = minmax<Scalar>(0, (outputPosition.x() - left) * invWidth, 1);
4547  const Scalar _ty = minmax<Scalar>(0, (outputPosition.y() - top) * invHeight, 1);
4548 
4549  outputPosition += Vector2(Scalar(outputOriginX), Scalar(outputOriginY));
4550 
4551  const Scalar tx = 1 - _tx;
4552  const Scalar ty = 1 - _ty;
4553 
4554  const Vector2 inputPositionTopLeft(homographies[0] * outputPosition);
4555  const Vector2 inputPositionTopRight(homographies[1] * outputPosition);
4556  const Vector2 inputPositionBottomLeft(homographies[2] * outputPosition);
4557  const Vector2 inputPositionBottomRight(homographies[3] * outputPosition);
4558 
4559  const Scalar tTopLeft = tx * ty;
4560  const Scalar tTopRight = _tx * ty;
4561  const Scalar tBottomLeft = tx * _ty;
4562  const Scalar tBottomRight = _tx * _ty;
4563 
4564  const Vector2 inputPosition = inputPositionTopLeft * tTopLeft + inputPositionTopRight * tTopRight
4565  + inputPositionBottomLeft * tBottomLeft + inputPositionBottomRight * tBottomRight;
4566 
4567  if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4568  {
4569  *outputMask = 0xFFu - maskValue;
4570  }
4571  else
4572  {
4573  interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, outputData);
4574  *outputMask = maskValue;
4575  }
4576 
4577  outputData += tChannels;
4578  outputMask++;
4579  }
4580 
4581  outputData += outputPaddingElements;
4582  outputMask += outputMaskPaddingElements;
4583  }
4584 }
4585 
4586 template <unsigned int tChannels>
4587 void FrameInterpolatorBilinear::homographyWithCamera8BitPerChannelSubset(const PinholeCamera* inputCamera, const PinholeCamera* outputCamera, const PinholeCamera::DistortionLookup* outputCameraDistortionLookup, const uint8_t* input, const SquareMatrix3* normalizedHomography, const bool useDistortionParameters, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4588 {
4589  static_assert(tChannels >= 1u, "Invalid channel number!");
4590 
4591  ocean_assert(inputCamera && outputCamera && normalizedHomography);
4592  ocean_assert(input && output);
4593 
4594  ocean_assert(firstRow + numberRows <= outputCamera->height());
4595 
4596  const unsigned int outputStrideElements = tChannels * outputCamera->width() + outputPaddingElements;
4597 
4598  const Scalar scalarInputWidth_1 = Scalar(inputCamera->width() - 1u);
4599  const Scalar scalarInputHeight_1 = Scalar(inputCamera->height() - 1u);
4600 
4601  const SquareMatrix3 combinedMatrix(*normalizedHomography * outputCamera->invertedIntrinsic());
4602 
4603  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
4604 
4605  const uint8_t zeroColor[tChannels] = {uint8_t(0)};
4606  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
4607 
4608  uint8_t* outputData = output + firstRow * outputStrideElements;
4609 
4610  for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4611  {
4612  for (unsigned int x = 0; x < outputCamera->width(); ++x)
4613  {
4614  const Vector2 inputPosition(inputCamera->normalizedImagePoint2imagePoint<true>(combinedMatrix * outputCameraDistortionLookup->undistortedImagePoint(Vector2(Scalar(x), Scalar(y))), useDistortionParameters));
4615 
4616  if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4617  {
4618  *((PixelType*)outputData) = *bColor;
4619  }
4620  else
4621  {
4622  interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputCamera->width(), inputCamera->height(), inputPaddingElements, inputPosition, outputData);
4623  }
4624 
4625  outputData += tChannels;
4626  }
4627 
4628  outputData += outputPaddingElements;
4629  }
4630 }
4631 
4632 template <unsigned int tChannels>
4633 void FrameInterpolatorBilinear::homographyWithCameraMask8BitPerChannelSubset(const PinholeCamera* inputCamera, const PinholeCamera* outputCamera, const PinholeCamera::DistortionLookup* outputCameraDistortionLookup, const uint8_t* input, const unsigned int inputPaddingElements, const SquareMatrix3* normalizedHomography, uint8_t* output, uint8_t* outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows)
4634 {
4635  static_assert(tChannels >= 1u, "Invalid channel number!");
4636 
4637  ocean_assert(inputCamera != nullptr && outputCamera != nullptr && normalizedHomography != nullptr);
4638  ocean_assert(input != nullptr && output != nullptr);
4639 
4640  ocean_assert(firstRow + numberRows <= outputCamera->height());
4641 
4642  const unsigned int outputStrideElements = outputCamera->width() * tChannels + outputPaddingElements;
4643  const unsigned int outputMaskStrideElements = outputCamera->width() + outputMaskPaddingElements;
4644 
4645  const Scalar scalarInputWidth_1 = Scalar(inputCamera->width() - 1u);
4646  const Scalar scalarInputHeight_1 = Scalar(inputCamera->height() - 1u);
4647 
4648  const SquareMatrix3 combinedMatrix(*normalizedHomography * outputCamera->invertedIntrinsic());
4649 
4650  uint8_t* outputData = output + firstRow * outputStrideElements;
4651  outputMask += firstRow * outputMaskStrideElements;
4652 
4653  constexpr bool useDistortionParameters = true;
4654 
4655  for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4656  {
4657  for (unsigned int x = 0; x < outputCamera->width(); ++x)
4658  {
4659  const Vector2 inputPosition(inputCamera->normalizedImagePoint2imagePoint<true>(combinedMatrix * outputCameraDistortionLookup->undistortedImagePoint(Vector2(Scalar(x), Scalar(y))), useDistortionParameters));
4660 
4661  if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4662  {
4663  *outputMask = 0xFF - maskValue;
4664  }
4665  else
4666  {
4667  interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputCamera->width(), inputCamera->height(), inputPaddingElements, inputPosition, outputData);
4668  *outputMask = maskValue;
4669  }
4670 
4671  outputData += tChannels;
4672  ++outputMask;
4673  }
4674 
4675  outputData += outputPaddingElements;
4676  outputMask += outputMaskPaddingElements;
4677  }
4678 }
4679 
4680 template <unsigned int tChannels>
4681 void FrameInterpolatorBilinear::lookup8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4682 {
4683  static_assert(tChannels >= 1u, "Invalid channel number!");
4684 
4685  ocean_assert(input_LT_output != nullptr);
4686  ocean_assert(input != nullptr && output != nullptr);
4687 
4688  ocean_assert(inputWidth != 0u && inputHeight != 0u);
4689  ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
4690 
4691  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
4692 
4693  const uint8_t zeroColor[tChannels] = {uint8_t(0)};
4694  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
4695 
4696  const unsigned int columns = (unsigned int)(input_LT_output->sizeX());
4697 
4698  const unsigned int outputStrideElements = tChannels * columns + outputPaddingElements;
4699 
4700  static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
4701 
4702  const Scalar inputWidth1 = Scalar(inputWidth - 1u);
4703  const Scalar inputHeight1 = Scalar(inputHeight - 1u);
4704 
4705  Memory rowLookupMemory = Memory::create<Vector2>(columns);
4706  Vector2* const rowLookupData = rowLookupMemory.data<Vector2>();
4707 
4708  for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4709  {
4710  input_LT_output->bilinearValues(y, rowLookupData);
4711 
4712  PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
4713 
4714  for (unsigned int x = 0u; x < columns; ++x)
4715  {
4716  const Vector2& lookupValue = rowLookupData[x];
4717 
4718  const Vector2 inputPosition = offset ? Vector2(Scalar(x) + lookupValue.x(), Scalar(y) + lookupValue.y()) : lookupValue;
4719 
4720  if (inputPosition.x() >= Scalar(0) && inputPosition.y() >= Scalar(0) && inputPosition.x() <= inputWidth1 && inputPosition.y() <= inputHeight1)
4721  {
4722  interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputData));
4723  }
4724  else
4725  {
4726  *outputData = *bColor;
4727  }
4728 
4729  outputData++;
4730  }
4731  }
4732 }
4733 
4734 template <typename T, unsigned int tChannels>
4735 void FrameInterpolatorBilinear::lookupSubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const T* borderColor, T* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4736 {
4737  static_assert(tChannels >= 1u, "Invalid channel number!");
4738 
4739  ocean_assert((!std::is_same<uint8_t, T>::value));
4740 
4741  ocean_assert(input_LT_output != nullptr);
4742  ocean_assert(input != nullptr && output != nullptr);
4743 
4744  ocean_assert(inputWidth != 0u && inputHeight != 0u);
4745  ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
4746 
4747  typedef typename DataType<T, tChannels>::Type PixelType;
4748 
4749  const T zeroColor[tChannels] = {T(0)};
4750  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
4751 
4752  const unsigned int columns = (unsigned int)(input_LT_output->sizeX());
4753 
4754  const unsigned int outputStrideElements = tChannels * columns + outputPaddingElements;
4755 
4756  static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
4757 
4758  const Scalar inputWidth1 = Scalar(inputWidth - 1u);
4759  const Scalar inputHeight1 = Scalar(inputHeight - 1u);
4760 
4761  Memory rowLookupMemory = Memory::create<Vector2>(columns);
4762  Vector2* const rowLookupData = rowLookupMemory.data<Vector2>();
4763 
4764  for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4765  {
4766  input_LT_output->bilinearValues(y, rowLookupData);
4767 
4768  PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
4769 
4770  for (unsigned int x = 0u; x < columns; ++x)
4771  {
4772  const Vector2& lookupValue = rowLookupData[x];
4773 
4774  const Vector2 inputPosition = offset ? Vector2(Scalar(x) + lookupValue.x(), Scalar(y) + lookupValue.y()) : lookupValue;
4775 
4776  if (inputPosition.x() >= Scalar(0) && inputPosition.y() >= Scalar(0) && inputPosition.x() <= inputWidth1 && inputPosition.y() <= inputHeight1)
4777  {
4778  interpolatePixel<T, T, tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (T*)(outputData));
4779  }
4780  else
4781  {
4782  *outputData = *bColor;
4783  }
4784 
4785  outputData++;
4786  }
4787  }
4788 }
4789 
4790 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
4791 
// NEON specialization of the lookup-based resampling for frames with exactly one 8 bit channel.
// The function handles the output rows [firstRow, firstRow + numberRows) and processes 8 output pixels per iteration;
// the output width (sizeX() of the lookup table) must be at least 8.
// If the width is not a multiple of 8, the last iteration is shifted to the left and re-computes up to 7 pixels.
// Lookup values are either absolute input positions (offset == false) or offsets relative to the output pixel (offset == true).
// Pixels with invalid input position keep the border color in the gather buffer (zero if borderColor is nullptr),
// so that they are interpolated from four border-color samples.
4792 template <>
4793 inline void FrameInterpolatorBilinear::lookup8BitPerChannelSubsetNEON<1u>(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4794 {
4795  ocean_assert(input_LT_output != nullptr);
4796  ocean_assert(input != nullptr && output != nullptr);
4797 
4798  ocean_assert(inputWidth != 0u && inputHeight != 0u);
4799  ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
4800 
4801  typedef uint8_t PixelType;
4802  // the border color replicated to all 16 lanes, used to pre-fill the gather buffer in every iteration
4803  const uint8x16_t constantBorderColor_u_8x16 = vdupq_n_u8(borderColor ? *borderColor : 0u);
4804 
4805  const unsigned int outputWidth = (unsigned int)(input_LT_output->sizeX());
4806  ocean_assert(outputWidth >= 8u);
4807 
4808  static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
4809 
4810  const unsigned int inputStrideElements = inputWidth + inputPaddingElements;
4811  const unsigned int outputStrideElements = outputWidth + outputPaddingElements;
4812  // the lookup values of one output row are interpolated with 32 bit float precision
4813  Memory rowLookupMemory = Memory::create<VectorF2>(outputWidth);
4814  VectorF2* const rowLookupData = rowLookupMemory.data<VectorF2>();
4815 
4816  const float32x4_t constantZero_f_32x4 = vdupq_n_f32(0.0f); // [0.0f, 0.0f, 0.0f, 0.0f]
4817  const float32x4_t constantEight_f_32x4 = vdupq_n_f32(8.0f); // [8.0f, 8.0f, 8.0f, 8.0f], the horizontal step between two iterations
4818 
4819  // [0.0f, 1.0f, 2.0f, 3.0f, ...]
4820  const float f_01234567[8] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
4821  const float32x4_t conststant0123_f_32x4 = vld1q_f32(f_01234567 + 0);
4822  const float32x4_t conststant4567_f_32x4 = vld1q_f32(f_01234567 + 4);
4823  // scale factor converting interpolation factors from [0.0, 1.0] to [0, 128]
4824  const float32x4_t constant128_f_32x4 = vdupq_n_f32(128.0f);
4825 
4826  const uint32x4_t constantOne_u_32x4 = vdupq_n_u32(1u);
4827  // single channel: one pixel corresponds to one element
4828  const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(1u);
4829 
4830  const float32x4_t constantInputWidth1_f_32x4 = vdupq_n_f32(float(inputWidth - 1u));
4831  const float32x4_t constantInputHeight1_f_32x4 = vdupq_n_f32(float(inputHeight - 1u));
4832 
4833  const uint32x4_t constantInputStrideElements_u_32x4 = vdupq_n_u32(inputStrideElements);
4834  const uint32x4_t constantInputHeight1_u_32x4 = vdupq_n_u32(inputHeight - 1u);
4835  // per-pixel validity flags (non-zero for a valid input position) of the 8 pixels of one iteration
4836  unsigned int validPixels[8];
4837  // element offsets (from the start of the input frame) of the top-left and bottom-left source pixels
4838  unsigned int topLeftOffsetsElements[8];
4839  unsigned int bottomLeftOffsetsElements[8];
4840  // gather buffer: bytes [0, 16) hold 8 interleaved (left, right) pairs of the top row, bytes [16, 32) the 8 pairs of the bottom row
4841  uint8_t pixels[32];
4842 
4843  for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4844  {
4845  PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
4846 
4847  input_LT_output->bilinearValues<VectorF2>(y, rowLookupData);
4848  // in offset mode the lookup values are relative to the output pixel location, so we track (x, y) of the 8 current pixels
4849  float32x4_t additionalInputOffsetX0123_f_32x4 = conststant0123_f_32x4;
4850  float32x4_t additionalInputOffsetX4567_f_32x4 = conststant4567_f_32x4;
4851 
4852  const float32x4_t additionalInputOffsetY_f_32x4 = vdupq_n_f32(float(y));
4853 
4854  for (unsigned int x = 0u; x < outputWidth; x += 8u)
4855  {
4856  if (x + 8u > outputWidth)
4857  {
4858  // the last iteration will not fit into the output frame,
4859  // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
4860 
4861  ocean_assert(x >= 8u && outputWidth > 8u);
4862  const unsigned int newX = outputWidth - 8u;
4863 
4864  ocean_assert(x > newX);
4865  const unsigned int xOffset = x - newX;
4866 
4867  outputPixelData -= xOffset;
4868 
4869  if (offset)
4870  {
4871  additionalInputOffsetX0123_f_32x4 = vsubq_f32(additionalInputOffsetX0123_f_32x4, vdupq_n_f32(float(xOffset)));
4872  additionalInputOffsetX4567_f_32x4 = vsubq_f32(additionalInputOffsetX4567_f_32x4, vdupq_n_f32(float(xOffset)));
4873  }
4874 
4875  x = newX;
4876 
4877  // the for loop will stop after this iteration
4878  ocean_assert(!(x + 8u < outputWidth));
4879  }
4880  // de-interleaving load: val[0] holds the x-coordinates, val[1] the y-coordinates of four lookup values
4881  const float32x4x2_t inputPositions0123_f_32x4x2 = vld2q_f32((const float*)(rowLookupData + x + 0u));
4882  const float32x4x2_t inputPositions4567_f_32x4x2 = vld2q_f32((const float*)(rowLookupData + x + 4u));
4883 
4884  float32x4_t inputPositionsX0123_f_32x4 = inputPositions0123_f_32x4x2.val[0];
4885  float32x4_t inputPositionsY0123_f_32x4 = inputPositions0123_f_32x4x2.val[1];
4886 
4887  float32x4_t inputPositionsX4567_f_32x4 = inputPositions4567_f_32x4x2.val[0];
4888  float32x4_t inputPositionsY4567_f_32x4 = inputPositions4567_f_32x4x2.val[1];
4889 
4890  if (offset)
4891  {
4892  inputPositionsX0123_f_32x4 = vaddq_f32(inputPositionsX0123_f_32x4, additionalInputOffsetX0123_f_32x4);
4893  inputPositionsY0123_f_32x4 = vaddq_f32(inputPositionsY0123_f_32x4, additionalInputOffsetY_f_32x4);
4894 
4895  inputPositionsX4567_f_32x4 = vaddq_f32(inputPositionsX4567_f_32x4, additionalInputOffsetX4567_f_32x4);
4896  inputPositionsY4567_f_32x4 = vaddq_f32(inputPositionsY4567_f_32x4, additionalInputOffsetY_f_32x4);
4897  // preparing the x-offsets for the next iteration
4898  additionalInputOffsetX0123_f_32x4 = vaddq_f32(additionalInputOffsetX0123_f_32x4, constantEight_f_32x4);
4899  additionalInputOffsetX4567_f_32x4 = vaddq_f32(additionalInputOffsetX4567_f_32x4, constantEight_f_32x4);
4900  }
4901 
4902  // now we check whether we are inside the input frame
4903  const uint32x4_t validPixelsX0123_u_32x4 = vandq_u32(vcltq_f32(inputPositionsX0123_f_32x4, constantInputWidth1_f_32x4), vcgeq_f32(inputPositionsX0123_f_32x4, constantZero_f_32x4)); // inputPosition.x() >= 0 && inputPosition.x() < (inputWidth - 1) ? 0xFFFFFFFF : 0x00000000, strict upper bound as the left and the right pixel are copied at once
4904  const uint32x4_t validPixelsX4567_u_32x4 = vandq_u32(vcltq_f32(inputPositionsX4567_f_32x4, constantInputWidth1_f_32x4), vcgeq_f32(inputPositionsX4567_f_32x4, constantZero_f_32x4));
4905  // NOTE(review): the y-check below is strict (<) as well, while lookup8BitPerChannelSubset() accepts y == inputHeight - 1 -- confirm this is intended
4906  const uint32x4_t validPixelsY0123_u_32x4 = vandq_u32(vcltq_f32(inputPositionsY0123_f_32x4, constantInputHeight1_f_32x4), vcgeq_f32(inputPositionsY0123_f_32x4, constantZero_f_32x4)); // inputPosition.y() >= 0 && inputPosition.y() < (inputHeight - 1) ? 0xFFFFFFFF : 0x00000000
4907  const uint32x4_t validPixelsY4567_u_32x4 = vandq_u32(vcltq_f32(inputPositionsY4567_f_32x4, constantInputHeight1_f_32x4), vcgeq_f32(inputPositionsY4567_f_32x4, constantZero_f_32x4));
4908 
4909  const uint32x4_t validPixels0123_u_32x4 = vandq_u32(validPixelsX0123_u_32x4, validPixelsY0123_u_32x4); // is_inside_input_frame(inputPosition) ? 0xFFFFFFFF : 0x00000000
4910  const uint32x4_t validPixels4567_u_32x4 = vandq_u32(validPixelsX4567_u_32x4, validPixelsY4567_u_32x4);
4911 
4912  vst1q_u32(validPixels + 0, validPixels0123_u_32x4);
4913  vst1q_u32(validPixels + 4, validPixels4567_u_32x4);
4914 
4915  // integer coordinates of the left/top source pixels (float -> unsigned integer conversion)
4916  const uint32x4_t inputPositionsLeft0123_u_32x4 = vcvtq_u32_f32(inputPositionsX0123_f_32x4);
4917  const uint32x4_t inputPositionsLeft4567_u_32x4 = vcvtq_u32_f32(inputPositionsX4567_f_32x4);
4918 
4919  const uint32x4_t inputPositionsTop0123_u_32x4 = vcvtq_u32_f32(inputPositionsY0123_f_32x4);
4920  const uint32x4_t inputPositionsTop4567_u_32x4 = vcvtq_u32_f32(inputPositionsY4567_f_32x4);
4921  // bottom = min(top + 1, inputHeight - 1)
4922  const uint32x4_t inputPositionsBottom0123_u_32x4 = vminq_u32(vaddq_u32(inputPositionsTop0123_u_32x4, constantOne_u_32x4), constantInputHeight1_u_32x4);
4923  const uint32x4_t inputPositionsBottom4567_u_32x4 = vminq_u32(vaddq_u32(inputPositionsTop4567_u_32x4, constantOne_u_32x4), constantInputHeight1_u_32x4);
4924 
4925 
4926  const uint32x4_t topLeftOffsetsElements0123_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft0123_u_32x4, constantChannels_u_32x4), inputPositionsTop0123_u_32x4, constantInputStrideElements_u_32x4); // topLeftOffset = top * strideElements + left * channels
4927  vst1q_u32(topLeftOffsetsElements + 0, topLeftOffsetsElements0123_u_32x4);
4928  const uint32x4_t topLeftOffsetsElements4567_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft4567_u_32x4, constantChannels_u_32x4), inputPositionsTop4567_u_32x4, constantInputStrideElements_u_32x4);
4929  vst1q_u32(topLeftOffsetsElements + 4, topLeftOffsetsElements4567_u_32x4);
4930 
4931  const uint32x4_t bottomLeftOffsetsElements0123_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft0123_u_32x4, constantChannels_u_32x4), inputPositionsBottom0123_u_32x4, constantInputStrideElements_u_32x4);
4932  vst1q_u32(bottomLeftOffsetsElements + 0, bottomLeftOffsetsElements0123_u_32x4);
4933  const uint32x4_t bottomLeftOffsetsElements4567_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft4567_u_32x4, constantChannels_u_32x4), inputPositionsBottom4567_u_32x4, constantInputStrideElements_u_32x4);
4934  vst1q_u32(bottomLeftOffsetsElements + 4, bottomLeftOffsetsElements4567_u_32x4);
4935 
4936 
4937  // we determine the fractional portions of the x' and y' and [0.0, 1.0] -> [0, 128]
4938  float32x4_t tx0123_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsX0123_f_32x4, vcvtq_f32_u32(inputPositionsLeft0123_u_32x4)), constant128_f_32x4);
4939  float32x4_t tx4567_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsX4567_f_32x4, vcvtq_f32_u32(inputPositionsLeft4567_u_32x4)), constant128_f_32x4);
4940 
4941  float32x4_t ty0123_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsY0123_f_32x4, vcvtq_f32_u32(inputPositionsTop0123_u_32x4)), constant128_f_32x4);
4942  float32x4_t ty4567_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsY4567_f_32x4, vcvtq_f32_u32(inputPositionsTop4567_u_32x4)), constant128_f_32x4);
4943  // adding 0.5 before the integer conversion to round the factors
4944  const uint32x4_t tx0123_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(tx0123_f_32x4, vdupq_n_f32(0.5)));
4945  const uint32x4_t tx4567_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(tx4567_f_32x4, vdupq_n_f32(0.5)));
4946 
4947  const uint32x4_t ty0123_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(ty0123_f_32x4, vdupq_n_f32(0.5)));
4948  const uint32x4_t ty4567_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(ty4567_f_32x4, vdupq_n_f32(0.5)));
4949  // narrowing the factors 32 bit -> 16 bit -> 8 bit, the final vector holds [tx0, ..., tx7, ty0, ..., ty7]
4950  const uint16x8_t tx01234567_128_u_16x8 = vcombine_u16(vmovn_u32(tx0123_128_u_32x4), vmovn_u32(tx4567_128_u_32x4));
4951  const uint16x8_t ty01234567_128_u_16x8 = vcombine_u16(vmovn_u32(ty0123_128_u_32x4), vmovn_u32(ty4567_128_u_32x4));
4952 
4953  const uint8x16_t tx_ty_128_u_8x16 = vcombine_u8(vmovn_u16(tx01234567_128_u_16x8), vmovn_u16(ty01234567_128_u_16x8));
4954 
4955 
4956  vst1q_u8(pixels + 0, constantBorderColor_u_8x16); // initialize with border color
4957  vst1q_u8(pixels + 16, constantBorderColor_u_8x16);
4958  // helper type allowing to copy two horizontally neighboring 1-channel pixels with one assignment
4959  struct LeftRightPixel
4960  {
4961  uint8_t left;
4962  uint8_t right;
4963  };
4964 
4965  static_assert(sizeof(LeftRightPixel) == 2, "Invalid data type!");
4966 
4967  // we gather the individual source pixel values from the source image,
4968  // based on the calculated pixel locations
4969  for (unsigned int i = 0u; i < 8u; ++i)
4970  {
4971  if (validPixels[i])
4972  {
4973  ocean_assert((topLeftOffsetsElements[i] % inputStrideElements) < inputWidth - 1u); // we need to have one additional pixel to the right (as we copy two pixels at once)
4974  ocean_assert((bottomLeftOffsetsElements[i] % inputStrideElements) < inputWidth - 1u);
4975 
4976  ((LeftRightPixel*)pixels)[0u + i] = *(LeftRightPixel*)(input + topLeftOffsetsElements[i]);
4977  ((LeftRightPixel*)pixels)[8u + i] = *(LeftRightPixel*)(input + bottomLeftOffsetsElements[i]);
4978  }
4979  }
4980  // de-interleaving load: val[0] holds the 8 left pixels, val[1] the 8 right pixels
4981  const uint8x8x2_t topLeft_topRight_u_8x8x2 = vld2_u8(pixels);
4982  const uint8x8x2_t bottomLeft_bottomRight_u_8x8x2 = vld2_u8(pixels + 16);
4983 
4984  interpolate8Pixels1Channel8BitNEON(topLeft_topRight_u_8x8x2.val[0], topLeft_topRight_u_8x8x2.val[1], bottomLeft_bottomRight_u_8x8x2.val[0], bottomLeft_bottomRight_u_8x8x2.val[1], tx_ty_128_u_8x16, outputPixelData);
4985 
4986  outputPixelData += 8;
4987  }
4988  }
4989 }
4990 
// NEON implementation of the lookup-based resampling for 8 bit frames with tChannels channels.
// The function handles the output rows [firstRow, firstRow + numberRows) and processes 4 output pixels per iteration;
// the output width (sizeX() of the lookup table) must be at least 4.
// If the width is not a multiple of 4, the last iteration is shifted to the left and re-computes up to 3 pixels.
// Lookup values are either absolute input positions (offset == false) or offsets relative to the output pixel (offset == true).
// The validity flags and the border color (zero if borderColor is nullptr) are forwarded to interpolate4Pixels8BitPerChannelNEON().
4991 template <unsigned int tChannels>
4992 void FrameInterpolatorBilinear::lookup8BitPerChannelSubsetNEON(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4993 {
4994  ocean_assert(input_LT_output != nullptr);
4995  ocean_assert(input != nullptr && output != nullptr);
4996 
4997  ocean_assert(inputWidth != 0u && inputHeight != 0u);
4998  ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
4999 
5000  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
5001  // fallback border color with all channels zero, used if no explicit border color is provided
5002  const uint8_t zeroColor[tChannels] = {uint8_t(0)};
5003  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
5004 
5005  const unsigned int outputWidth = (unsigned int)(input_LT_output->sizeX());
5006  ocean_assert(outputWidth >= 4u);
5007 
5008  static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
5009 
5010  const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
5011  const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
5012  // the lookup values of one output row are interpolated with 32 bit float precision
5013  Memory rowLookupMemory = Memory::create<VectorF2>(outputWidth);
5014  VectorF2* const rowLookupData = rowLookupMemory.data<VectorF2>();
5015 
5016  const float32x4_t constantZero_f_32x4 = vdupq_n_f32(0.0f); // [0.0f, 0.0f, 0.0f, 0.0f]
5017  const float32x4_t constantFour_f_32x4 = vdupq_n_f32(4.0f); // [4.0f, 4.0f, 4.0f, 4.0f]
5018 
5019  // [0.0f, 1.0f, 2.0f, 3.0f]
5020  const float f_0123[4] = {0.0f, 1.0f, 2.0f, 3.0f};
5021  float32x4_t conststant0123_f_32x4 = vld1q_f32(f_0123);
5022 
5023  const uint32x4_t constantOne_u_32x4 = vdupq_n_u32(1u);
5024  // number of elements per pixel, used to convert pixel columns into element offsets
5025  const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(tChannels);
5026 
5027  const float32x4_t constantInputWidth1_f_32x4 = vdupq_n_f32(float(inputWidth - 1u));
5028  const float32x4_t constantInputHeight1_f_32x4 = vdupq_n_f32(float(inputHeight - 1u));
5029 
5030  const uint32x4_t constantInputStrideElements_u_32x4 = vdupq_n_u32(inputStrideElements);
5031  const uint32x4_t constantInputWidth1_u_32x4 = vdupq_n_u32(inputWidth - 1u);
5032  const uint32x4_t constantInputHeight1_u_32x4 = vdupq_n_u32(inputHeight - 1u);
5033  // per-pixel validity flags (non-zero for a valid input position) of the 4 pixels of one iteration
5034  unsigned int validPixels[4];
5035  // element offsets (from the start of the input frame) of the four source pixels of each of the 4 output pixels
5036  unsigned int topLeftOffsetsElements[4];
5037  unsigned int topRightOffsetsElements[4];
5038  unsigned int bottomLeftOffsetsElements[4];
5039  unsigned int bottomRightOffsetsElements[4];
5040 
5041  for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
5042  {
5043  PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
5044 
5045  input_LT_output->bilinearValues<VectorF2>(y, rowLookupData);
5046  // in offset mode the lookup values are relative to the output pixel location, so we track (x, y) of the 4 current pixels
5047  float32x4_t additionalInputOffsetX_f_32x4 = conststant0123_f_32x4;
5048  const float32x4_t additionalInputOffsetY_f_32x4 = vdupq_n_f32(float(y));
5049 
5050  for (unsigned int x = 0u; x < outputWidth; x += 4u)
5051  {
5052  if (x + 4u > outputWidth)
5053  {
5054  // the last iteration will not fit into the output frame,
5055  // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
5056 
5057  ocean_assert(x >= 4u && outputWidth > 4u);
5058  const unsigned int newX = outputWidth - 4u;
5059 
5060  ocean_assert(x > newX);
5061  const unsigned int xOffset = x - newX;
5062 
5063  outputPixelData -= xOffset;
5064 
5065  if (offset)
5066  {
5067  additionalInputOffsetX_f_32x4 = vsubq_f32(additionalInputOffsetX_f_32x4, vdupq_n_f32(float(xOffset)));
5068  }
5069 
5070  x = newX;
5071 
5072  // the for loop will stop after this iteration
5073  ocean_assert(!(x + 4u < outputWidth));
5074  }
5075  // de-interleaving load: val[0] holds the x-coordinates, val[1] the y-coordinates of four lookup values
5076  const float32x4x2_t inputPositions_f_32x4x2 = vld2q_f32((const float*)(rowLookupData + x));
5077 
5078  float32x4_t inputPositionsX_f_32x4 = inputPositions_f_32x4x2.val[0];
5079  float32x4_t inputPositionsY_f_32x4 = inputPositions_f_32x4x2.val[1];
5080 
5081  if (offset)
5082  {
5083  inputPositionsX_f_32x4 = vaddq_f32(inputPositionsX_f_32x4, additionalInputOffsetX_f_32x4);
5084  inputPositionsY_f_32x4 = vaddq_f32(inputPositionsY_f_32x4, additionalInputOffsetY_f_32x4);
5085  // preparing the x-offsets for the next iteration
5086  additionalInputOffsetX_f_32x4 = vaddq_f32(additionalInputOffsetX_f_32x4, constantFour_f_32x4);
5087  }
5088 
5089  // now we check whether we are inside the input frame
5090  const uint32x4_t validPixelsX_u_32x4 = vandq_u32(vcleq_f32(inputPositionsX_f_32x4, constantInputWidth1_f_32x4), vcgeq_f32(inputPositionsX_f_32x4, constantZero_f_32x4)); // inputPosition.x() >= 0 && inputPosition.x() <= (inputWidth - 1) ? 0xFFFFFFFF : 0x00000000
5091  const uint32x4_t validPixelsY_u_32x4 = vandq_u32(vcleq_f32(inputPositionsY_f_32x4, constantInputHeight1_f_32x4), vcgeq_f32(inputPositionsY_f_32x4, constantZero_f_32x4)); // inputPosition.y() >= 0 && inputPosition.y() <= (inputHeight - 1) ? 0xFFFFFFFF : 0x00000000
5092 
5093  const uint32x4_t validPixels_u_32x4 = vandq_u32(validPixelsX_u_32x4, validPixelsY_u_32x4); // is_inside_input_frame(inputPosition) ? 0xFFFFFFFF : 0x00000000
5094 
5095  vst1q_u32(validPixels, validPixels_u_32x4);
5096  // integer coordinates of the left/top source pixels (float -> unsigned integer conversion)
5097  const uint32x4_t inputPositionsLeft_u_32x4 = vcvtq_u32_f32(inputPositionsX_f_32x4);
5098  const uint32x4_t inputPositionsTop_u_32x4 = vcvtq_u32_f32(inputPositionsY_f_32x4);
5099  // right = min(left + 1, inputWidth - 1), bottom = min(top + 1, inputHeight - 1)
5100  const uint32x4_t inputPositionsRight_u_32x4 = vminq_u32(vaddq_u32(inputPositionsLeft_u_32x4, constantOne_u_32x4), constantInputWidth1_u_32x4);
5101  const uint32x4_t inputPositionsBottom_u_32x4 = vminq_u32(vaddq_u32(inputPositionsTop_u_32x4, constantOne_u_32x4), constantInputHeight1_u_32x4);
5102 
5103  const uint32x4_t topLeftOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft_u_32x4, constantChannels_u_32x4), inputPositionsTop_u_32x4, constantInputStrideElements_u_32x4); // topLeftOffset = top * strideElements + left * channels
5104  const uint32x4_t topRightOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsRight_u_32x4, constantChannels_u_32x4), inputPositionsTop_u_32x4, constantInputStrideElements_u_32x4);
5105  const uint32x4_t bottomLeftOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft_u_32x4, constantChannels_u_32x4), inputPositionsBottom_u_32x4, constantInputStrideElements_u_32x4);
5106  const uint32x4_t bottomRightOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsRight_u_32x4, constantChannels_u_32x4), inputPositionsBottom_u_32x4, constantInputStrideElements_u_32x4);
5107 
5108  vst1q_u32(topLeftOffsetsElements, topLeftOffsetsElements_u_32x4);
5109  vst1q_u32(topRightOffsetsElements, topRightOffsetsElements_u_32x4);
5110  vst1q_u32(bottomLeftOffsetsElements, bottomLeftOffsetsElements_u_32x4);
5111  vst1q_u32(bottomRightOffsetsElements, bottomRightOffsetsElements_u_32x4);
5112 
5113  // we determine the fractional portions of the x' and y':
5114  float32x4_t tx_f_32x4 = vsubq_f32(inputPositionsX_f_32x4, vcvtq_f32_u32(inputPositionsLeft_u_32x4));
5115  float32x4_t ty_f_32x4 = vsubq_f32(inputPositionsY_f_32x4, vcvtq_f32_u32(inputPositionsTop_u_32x4));
5116 
5117  // we use integer interpolation [0.0, 1.0] -> [0, 128]
5118  tx_f_32x4 = vmulq_f32(tx_f_32x4, vdupq_n_f32(128.0f));
5119  ty_f_32x4 = vmulq_f32(ty_f_32x4, vdupq_n_f32(128.0f));
5120  // adding 0.5 before the integer conversion to round the factors
5121  const uint32x4_t tx_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(tx_f_32x4, vdupq_n_f32(0.5)));
5122  const uint32x4_t ty_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(ty_f_32x4, vdupq_n_f32(0.5)));
5123 
5124  interpolate4Pixels8BitPerChannelNEON<tChannels>(input, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, tx_128_u_32x4, ty_128_u_32x4, outputPixelData);
5125 
5126  outputPixelData += 4;
5127  }
5128  }
5129 }
5130 
5131 #endif // defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5132 
5133 template <unsigned int tChannels>
5134 void FrameInterpolatorBilinear::lookupMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5135 {
5136  ocean_assert(input_LT_output != nullptr);
5137  ocean_assert(input != nullptr && output != nullptr);
5138 
5139  ocean_assert(inputWidth != 0u && inputHeight != 0u);
5140  ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
5141 
5142  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
5143 
5144  const unsigned int columns = (unsigned int)(input_LT_output->sizeX());
5145 
5146  const unsigned int outputStrideElements = tChannels * columns + outputPaddingElements;
5147  const unsigned int outputMaskStrideElements = columns + outputMaskPaddingElements;
5148 
5149  static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
5150 
5151  const Scalar inputWidth1 = Scalar(inputWidth - 1u);
5152  const Scalar inputHeight1 = Scalar(inputHeight - 1u);
5153 
5154  Memory rowLookupMemory = Memory::create<Vector2>(columns);
5155  Vector2* const rowLookupData = rowLookupMemory.data<Vector2>();
5156 
5157  for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
5158  {
5159  input_LT_output->bilinearValues(y, rowLookupData);
5160 
5161  PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
5162  uint8_t* outputMaskData = outputMask + y * outputMaskStrideElements;
5163 
5164  for (unsigned int x = 0u; x < columns; ++x)
5165  {
5166  const Vector2& lookupValue = rowLookupData[x];
5167 
5168  const Vector2 inputPosition = offset ? Vector2(Scalar(x) + lookupValue.x(), Scalar(y) + lookupValue.y()) : lookupValue;
5169 
5170  if (inputPosition.x() >= 0 && inputPosition.y() >= 0 && inputPosition.x() <= inputWidth1 && inputPosition.y() <= inputHeight1)
5171  {
5172  interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputData));
5173  *outputMaskData = maskValue;
5174  }
5175  else
5176  {
5177  *outputMaskData = 0xFFu - maskValue;
5178  }
5179 
5180  outputData++;
5181  outputMaskData++;
5182  }
5183  }
5184 }
5185 
5186 template <unsigned int tChannels>
5187 void FrameInterpolatorBilinear::scale8BitPerChannel(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
5188 {
5189  ocean_assert(source != nullptr && target != nullptr);
5190  ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
5191  ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
5192  ocean_assert(sourceX_s_targetX > 0.0);
5193  ocean_assert(sourceY_s_targetY > 0.0);
5194 
5195  if (sourceWidth == targetWidth && sourceHeight == targetHeight)
5196  {
5197  FrameConverter::subFrame<uint8_t>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, 0u, 0u, 0u, 0u, sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements);
5198  return;
5199  }
5200 
5201  if (worker && sourceWidth * tChannels >= 16u && targetWidth >= 8u)
5202  {
5203 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5204  if (sourceWidth <= 65535u && sourceHeight <= 65535u && targetWidth <= 65535u && targetHeight <= 65535u)
5205  {
5206  worker->executeFunction(Worker::Function::createStatic(&scale8BitPerChannelSubset7BitPrecisionNEON, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
5207  return;
5208  }
5209 #else
5210  worker->executeFunction(Worker::Function::createStatic(&scale8BitPerChannelSubset<tChannels>, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
5211 #endif
5212  }
5213  else
5214  {
5215  if (sourceWidth * tChannels >= 16u && targetWidth >= 8u)
5216  {
5217 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5218  if (sourceWidth <= 65535u && sourceHeight <= 65535u && targetWidth <= 65535u && targetHeight <= 65535u)
5219  {
5220  scale8BitPerChannelSubset7BitPrecisionNEON(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
5221  return;
5222  }
5223 #endif
5224  }
5225 
5226  scale8BitPerChannelSubset<tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
5227  }
5228 }
5229 
5230 template <unsigned int tChannels>
5231 void FrameInterpolatorBilinear::scale8BitPerChannelSubset(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
5232 {
5233  ocean_assert(source != nullptr && target != nullptr);
5234  ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
5235  ocean_assert_and_suppress_unused(targetWidth >= 1u && targetHeight >= 1u, targetHeight);
5236  ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5237 
5238  const Scalar sourceX_T_targetX = Scalar(sourceX_s_targetX);
5239  const Scalar sourceY_T_targetY = Scalar(sourceY_s_targetY);
5240 
5241  /*
5242  * We determine the sub-pixel accurate source location for each target pixel as follows:
5243  *
5244  * Example with a downsampling by factor 4:
5245  * sourceRow with 12 pixels: | 0 1 2 3 4 5 6 7 8 9 A B |
5246  * targetRow with 3 pixels: | 0 1 2 |
5247  *
5248  * Thus, the source row can be separated into three blocks;
5249  * and we want to extract the color information from the center of the blocks:
5250  * sourceRow with 12 pixels: | 0 1 2 3 | 4 5 6 7 | 8 9 A B |
5251  * targetRow with 3 pixels: | 0 | 1 | 2 | (sourceX_s_targetX = 4)
5252  *
5253  * Thus, we add 0.5 to each target coordinate before converting it to a source location;
5254  * and subtract 0.5 again afterwards:
5255  * sourceX = (targetX + 0.5) * sourceX_s_targetX - 0.5
5256  *
5257  * e.g., (0 + 0.5) * 4 - 0.5 = 1.5
5258  * (1 + 0.5) * 4 - 0.5 = 5.5
5259  *
5260  *
5261  * Example with a downsampling by factor 3:
5262  * sourceRow with 9 pixels: | 0 1 2 3 4 5 6 7 8 |
5263  * targetRow with 3 pixels: | 0 1 2 |
5264  *
5265  * sourceRow with 9 pixels: | 0 1 2 | 3 4 5 | 6 7 8 |
5266  * targetRow with 3 pixels: | 0 | 1 | 2 | (sourceX_s_targetX = 3)
5267  *
5268  * e.g., (0 + 0.5) * 3 - 0.5 = 1
5269  * (1 + 0.5) * 3 - 0.5 = 4
5270  *
5271  *
5272  * Example with a downsampling by factor 2:
5273  * sourceRow with 6 pixels: | 0 1 2 3 4 5 |
5274  * targetRow with 3 pixels: | 0 1 2 |
5275  *
5276  * sourceRow with 6 pixels: | 0 1 | 2 3 | 4 5 |
5277  * targetRow with 3 pixels: | 0 | 1 | 2 | (sourceX_s_targetX = 2)
5278  *
5279  * e.g., (0 + 0.5) * 2 - 0.5 = 0.5
5280  * (1 + 0.5) * 2 - 0.5 = 2.5
5281  *
5282  *
5283  * we can simplify the calculation (as we have a constant term):
5284  * sourceX = (sourceX_s_targetX * targetX) + (sourceX_s_targetX * 0.5 - 0.5)
5285  */
5286 
5287  const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
5288 
5289  const Scalar sourceX_T_targetXOffset = sourceX_T_targetX * Scalar(0.5) - Scalar(0.5);
5290  const Scalar sourceY_T_targetYOffset = sourceY_T_targetY * Scalar(0.5) - Scalar(0.5);
5291 
5292  const Scalar sourceWidth_1 = Scalar(sourceWidth - 1u);
5293  const Scalar sourceHeight_1 = Scalar(sourceHeight - 1u);
5294 
5295  target += (targetWidth * tChannels + targetPaddingElements) * firstTargetRow;
5296 
5297  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5298  {
5299  const Scalar sy = minmax(Scalar(0), sourceY_T_targetYOffset + sourceY_T_targetY * Scalar(y), sourceHeight_1);
5300  ocean_assert(sy >= Scalar(0) && sy < Scalar(sourceHeight));
5301 
5302  const unsigned int sTop = (unsigned int)sy;
5303  ocean_assert(sy >= Scalar(sTop));
5304 
5305  const Scalar ty = sy - Scalar(sTop);
5306  ocean_assert(ty >= 0 && ty <= 1);
5307 
5308  const unsigned int factorBottom = (unsigned int)(ty * Scalar(128) + Scalar(0.5));
5309  const unsigned int factorTop = 128u - factorBottom;
5310 
5311  const uint8_t* const sourceTop = source + sourceStrideElements * sTop;
5312  const uint8_t* const sourceBottom = (sTop + 1u < sourceHeight) ? sourceTop + sourceStrideElements : sourceTop;
5313 
5314  for (unsigned int x = 0; x < targetWidth; ++x)
5315  {
5316  const Scalar sx = minmax(Scalar(0), sourceX_T_targetXOffset + sourceX_T_targetX * Scalar(x), sourceWidth_1);
5317  ocean_assert(sx >= Scalar(0) && sx < Scalar(sourceWidth));
5318 
5319  const unsigned int sLeft = (unsigned int)sx;
5320  ocean_assert(sx >= Scalar(sLeft));
5321 
5322  const Scalar tx = sx - Scalar(sLeft);
5323  ocean_assert(tx >= 0 && tx <= 1);
5324 
5325  const unsigned int factorRight = (unsigned int)(tx * Scalar(128) + Scalar(0.5));
5326  const unsigned int factorLeft = 128u - factorRight;
5327 
5328  const unsigned int sourceRightOffset = sLeft + 1u < sourceWidth ? tChannels : 0u;
5329 
5330  const uint8_t* const sourceTopLeft = sourceTop + sLeft * tChannels;
5331  const uint8_t* const sourceBottomLeft = sourceBottom + sLeft * tChannels;
5332 
5333  const unsigned int factorTopLeft = factorTop * factorLeft;
5334  const unsigned int factorTopRight = factorTop * factorRight;
5335  const unsigned int factorBottomLeft = factorBottom * factorLeft;
5336  const unsigned int factorBottomRight = factorBottom * factorRight;
5337 
5338  for (unsigned int n = 0u; n < tChannels; ++n)
5339  {
5340  target[n] = (uint8_t)((sourceTopLeft[n] * factorTopLeft + sourceTopLeft[sourceRightOffset + n] * factorTopRight
5341  + sourceBottomLeft[n] * factorBottomLeft + sourceBottomLeft[sourceRightOffset + n] * factorBottomRight + 8192u) >> 14u);
5342  }
5343 
5344  target += tChannels;
5345  }
5346 
5347  target += targetPaddingElements;
5348  }
5349 }
5350 
5351 template <typename T>
5352 void FrameInterpolatorBilinear::interpolateRowVertical(const T* sourceRowTop, const T* sourceRowBottom, T* targetRow, const unsigned int elements, const float factorBottom)
5353 {
5354  ocean_assert(sourceRowTop != nullptr);
5355  ocean_assert(sourceRowBottom != nullptr);
5356  ocean_assert(targetRow != nullptr);
5357  ocean_assert(elements >= 1u);
5358  ocean_assert(factorBottom >= 0.0f && factorBottom <= 1.0f);
5359 
5360  typedef typename FloatTyper<T>::Type FloatType;
5361 
5362  const FloatType internalFactorBottom = FloatType(factorBottom);
5363  const FloatType internalFactorTop = FloatType(1.0f - factorBottom);
5364 
5365  for (unsigned int n = 0u; n < elements; ++n)
5366  {
5367  targetRow[n] = T(FloatType(sourceRowTop[n]) * internalFactorTop + FloatType(sourceRowBottom[n]) * internalFactorBottom);
5368  }
5369 }
5370 
5371 template <typename T, unsigned int tChannels>
5372 void FrameInterpolatorBilinear::interpolateRowHorizontal(const T* extendedSourceRow, T* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const float* interpolationFactorsRight)
5373 {
5374  static_assert(tChannels != 0u, "Invalid channel number!");
5375 
5376  ocean_assert(extendedSourceRow != nullptr);
5377  ocean_assert(targetRow != nullptr);
5378  ocean_assert(targetWidth >= 1u);
5379  ocean_assert(interpolationLocations != nullptr);
5380  ocean_assert(interpolationFactorsRight != nullptr);
5381  ocean_assert(channels == tChannels);
5382 
5383  typedef typename FloatTyper<T>::Type FloatType;
5384 
5385  for (unsigned int x = 0u; x < targetWidth; ++x)
5386  {
5387  const FloatType internalFactorRight = FloatType(interpolationFactorsRight[x]);
5388  ocean_assert(internalFactorRight >= FloatType(0) && internalFactorRight <= FloatType(1));
5389 
5390  const FloatType internalFactorLeft = FloatType(1.0f - interpolationFactorsRight[x]);
5391 
5392  const unsigned int& leftLocation = interpolationLocations[x];
5393  const unsigned int rightLocation = leftLocation + tChannels; // location is defined in relation to elements, not to pixels
5394 
5395  for (unsigned int n = 0u; n < tChannels; ++n)
5396  {
5397  targetRow[x * tChannels + n] = T(FloatType(extendedSourceRow[leftLocation + n]) * internalFactorLeft + FloatType(extendedSourceRow[rightLocation + n]) * internalFactorRight);
5398  }
5399  }
5400 }
5401 
5402 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5403 
5404 #ifdef OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION
5405 
/**
 * NEON-based specialization rescaling a subset of the rows of a frame with 2 channels and 8 bit per channel by application of a bilinear interpolation.
 * The function handles blocks of eight target pixels at once; the last block of a row is shifted to the left so that it ends exactly with the row (some pixels are calculated twice).
 * Source locations are calculated as fixed point numbers with 16 bit precision, the interpolation factors have a precision of 7 bit (values between 0 and 128).
 * After the NEON loop of each row, the target pixels starting at 'firstInvalidTargetX' are re-calculated without NEON instructions.
 * Beware: Padding elements are not supported; this implementation is compiled only if the surrounding (keep-for-reference) macro is defined.
 * NOTE(review): the meaning of the second template argument (8u) is not visible in this part of the file - presumably the number of pixels handled per iteration, TODO confirm.
 * @param source The source frame to rescale, must be valid
 * @param target The target frame receiving the rescaled image content, must be valid
 * @param sourceWidth Width of the source frame in pixel, with range [2, 65535]
 * @param sourceHeight Height of the source frame in pixel, with range [1, 65535]
 * @param targetWidth Width of the target frame in pixel, with range [8, 65535]
 * @param targetHeight Height of the target frame in pixel, with range [1, 65535]
 * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame, with range (0, infinity)
 * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame, with range (0, infinity)
 * @param sourcePaddingElements The number of padding elements at the end of each source row, must be 0
 * @param targetPaddingElements The number of padding elements at the end of each target row, must be 0
 * @param firstTargetRow The first target row to be handled
 * @param numberTargetRows The number of target rows to be handled
 */
5406 template <>
5407 inline void FrameInterpolatorBilinear::scale8BitPerChannelSubset7BitPrecisionNEON<2u, 8u>(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
5408 {
5409  ocean_assert(source != nullptr && target != nullptr);
5410  ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
5411  ocean_assert(sourceHeight >= 1u && sourceHeight <= 65535u);
5412  ocean_assert(targetWidth >= 8u && targetWidth <= 65535u);
5413  ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
5414  ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5415 
5416  ocean_assert(sourcePaddingElements == 0u); // not supported
5417  ocean_assert(targetPaddingElements == 0u);
5418 
5419  typedef typename DataType<uint8_t, 2u>::Type PixelType;
5420 
5421  PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
5422  const PixelType* const sourcePixelData = (const PixelType*)source;
5423 
5424  // our offset values for the eight left pixels in relation to the first pixel of the row
5425  unsigned int leftOffsets[8];
5426 
5427  // this function uses fixed point numbers with 16 bit for the calculation of the interpolation positions and factors:
5428  // fixedPointLocation = floatLocation * 2^16
5429  //
5430  // [FEDCBA98, 76543210]
5431  // [pixel , subpixel]
5432  //
5433  // fixedPointLocation = pixel + subpixel / 2^16
5434  //
5435  // Thus, the upper 16 bit represent the location of e.g., the left pixel (for the linear interpolation)
5436  // while the lower 16 bit represent one of both interpolation factors (and 2^16 - subpixel represents the second interpolation factor)
5437 
5438  const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
5439  const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
5440 
5441  const int targetOffsetX_fixed16 = (int)(double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
5442  const int targetOffsetY_fixed16 = (int)(double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
5443 
5444  // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
5445  const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
5446 
5447  // we store 4 integers: [targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16]
5448  const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
5449 
5450  // we store 4 integers: [sourceWidth - 2, sourceWidth - 2, sourceWidth - 2, sourceWidth - 2]
5451  const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
5452 
5453  // we store 4 integers: [0, 0, 0, 0]
5454  const int32x4_t m128_s_zero = vdupq_n_s32(0);
5455 
5456  const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
5457  const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
5458 
5459  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5460  {
         // the vertical source location as fixed point number, clamped to the range [0, sourceHeight - 1]
5461  const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 + int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
5462 
5463  const unsigned int sourceRowTop = sourceY_fixed16 >> 16u; // we must not round here
5464  const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
         // reducing the 16 bit sub-pixel part to a 7 bit interpolation factor (without rounding)
5465  const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
5466 
5467  const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
5468  // factorTop = 128 - factorBottom
5469  const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
5470 
         // for the very last source row, the bottom row is identical to the top row
5471  const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
5472 
5473  const PixelType* const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
5474  const PixelType* const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
5475 
5476  for (unsigned int x = 0; x < targetWidth; x += 8u)
5477  {
5478  if (x + 8u > targetWidth)
5479  {
5480  // the last iteration will not fit into the output frame,
5481  // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
5482 
5483  ocean_assert(x >= 8u && targetWidth > 8u);
5484  const unsigned int newX = targetWidth - 8u;
5485 
5486  ocean_assert(x > newX);
5487  targetPixelData -= x - newX;
5488 
5489  x = newX;
5490 
5491  // the for loop will stop after this iteration
5492  ocean_assert(!(x + 8u < targetWidth));
5493  }
5494 
5495 
5496  // we need eight successive x coordinates, stored as 32 bit integers in two registers:
5497  // [x + 3, x + 2, x + 1; x + 0] and [x + 7, x + 6, x + 5; x + 4]
5498  const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
5499  const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
5500 
5501  // we calculate the eight source locations (as fixed point numbers) for our eight target locations, negative locations are clamped to zero
5502  const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
5503  const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
5504 
5505  const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
5506  const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
5507 
5508  // now we determine the pixel/integer accurate source locations
5509  // m128_u_left = min(sourceX_fixed16 >> 16, sourceWidth - 2), so that the right neighbor (left + 1) is always inside the row
5510  const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2); // not vrshrq_n_u32 as we must not round here
5511  const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
5512 
5513  // we store the offsets we have calculated
5514  vst1q_u32(leftOffsets + 0, m128_u_left_0123);
5515  vst1q_u32(leftOffsets + 4, m128_u_left_4567);
5516 
5517 
5518 
5519  // we load the individual pixels to our four (de-interleaved) 8x8 bit registers (we do this for the top-left and top-right pixels)
5520  // note: loading of each pixel individually is significantly slower than loading two neighboring pixels within one iteration
5521 
5522  uint8x8x2_t topLeftPixels;
5523  uint8x8x2_t topRightPixels;
5524 
5525  topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 0), topLeftPixels, 0);
5526  topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 1), topRightPixels, 0);
5527 
5528  topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 0), topLeftPixels, 1);
5529  topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 1), topRightPixels, 1);
5530 
5531  topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 0), topLeftPixels, 2);
5532  topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 1), topRightPixels, 2);
5533 
5534  topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 0), topLeftPixels, 3);
5535  topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 1), topRightPixels, 3);
5536 
5537  topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 0), topLeftPixels, 4);
5538  topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 1), topRightPixels, 4);
5539 
5540  topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 0), topLeftPixels, 5);
5541  topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 1), topRightPixels, 5);
5542 
5543  topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 0), topLeftPixels, 6);
5544  topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 1), topRightPixels, 6);
5545 
5546  topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 0), topLeftPixels, 7);
5547  topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 1), topRightPixels, 7);
5548 
5549 
5550  // we load the individual pixels to our four (de-interleaved) 8x8 bit registers (we do this for the bottom-left and bottom-right pixels)
5551 
5552  uint8x8x2_t bottomLeftPixels;
5553  uint8x8x2_t bottomRightPixels;
5554 
5555  bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 0), bottomLeftPixels, 0);
5556  bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 1), bottomRightPixels, 0);
5557 
5558  bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 0), bottomLeftPixels, 1);
5559  bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 1), bottomRightPixels, 1);
5560 
5561  bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 0), bottomLeftPixels, 2);
5562  bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 1), bottomRightPixels, 2);
5563 
5564  bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 0), bottomLeftPixels, 3);
5565  bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 1), bottomRightPixels, 3);
5566 
5567  bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 0), bottomLeftPixels, 4);
5568  bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 1), bottomRightPixels, 4);
5569 
5570  bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 0), bottomLeftPixels, 5);
5571  bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 1), bottomRightPixels, 5);
5572 
5573  bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 0), bottomLeftPixels, 6);
5574  bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 1), bottomRightPixels, 6);
5575 
5576  bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 0), bottomLeftPixels, 7);
5577  bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 1), bottomRightPixels, 7);
5578 
5579 
5580 
5581  // we determine the multiplication factors for the right pixels - which are already stored in the lower 16 bits
5582  // we need an accuracy of 7 bits (values between 0 and 128):
5583  // 76 54 32 10
5584  // [F3 F2 F1 F0]
5585  const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5586  const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5587 
5588  // as we have the pixel information de-interleaved, we can store all 8 interpolation factors together into one 8x8 bit register:
5589  const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
5590  const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
         // factorLeft = 128 - factorRight
5591  const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
5592 
5593 
5594 
5595  // we determine the intermediate interpolation results for the top row (and we narrow down the 16 bit results to 8 bit results)
5596  uint16x8_t m128_muliplicationChannel_0 = vmull_u8(topLeftPixels.val[0], m64_u_factorsLeft);
5597  uint16x8_t m128_muliplicationChannel_1 = vmull_u8(topLeftPixels.val[1], m64_u_factorsLeft);
5598 
5599  m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, topRightPixels.val[0], m64_u_factorsRight);
5600  m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, topRightPixels.val[1], m64_u_factorsRight);
5601 
5602  uint8x8_t m64_topRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
5603  uint8x8_t m64_topRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
5604 
5605 
5606 
5607  // we determine the intermediate interpolation results for the bottom row (and we narrow down the 16 bit results to 8 bit results)
5608  m128_muliplicationChannel_0 = vmull_u8(bottomLeftPixels.val[0], m64_u_factorsLeft);
5609  m128_muliplicationChannel_1 = vmull_u8(bottomLeftPixels.val[1], m64_u_factorsLeft);
5610 
5611  m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, bottomRightPixels.val[0], m64_u_factorsRight);
5612  m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, bottomRightPixels.val[1], m64_u_factorsRight);
5613 
5614  uint8x8_t m64_bottomRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
5615  uint8x8_t m64_bottomRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
5616 
5617 
5618 
5619  // finally we determine the interpolation result between top and bottom row
5620  m128_muliplicationChannel_0 = vmull_u8(m64_topRowChannel_0, m64_u_factorsTop);
5621  m128_muliplicationChannel_1 = vmull_u8(m64_topRowChannel_1, m64_u_factorsTop);
5622 
5623  m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, m64_bottomRowChannel_0, m64_u_factorsBottom);
5624  m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, m64_bottomRowChannel_1, m64_u_factorsBottom);
5625 
5626 
5627  // we narrow down the interpolation results and we store them
5628  uint8x8x2_t result;
5629  result.val[0] = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
5630  result.val[1] = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
5631 
5632  // we write back the results and interleave them automatically
5633  vst2_u8((uint8_t*)targetPixelData, result);
5634 
5635  targetPixelData += 8;
5636  }
5637 
5638  // we need to process the last pixel again, as this pixel may have received wrong interpolation factors as we always load two successive pixels into our NEON registers
5639  // **TODO** this is just a temporary solution, check how we can avoid this additional step
5640 
         // the first target column for which the pixels are re-calculated below
5641  const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
5642 
5643  for (unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
5644  {
5645  const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 + int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
5646 
5647  const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
5648  ocean_assert(lastSourcePixelLeft < sourceWidth);
5649  const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
5650 
5651  const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
5652 
5653  const unsigned int factorRight = factorRight_fixed16 >> 9u;
5654  const unsigned int factorLeft = 128u - factorRight;
5655 
         // targetPixelData points behind the last pixel of the current row, so that (targetPixelData - (targetWidth - x)) is the target pixel in column x;
         // the combined factors sum up to 128 * 128 = 2^14, the result is normalized with rounding
5656  for (unsigned int c = 0u; c < 2u; ++c)
5657  {
5658  ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * (128u - factorBottom)
5659  + (((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
5660  }
5661  }
5662  }
5663 }
5664 
5665 #endif // OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION
5666 
5667 #ifdef OCEAN_WE_KEEP_ALSO_THIS_SLOW_IMPLEMENTATION_SHOWING_A_SLIGHTLY_DIFFERENT_APPROACH
5668 
5669 template <>
5670 inline void FrameInterpolatorBilinear::scale8BitPerChannelSubset7BitPrecisionNEON<2u, 8u>(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
5671 {
5672  ocean_assert(source != nullptr && target != nullptr);
5673  ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
5674  ocean_assert(sourceHeight >= 0u && sourceHeight <= 65535u);
5675  ocean_assert(targetWidth >= 8u && targetWidth <= 65535u)
5676  ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
5677  ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5678 
5679  ocean_assert(sourcePaddingElements == 0u); // not supported
5680  ocean_assert(targetPaddingElements == 0u);
5681 
5682  typedef typename DataType<uint8_t, 2u>::Type PixelType;
5683 
5684  PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
5685  const PixelType* const sourcePixelData = (const PixelType*)source;
5686 
5687  // our offset values for the four left pixels in relation to the first pixel of the row
5688  unsigned int leftOffsets[8];
5689 
5690  // our color values of the eight top and bottom pixels (32 bit = 16 bit left and 16 bit right)
5691  unsigned int topPixels[8];
5692  unsigned int bottomPixels[8];
5693 
5694  // this function uses fixed point numbers with 16 bit for the calculation of the interpolation positions and factors:
5695  // fixedPointLocation = floatLocation * 2^16
5696  //
5697  // [FEDCBA98, 76543210]
5698  // [pixel , subpixel]
5699  //
5700  // fixedPointLocation = pixel + subpixel / 2^16
5701  //
5702  // Thus, the upper 16 bit represent the location of e.g., the left pixel (for the linear interpolation)
5703  // while the lower 16 bit represent one of both interpolation factors (and 2^16 - subpixel represents the second interpolation factor)
5704 
5705  const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
5706  const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
5707 
5708  const int targetOffsetX_fixed16 = (int)(double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
5709  const int targetOffsetY_fixed16 = (int)(double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
5710 
5711  // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
5712  const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
5713 
5714  // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
5715  const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
5716 
5717  // we store 4 integers: [sourceWidth - 2, sourceWidth - 2, sourceWidth - 2, sourceWidth - 2]
5718  const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
5719 
5720  // we store 4 integers: [0, 0, 0, 0]
5721  const int32x4_t m128_s_zero = vdupq_n_s32(0);
5722 
5723  const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
5724  const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
5725 
5726  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5727  {
5728  const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 + int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
5729 
5730  const unsigned int sourceRowTop = sourceY_fixed16 >> 16u; // we must not round here
5731  const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
5732  const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
5733 
5734  const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
5735  // factorTop = 128 - factorBottom
5736  const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
5737 
5738  const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
5739 
5740  const PixelType* const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
5741  const PixelType* const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
5742 
5743  for (unsigned int x = 0; x < targetWidth; x += 8u)
5744  {
5745  if (x + 8u > targetWidth)
5746  {
5747  // the last iteration will not fit into the output frame,
5748  // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
5749 
5750  ocean_assert(x >= 8u && targetWidth > 8u);
5751  const unsigned int newX = targetWidth - 8u;
5752 
5753  ocean_assert(x > newX);
5754  targetPixelData -= x - newX;
5755 
5756  x = newX;
5757 
5758  // the for loop will stop after this iteration
5759  ocean_assert(!(x + 8u < targetWidth));
5760  }
5761 
5762 
5763  // we need four successive x coordinate floats:
5764  // [x + 3, x + 2, x + 1; x + 0]
5765  const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
5766  const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
5767 
5768  // we calculate the four source locations for our four target locations
5769  const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
5770  const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
5771 
5772  const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
5773  const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
5774 
5775  // now we determine the pixel/integer accurate source locations
5776  // m128_u_left = min(floor(m128_f_sourceX), sourceWidth - 2)
5777  const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2); // not vrshrq_n_u32 as we must not round here
5778  const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
5779 
5780  // we store the offsets we have calculated
5781  vst1q_u32(leftOffsets + 0, m128_u_left_0123);
5782  vst1q_u32(leftOffsets + 4, m128_u_left_4567);
5783 
5784 
5785 
5786  // we load the left and the right pixels into an intermediate buffer
5787  // with following pattern (with top-left TL, and top-right TR):
5788  // F E D C B A 9 8 7 6 5 4 3 2 1 0
5789  // [TR3 TR3 TL3 TL3 TR2 TR2 TL2 TL2 TR1 TR1 TL1 TL1 TR0 TR0 TL0 TL0]
5790  // [TR7 TR7 TL7 TL7 TR6 TR6 TL6 TL6 TR5 TR5 TL5 TL5 TR4 TR4 TL4 TL4]
5791 
5792  for (unsigned int n = 0u; n < 8u; ++n)
5793  {
5794  topPixels[n] = *(unsigned int*)(sourceTopRowPixelData + leftOffsets[n]);
5795  }
5796 
5797  const uint16x8_t m128_topPixels_0123 = vreinterpretq_u16_u32(vld1q_u32(topPixels + 0));
5798  const uint16x8_t m128_topPixels_4567 = vreinterpretq_u16_u32(vld1q_u32(topPixels + 4));
5799 
5800  for (unsigned int n = 0u; n < 8u; ++n)
5801  {
5802  bottomPixels[n] = *(unsigned int*)(sourceBottomRowPixelData + leftOffsets[n]);
5803  }
5804 
5805  const uint16x8_t m128_bottomPixels_0123 = vreinterpretq_u16_u32(vld1q_u32(bottomPixels + 0));
5806  const uint16x8_t m128_bottomPixels_4567 = vreinterpretq_u16_u32(vld1q_u32(bottomPixels + 4));
5807 
5808 
5809  // we determine the multiplication factors for the right pixels - which are already stored in the lower 16 bits
5810  // we need an accuracy of 7 bits (values between 0 and 128):
5811  // 76 54 32 10
5812  // [F3 F2 F1 F0]
5813  const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5814  const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5815 
5816  // as we will have the pixel information de-interleaved, we can store all 8 interpolation factors together into one 8x8 bit register:
5817  const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
5818  const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
5819 
5820  // nw we have the interpolation factors for 8 left and 8 right pixels:
5821  // 7 6 5 4 3 2 1 0
5822  // [F7 F6 F5 F4 F3 F2 F1 F0]
5823  const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
5824 
5825 
5826  // we de-interleave the top pixels to left and right pixels:
5827  // F E D C B A 9 8 7 6 5 4 3 2 1 0
5828  // [TL7 TL7 TL6 TL6 TL5 TL5 TL4 TL4 TL3 TL3 TL2 TL2 TL1 TL1 TL0 TL0]
5829  // [TR7 TR7 TR6 TR6 TR5 TR5 TR4 TR4 TR3 TR3 TR2 TR2 TR1 TR1 TR0 TR0]
5830  const uint16x8x2_t m2_128_topPixelsLeftRight = vuzpq_u16(m128_topPixels_0123, m128_topPixels_4567);
5831 
5832  // we de-interleave the pixels again to separate channel 0 and channel 1:
5833  // 7 6 5 4 3 2 1 0
5834  // channel 0: [TL7 TL6 TL5 TL4 TL3 TL2 TL1 TL0]
5835  // channel 1: [TL7 TL6 TL5 TL4 TL3 TL2 TL1 TL0]
5836  const uint8x8x2_t m2_64_topPixelsLeft_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[0])), vget_high_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[0])));
5837  const uint8x8x2_t m2_64_topPixelsRight_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[1])), vget_high_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[1])));
5838 
5839  const uint8x8_t& m64_topPixelsLeft_channel_0 = m2_64_topPixelsLeft_channels_01.val[0];
5840  const uint8x8_t& m64_topPixelsLeft_channel_1 = m2_64_topPixelsLeft_channels_01.val[1];
5841 
5842  const uint8x8_t& m64_topPixelsRight_channel_0 = m2_64_topPixelsRight_channels_01.val[0];
5843  const uint8x8_t& m64_topPixelsRight_channel_1 = m2_64_topPixelsRight_channels_01.val[1];
5844 
5845 
5846  // we determine the intermediate interpolation results for the top row (and we narrow down the 16 bit results 8 bit results)
5847  uint16x8_t m128_muliplication_channel_0 = vmull_u8(m64_topPixelsLeft_channel_0, m64_u_factorsLeft);
5848  uint16x8_t m128_muliplication_channel_1 = vmull_u8(m64_topPixelsLeft_channel_1, m64_u_factorsLeft);
5849 
5850  m128_muliplication_channel_0 = vmlal_u8(m128_muliplication_channel_0, m64_topPixelsRight_channel_0, m64_u_factorsRight);
5851  m128_muliplication_channel_1 = vmlal_u8(m128_muliplication_channel_1, m64_topPixelsRight_channel_1, m64_u_factorsRight);
5852 
5853  const uint8x8_t m64_topRow_channel_0 = vrshrn_n_u16(m128_muliplication_channel_0, 7);
5854  const uint8x8_t m64_topRow_channel_1 = vrshrn_n_u16(m128_muliplication_channel_1, 7);
5855 
5856 
5857  // we proceed with the bottom pixels (as we did with the top pixels)
5858  const uint16x8x2_t m2_128_bottomPixelsLeftRight = vuzpq_u16(m128_bottomPixels_0123, m128_bottomPixels_4567);
5859 
5860  const uint8x8x2_t m2_64_bottomPixelsLeft_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[0])), vget_high_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[0])));
5861  const uint8x8x2_t m2_64_bottomPixelsRight_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[1])), vget_high_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[1])));
5862 
5863  const uint8x8_t& m64_bottomPixelsLeft_channel_0 = m2_64_bottomPixelsLeft_channels_01.val[0];
5864  const uint8x8_t& m64_bottomPixelsLeft_channel_1 = m2_64_bottomPixelsLeft_channels_01.val[1];
5865 
5866  const uint8x8_t& m64_bottomPixelsRight_channel_0 = m2_64_bottomPixelsRight_channels_01.val[0];
5867  const uint8x8_t& m64_bottomPixelsRight_channel_1 = m2_64_bottomPixelsRight_channels_01.val[1];
5868 
5869 
5870  // we determine the intermediate interpolation results for the bottom row (and we narrow down the 16 bit results 8 bit results)
5871  m128_muliplication_channel_0 = vmull_u8(m64_bottomPixelsLeft_channel_0, m64_u_factorsLeft);
5872  m128_muliplication_channel_1 = vmull_u8(m64_bottomPixelsLeft_channel_1, m64_u_factorsLeft);
5873 
5874  m128_muliplication_channel_0 = vmlal_u8(m128_muliplication_channel_0, m64_bottomPixelsRight_channel_0, m64_u_factorsRight);
5875  m128_muliplication_channel_1 = vmlal_u8(m128_muliplication_channel_1, m64_bottomPixelsRight_channel_1, m64_u_factorsRight);
5876 
5877  const uint8x8_t m64_bottomRow_channel_0 = vrshrn_n_u16(m128_muliplication_channel_0, 7);
5878  const uint8x8_t m64_bottomRow_channel_1 = vrshrn_n_u16(m128_muliplication_channel_1, 7);
5879 
5880 
5881  // finnally we determine the interpolation result between top and bottom row
5882  m128_muliplication_channel_0 = vmull_u8(m64_topRow_channel_0, m64_u_factorsTop);
5883  m128_muliplication_channel_1 = vmull_u8(m64_topRow_channel_1, m64_u_factorsTop);
5884 
5885  m128_muliplication_channel_0 = vmlal_u8(m128_muliplication_channel_0, m64_bottomRow_channel_0, m64_u_factorsBottom);
5886  m128_muliplication_channel_1 = vmlal_u8(m128_muliplication_channel_1, m64_bottomRow_channel_1, m64_u_factorsBottom);
5887 
5888 
5889  // we narrow down the interpolation results and we store them
5890  uint8x8x2_t m2_64_result;
5891  m2_64_result.val[0] = vrshrn_n_u16(m128_muliplication_channel_0, 7);
5892  m2_64_result.val[1] = vrshrn_n_u16(m128_muliplication_channel_1, 7);
5893 
5894  // we write back the results and interleave them automatically
5895  vst2_u8((uint8_t*)targetPixelData, m2_64_result);
5896 
5897  targetPixelData += 8;
5898  }
5899 
5900  // we need to process the last pixel again, as this pixel may have received wrong interpolation factors as we always load two successive pixels into our NEON registers
5901  // **TODO** this is just a temporary solution, check how we can avoid this additional step
5902 
5903  const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
5904 
5905  for (unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
5906  {
5907  const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 + int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
5908 
5909  const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
5910  ocean_assert(lastSourcePixelLeft < sourceWidth);
5911  const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
5912 
5913  const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
5914 
5915  const unsigned int factorRight = factorRight_fixed16 >> 9u;
5916  const unsigned int factorLeft = 128u - factorRight;
5917 
5918  for (unsigned int c = 0u; c < 2u; ++c)
5919  {
5920  ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * (128u - factorBottom)
5921  + (((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
5922  }
5923  }
5924  }
5925 }
5926 
5927 #endif // OCEAN_WE_KEEP_ALSO_THIS_SLOW_IMPLEMENTATION_SHOWING_A_SLIGHTLY_DIFFERENT_APPROACH
5928 
5929 #ifdef OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION
5930 
/**
 * NEON-based specialization scaling a subset of the rows of a frame with 3 channels and 8 bit per channel by bilinear interpolation.
 * Source locations are determined with 16 bit fixed point precision, the interpolation factors are reduced to 7 bit precision (range [0, 128]).
 * Target pixel x is mapped to the source location 'sourceX_s_targetX * x + (sourceX_s_targetX * 0.5 - 0.5)' (the vertical direction is handled accordingly).
 * The horizontal loop handles eight target pixels per iteration; if the target width is not a multiple of eight, the last block is shifted left and some pixels are calculated twice.
 * Frames with padding elements are not supported.
 * This implementation is compiled only if OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION is defined.
 * @param source The source frame data providing the image information, must be valid
 * @param target The target frame data receiving the interpolated image information, must be valid
 * @param sourceWidth Width of the source frame in pixel, with range [2, 65535]
 * @param sourceHeight Height of the source frame in pixel, with range [1, 65535]
 * @param targetWidth Width of the target frame in pixel, with range [8, 65535]
 * @param targetHeight Height of the target frame in pixel, with range [1, 65535], used for the assert only
 * @param sourceX_s_targetX The horizontal scale factor converting a target location to a source location, with range (0, infinity)
 * @param sourceY_s_targetY The vertical scale factor converting a target location to a source location, with range (0, infinity)
 * @param sourcePaddingElements The number of padding elements of the source frame, must be 0
 * @param targetPaddingElements The number of padding elements of the target frame, must be 0
 * @param firstTargetRow The first target row to be handled
 * @param numberTargetRows The number of target rows to be handled
 */
5931 template <>
5932 inline void FrameInterpolatorBilinear::scale8BitPerChannelSubset7BitPrecisionNEON<3u, 8u>(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
5933 {
5934  ocean_assert(source != nullptr && target != nullptr);
5935  ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
5936  ocean_assert(sourceHeight >= 1u && sourceHeight <= 65535u);
5937  ocean_assert(targetWidth >= 8u && targetWidth <= 65535u);
5938  ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
5939  ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5940 
5941  ocean_assert(sourcePaddingElements == 0u); // not supported
5942  ocean_assert(targetPaddingElements == 0u); // not supported either, rows are addressed with a stride of exactly 'width' pixels
5943 
5944  typedef typename DataType<uint8_t, 3u>::Type PixelType;
5945 
5946  PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth; // first pixel of the first target row to be handled
5947  const PixelType* const sourcePixelData = (const PixelType*)source;
5948 
5949  // our offset values for the eight left pixels in relation to the first pixel of the row
5950  unsigned int leftOffsets[8];
5951 
5952  // this function uses fixed point numbers with 16 bit for the calculation of the interpolation positions and factors:
5953  // fixedPointLocation = floatLocation * 2^16
5954  //
5955  // [FEDCBA98, 76543210]
5956  // [pixel , subpixel]
5957  //
5958  // fixedPointLocation = pixel + subpixel / 2^16
5959  //
5960  // Thus, the upper 16 bit represent the location of e.g., the left pixel (for the linear interpolation)
5961  // while the lower 16 bit represent one of both interpolation factors (and 2^16 - subpixel represents the second interpolation factor)
5962 
5963  const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
5964  const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
5965 
5966  const int targetOffsetX_fixed16 = (int)(double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5); // fixed point version of the offset (scale * 0.5 - 0.5), can be negative
5967  const int targetOffsetY_fixed16 = (int)(double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
5968 
5969  // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
5970  const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
5971 
5972  // we store 4 integers: [targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16]
5973  const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
5974 
5975  // we store 4 integers: [sourceWidth - 2, sourceWidth - 2, sourceWidth - 2, sourceWidth - 2]
5976  const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
5977 
5978  // we store 4 integers: [0, 0, 0, 0]
5979  const int32x4_t m128_s_zero = vdupq_n_s32(0);
5980 
5981  const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
5982  const uint32x4_t m128_u_0123 = vld1q_u32(u_0123); // lane offsets added to the x location of the first pixel of a block
5983 
5984  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5985  {
5986  const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 + int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u); // vertical source location (fixed point), called with the bounds 0 and (sourceHeight - 1) << 16
5987 
5988  const unsigned int sourceRowTop = sourceY_fixed16 >> 16u; // we must not round here
5989  const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
5990  const unsigned int factorBottom = factorBottom_fixed16 >> 9u; // 16 bit fraction reduced to 7 bit precision (without rounding), with range [0, 127]
5991 
5992  const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
5993  // factorTop = 128 - factorBottom
5994  const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
5995 
5996  const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u); // the bottom row is the top row again in case the top row is the last source row
5997 
5998  const PixelType* const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
5999  const PixelType* const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
6000 
6001  for (unsigned int x = 0; x < targetWidth; x += 8u)
6002  {
6003  if (x + 8u > targetWidth)
6004  {
6005  // the last iteration will not fit into the output frame,
6006  // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
6007 
6008  ocean_assert(x >= 8u && targetWidth > 8u);
6009  const unsigned int newX = targetWidth - 8u;
6010 
6011  ocean_assert(x > newX);
6012  targetPixelData -= x - newX; // the target pointer is shifted left by the same number of pixels
6013 
6014  x = newX;
6015 
6016  // the for loop will stop after this iteration
6017  ocean_assert(!(x + 8u < targetWidth));
6018  }
6019 
6020 
6021  // we need eight successive x coordinates (as unsigned integers), split into two registers holding four coordinates each:
6022  // [x + 3, x + 2, x + 1, x + 0] and [x + 7, x + 6, x + 5, x + 4]
6023  const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
6024  const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
6025 
6026  // for each register, we calculate the four (fixed point) source locations for our four target locations, negative locations are clamped to 0
6027  const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
6028  const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
6029 
6030  const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
6031  const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
6032 
6033  // now we determine the pixel/integer accurate source locations
6034  // m128_u_left = min(m128_u_sourceX_fixed16 >> 16, sourceWidth - 2), so that the right neighbor (left + 1) is always inside the source row
6035  const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2); // not vrshrq_n_u32 as we must not round here
6036  const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
6037 
6038  // we store the offsets we have calculated
6039  vst1q_u32(leftOffsets + 0, m128_u_left_0123);
6040  vst1q_u32(leftOffsets + 4, m128_u_left_4567);
6041 
6042 
6043 
6044  // we load the individual pixels into our de-interleaved 8x8 bit registers, one register per channel (we do this for the top-left and top-right pixels)
6045  // note: loading of each pixel individually is significantly slower than loading two neighboring pixels within one iteration
6046 
6047  uint8x8x3_t topLeftPixels; // NOTE(review): not initialized before being passed to vld3_lane_u8, all eight lanes are loaded below
6048  uint8x8x3_t topRightPixels; // NOTE(review): not initialized before being passed to vld3_lane_u8, all eight lanes are loaded below
6049 
6050  topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 0), topLeftPixels, 0);
6051  topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 1), topRightPixels, 0);
6052 
6053  topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 0), topLeftPixels, 1);
6054  topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 1), topRightPixels, 1);
6055 
6056  topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 0), topLeftPixels, 2);
6057  topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 1), topRightPixels, 2);
6058 
6059  topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 0), topLeftPixels, 3);
6060  topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 1), topRightPixels, 3);
6061 
6062  topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 0), topLeftPixels, 4);
6063  topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 1), topRightPixels, 4);
6064 
6065  topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 0), topLeftPixels, 5);
6066  topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 1), topRightPixels, 5);
6067 
6068  topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 0), topLeftPixels, 6);
6069  topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 1), topRightPixels, 6);
6070 
6071  topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 0), topLeftPixels, 7);
6072  topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 1), topRightPixels, 7);
6073 
6074 
6075  // we load the individual pixels into our de-interleaved 8x8 bit registers, one register per channel (we do this for the bottom-left and bottom-right pixels)
6076 
6077  uint8x8x3_t bottomLeftPixels; // NOTE(review): not initialized before being passed to vld3_lane_u8, all eight lanes are loaded below
6078  uint8x8x3_t bottomRightPixels; // NOTE(review): not initialized before being passed to vld3_lane_u8, all eight lanes are loaded below
6079 
6080  bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 0), bottomLeftPixels, 0);
6081  bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 1), bottomRightPixels, 0);
6082 
6083  bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 0), bottomLeftPixels, 1);
6084  bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 1), bottomRightPixels, 1);
6085 
6086  bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 0), bottomLeftPixels, 2);
6087  bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 1), bottomRightPixels, 2);
6088 
6089  bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 0), bottomLeftPixels, 3);
6090  bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 1), bottomRightPixels, 3);
6091 
6092  bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 0), bottomLeftPixels, 4);
6093  bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 1), bottomRightPixels, 4);
6094 
6095  bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 0), bottomLeftPixels, 5);
6096  bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 1), bottomRightPixels, 5);
6097 
6098  bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 0), bottomLeftPixels, 6);
6099  bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 1), bottomRightPixels, 6);
6100 
6101  bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 0), bottomLeftPixels, 7);
6102  bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 1), bottomRightPixels, 7);
6103 
6104 
6105 
6106  // we determine the multiplication factors for the right pixels - which are already stored in the lower 16 bits
6107  // we need an accuracy of 7 bits (values between 0 and 128), the narrowing shift by 9 bits applies rounding:
6108  // 76 54 32 10
6109  // [F3 F2 F1 F0]
6110  const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6111  const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6112 
6113  // as we have the pixel information de-interleaved, we can store all 8 interpolation factors together into one 8x8 bit register:
6114  const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
6115  const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
6116  const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight); // factorLeft = 128 - factorRight
6117 
6118 
6119 
6120  // we determine the intermediate interpolation results for the top row (and we narrow down the 16 bit results to 8 bit results)
6121  uint16x8_t m128_muliplicationChannel_0 = vmull_u8(topLeftPixels.val[0], m64_u_factorsLeft);
6122  uint16x8_t m128_muliplicationChannel_1 = vmull_u8(topLeftPixels.val[1], m64_u_factorsLeft);
6123  uint16x8_t m128_muliplicationChannel_2 = vmull_u8(topLeftPixels.val[2], m64_u_factorsLeft);
6124 
6125  m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, topRightPixels.val[0], m64_u_factorsRight);
6126  m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, topRightPixels.val[1], m64_u_factorsRight);
6127  m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, topRightPixels.val[2], m64_u_factorsRight);
6128 
6129  uint8x8_t m64_topRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6130  uint8x8_t m64_topRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6131  uint8x8_t m64_topRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6132 
6133 
6134 
6135  // we determine the intermediate interpolation results for the bottom row (and we narrow down the 16 bit results to 8 bit results)
6136  m128_muliplicationChannel_0 = vmull_u8(bottomLeftPixels.val[0], m64_u_factorsLeft);
6137  m128_muliplicationChannel_1 = vmull_u8(bottomLeftPixels.val[1], m64_u_factorsLeft);
6138  m128_muliplicationChannel_2 = vmull_u8(bottomLeftPixels.val[2], m64_u_factorsLeft);
6139 
6140  m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, bottomRightPixels.val[0], m64_u_factorsRight);
6141  m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, bottomRightPixels.val[1], m64_u_factorsRight);
6142  m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, bottomRightPixels.val[2], m64_u_factorsRight);
6143 
6144  uint8x8_t m64_bottomRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6145  uint8x8_t m64_bottomRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6146  uint8x8_t m64_bottomRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6147 
6148 
6149 
6150  // finally we determine the interpolation result between top and bottom row
6151  m128_muliplicationChannel_0 = vmull_u8(m64_topRowChannel_0, m64_u_factorsTop);
6152  m128_muliplicationChannel_1 = vmull_u8(m64_topRowChannel_1, m64_u_factorsTop);
6153  m128_muliplicationChannel_2 = vmull_u8(m64_topRowChannel_2, m64_u_factorsTop);
6154 
6155  m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, m64_bottomRowChannel_0, m64_u_factorsBottom);
6156  m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, m64_bottomRowChannel_1, m64_u_factorsBottom);
6157  m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, m64_bottomRowChannel_2, m64_u_factorsBottom);
6158 
6159 
6160  // we narrow down the interpolation results and we store them
6161  uint8x8x3_t result;
6162  result.val[0] = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6163  result.val[1] = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6164  result.val[2] = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6165 
6166  // we write back the results and interleave them automatically
6167  vst3_u8((uint8_t*)targetPixelData, result);
6168 
6169  targetPixelData += 8;
6170  }
6171 
6172  // we need to process the last pixels of the row again: in the NEON code above the left pixel location is clamped to (sourceWidth - 2) while the interpolation factor is not adjusted, so that target pixels mapping to the last source pixel (or beyond) may have received wrong interpolation factors
6173  // **TODO** this is just a temporary solution, check how we can avoid this additional step
6174 
6175  const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16; // inverse of the mapping above, applied to the location of the last source pixel
6176 
6177  for (unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
6178  {
6179  const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 + int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
6180 
6181  const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
6182  ocean_assert(lastSourcePixelLeft < sourceWidth);
6183  const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
6184 
6185  const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
6186 
6187  const unsigned int factorRight = factorRight_fixed16 >> 9u; // 7 bit precision, without rounding (in contrast to the NEON code above)
6188  const unsigned int factorLeft = 128u - factorRight;
6189 
6190  for (unsigned int c = 0u; c < 3u; ++c) // the horizontal and the vertical 7 bit factors are applied at once, thus the result is normalized by 2^14 (with rounding); targetPixelData points to the end of the current target row here
6191  {
6192  ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * (128u - factorBottom)
6193  + (((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
6194  }
6195  }
6196  }
6197 }
6198 
6199 #endif // OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION
6200 
6201 #ifdef OCEAN_WE_KEEP_ALSO_THIS_SLOW_IMPLEMENTATION_SHOWING_A_MORE_GENERIC_APPROACH
6202 
6203 /// \cond DOXYGEN_DO_NOT_DOCUMENT
6204 
6205 template <>
6206 inline void FrameInterpolatorBilinear::resize8BitPerChannelSubset7BitPrecisionNEON<4u, 8u>(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
6207 {
6208  ocean_assert(source != nullptr && target != nullptr);
6209  ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
6210  ocean_assert(sourceHeight >= 1u && sourceHeight <= 65535u);
6211  ocean_assert(targetWidth >= 8u && targetWidth <= 65535u);
6212  ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
6213  ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
6214 
6215  ocean_assert(sourcePaddingElements == 0u); // not supported
6216  ocean_assert(targetPaddingElements == 0u);
6217 
6218  typedef typename DataType<uint8_t, 4u>::Type PixelType;
6219 
6220  PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
6221  const PixelType* const sourcePixelData = (const PixelType*)source;
6222 
6223  // our offset values for the eight left pixels in relation to the first pixel of the row
6224  unsigned int leftOffsets[8];
6225 
6226  // this function uses fixed point numbers with 16 bit for the calculation of const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
6227  const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
6228 
6229  // this function uses fixed point numbers with 16 bit for the calculation of the interpolation positions and factors:
6230  // fixedPointLocation = floatLocation * 2^16
6231  //
6232  // [FEDCBA98, 76543210]
6233  // [pixel , subpixel]
6234  //
6235  // fixedPointLocation = pixel + subpixel / 2^16
6236  //
6237  // Thus, the upper 16 bit represent the location of e.g., the left pixel (for the linear interpolation)
6238  // while the lower 16 bit represent one of both interpolation factors (and 2^16 - subpixel represents the second interpolation factor)
6239 
6240  const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
6241  const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
6242 
6243  const int targetOffsetX_fixed16 = (int)(double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
6244  const int targetOffsetY_fixed16 = (int)(double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
6245 
6246  // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
6247  const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
6248 
6249  // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
6250  const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
6251 
6252  // we store 4 integers: [sourceWidth - 2, sourceWidth - 2, sourceWidth - 2, sourceWidth - 2]
6253  const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
6254 
6255  // we store 4 integers: [0, 0, 0, 0]
6256  const int32x4_t m128_s_zero = vdupq_n_s32(0);
6257 
6258  const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
6259  const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
6260 
6261  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6262  {
6263  const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 + int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
6264 
6265  const unsigned int sourceRowTop = sourceY_fixed16 >> 16u; // we must not round here
6266  const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
6267  const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
6268 
6269  const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
6270  // factorTop = 128 - factorBottom
6271  const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
6272 
6273  const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
6274 
6275  const PixelType* const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
6276  const PixelType* const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
6277 
6278  for (unsigned int x = 0; x < targetWidth; x += 8u)
6279  {
6280  if (x + 8u > targetWidth)
6281  {
6282  // the last iteration will not fit into the output frame,
6283  // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
6284 
6285  ocean_assert(x >= 8u && targetWidth > 8u);
6286  const unsigned int newX = targetWidth - 8u;
6287 
6288  ocean_assert(x > newX);
6289  targetPixelData -= x - newX;
6290 
6291  x = newX;
6292 
6293  // the for loop will stop after this iteration
6294  ocean_assert(!(x + 8u < targetWidth));
6295  }
6296 
6297 
6298  // we need four successive x coordinate floats:
6299  // [x + 3, x + 2, x + 1; x + 0]
6300  const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
6301  const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
6302 
6303  // we calculate the four source locations for our four target locations
6304  const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
6305  const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
6306 
6307  const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
6308  const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
6309 
6310  // now we determine the pixel/integer accurate source locations
6311  // m128_u_left = min(floor(m128_f_sourceX), sourceWidth - 2)
6312  const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2); // not vrshrq_n_u32 as we must not round here
6313  const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
6314 
6315  // we store the offsets we have calculated
6316  vst1q_u32(leftOffsets + 0, m128_u_left_0123);
6317  vst1q_u32(leftOffsets + 4, m128_u_left_4567);
6318 
6319 
6320 
6321  // we load the individual pixels to our four (de-interleaved) 8x8 bit registers (we do this for the top-left and top-right pixels)
6322  // note: loading of each pixel individually is significantly slower than loading two neighboring pixels within one iteration
6323 
6324  uint8x8x4_t topLeftPixels;
6325  uint8x8x4_t topRightPixels;
6326 
6327  topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 0), topLeftPixels, 0);
6328  topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 1), topRightPixels, 0);
6329 
6330  topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 0), topLeftPixels, 1);
6331  topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 1), topRightPixels, 1);
6332 
6333  topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 0), topLeftPixels, 2);
6334  topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 1), topRightPixels, 2);
6335 
6336  topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 0), topLeftPixels, 3);
6337  topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 1), topRightPixels, 3);
6338 
6339  topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 0), topLeftPixels, 4);
6340  topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 1), topRightPixels, 4);
6341 
6342  topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 0), topLeftPixels, 5);
6343  topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 1), topRightPixels, 5);
6344 
6345  topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 0), topLeftPixels, 6);
6346  topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 1), topRightPixels, 6);
6347 
6348  topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 0), topLeftPixels, 7);
6349  topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 1), topRightPixels, 7);
6350 
6351 
6352  // we load the individual pixels to our four (de-interleaved) 8x8 bit registers (we do this for the bottom-left and bottom-right pixels)
6353 
6354  uint8x8x4_t bottomLeftPixels;
6355  uint8x8x4_t bottomRightPixels;
6356 
6357  bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 0), bottomLeftPixels, 0);
6358  bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 1), bottomRightPixels, 0);
6359 
6360  bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 0), bottomLeftPixels, 1);
6361  bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 1), bottomRightPixels, 1);
6362 
6363  bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 0), bottomLeftPixels, 2);
6364  bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 1), bottomRightPixels, 2);
6365 
6366  bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 0), bottomLeftPixels, 3);
6367  bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 1), bottomRightPixels, 3);
6368 
6369  bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 0), bottomLeftPixels, 4);
6370  bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 1), bottomRightPixels, 4);
6371 
6372  bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 0), bottomLeftPixels, 5);
6373  bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 1), bottomRightPixels, 5);
6374 
6375  bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 0), bottomLeftPixels, 6);
6376  bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 1), bottomRightPixels, 6);
6377 
6378  bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 0), bottomLeftPixels, 7);
6379  bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 1), bottomRightPixels, 7);
6380 
6381 
6382 
6383  // we determine the multiplication factors for the right pixels - which are already stored in the lower 16 bits
6384  // we need an accuracy of 7 bits (values between 0 and 128):
6385  // 76 54 32 10
6386  // [F3 F2 F1 F0]
6387  const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6388  const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6389 
6390  // as we have the pixel information de-interleaved, we can store all 8 interpolation factors together into one 8x8 bit register:
6391  const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
6392  const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
6393  const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
6394 
6395 
6396 
6397  // we determine the intermediate interpolation results for the top row (and we narrow down the 16 bit results 8 bit results)
6398  uint16x8_t m128_muliplicationChannel_0 = vmull_u8(topLeftPixels.val[0], m64_u_factorsLeft);
6399  uint16x8_t m128_muliplicationChannel_1 = vmull_u8(topLeftPixels.val[1], m64_u_factorsLeft);
6400  uint16x8_t m128_muliplicationChannel_2 = vmull_u8(topLeftPixels.val[2], m64_u_factorsLeft);
6401  uint16x8_t m128_muliplicationChannel_3 = vmull_u8(topLeftPixels.val[3], m64_u_factorsLeft);
6402 
6403  m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, topRightPixels.val[0], m64_u_factorsRight);
6404  m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, topRightPixels.val[1], m64_u_factorsRight);
6405  m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, topRightPixels.val[2], m64_u_factorsRight);
6406  m128_muliplicationChannel_3 = vmlal_u8(m128_muliplicationChannel_3, topRightPixels.val[3], m64_u_factorsRight);
6407 
6408  uint8x8_t m64_topRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6409  uint8x8_t m64_topRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6410  uint8x8_t m64_topRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6411  uint8x8_t m64_topRowChannel_3 = vrshrn_n_u16(m128_muliplicationChannel_3, 7);
6412 
6413 
6414 
6415  // we determine the intermediate interpolation results for the bottom row (and we narrow down the 16 bit results 8 bit results)
6416  m128_muliplicationChannel_0 = vmull_u8(bottomLeftPixels.val[0], m64_u_factorsLeft);
6417  m128_muliplicationChannel_1 = vmull_u8(bottomLeftPixels.val[1], m64_u_factorsLeft);
6418  m128_muliplicationChannel_2 = vmull_u8(bottomLeftPixels.val[2], m64_u_factorsLeft);
6419  m128_muliplicationChannel_3 = vmull_u8(bottomLeftPixels.val[3], m64_u_factorsLeft);
6420 
6421  m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, bottomRightPixels.val[0], m64_u_factorsRight);
6422  m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, bottomRightPixels.val[1], m64_u_factorsRight);
6423  m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, bottomRightPixels.val[2], m64_u_factorsRight);
6424  m128_muliplicationChannel_3 = vmlal_u8(m128_muliplicationChannel_3, bottomRightPixels.val[3], m64_u_factorsRight);
6425 
6426  uint8x8_t m64_bottomRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6427  uint8x8_t m64_bottomRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6428  uint8x8_t m64_bottomRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6429  uint8x8_t m64_bottomRowChannel_3 = vrshrn_n_u16(m128_muliplicationChannel_3, 7);
6430 
6431 
6432 
6433  // finally we determine the interpolation result between top and bottom row
6434  m128_muliplicationChannel_0 = vmull_u8(m64_topRowChannel_0, m64_u_factorsTop);
6435  m128_muliplicationChannel_1 = vmull_u8(m64_topRowChannel_1, m64_u_factorsTop);
6436  m128_muliplicationChannel_2 = vmull_u8(m64_topRowChannel_2, m64_u_factorsTop);
6437  m128_muliplicationChannel_3 = vmull_u8(m64_topRowChannel_3, m64_u_factorsTop);
6438 
6439  m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, m64_bottomRowChannel_0, m64_u_factorsBottom);
6440  m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, m64_bottomRowChannel_1, m64_u_factorsBottom);
6441  m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, m64_bottomRowChannel_2, m64_u_factorsBottom);
6442  m128_muliplicationChannel_3 = vmlal_u8(m128_muliplicationChannel_3, m64_bottomRowChannel_3, m64_u_factorsBottom);
6443 
6444 
6445  // we narrow down the interpolation results and we store them
6446  uint8x8x4_t result;
6447  result.val[0] = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6448  result.val[1] = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6449  result.val[2] = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6450  result.val[3] = vrshrn_n_u16(m128_muliplicationChannel_3, 7);
6451 
6452  // we write back the results and interleave them automatically
6453  vst4_u8((uint8_t*)targetPixelData, result);
6454 
6455  targetPixelData += 8;
6456  }
6457 
6458  // we need to process the last pixel again, as this pixel may have received wrong interpolation factors as we always load two successive pixels into our NEON registers
6459  // **TODO** this is just a temporary solution, check how we can avoid this additional step
6460 
6461  const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
6462 
6463  for (unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
6464  {
6465  const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 + int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
6466 
6467  const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
6468  ocean_assert(lastSourcePixelLeft < sourceWidth);
6469  const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
6470 
6471  const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
6472 
6473  const unsigned int factorRight = factorRight_fixed16 >> 9u;
6474  const unsigned int factorLeft = 128u - factorRight;
6475 
6476  for (unsigned int c = 0u; c < 4u; ++c)
6477  {
6478  ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorTop
6479  + (((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
6480  }
6481  }
6482  }
6483 }
6484 
6485 /// \endcond
6486 
6487 #endif // OCEAN_WE_KEEP_ALSO_THIS_SLOW_IMPLEMENTATION_SHOWING_A_MORE_GENERIC_APPROACH
6488 
6489 template <>
6490 inline void FrameInterpolatorBilinear::interpolateRowVerticalNEON<float>(const float* sourceRowTop, const float* sourceRowBottom, float* targetRow, const unsigned int elements, const float factorBottom)
6491 {
6492  ocean_assert(sourceRowTop != nullptr);
6493  ocean_assert(sourceRowBottom != nullptr);
6494  ocean_assert(targetRow != nullptr);
6495  ocean_assert(elements >= 16u);
6496  ocean_assert(factorBottom >= 0.0f && factorBottom <= 1.0f);
6497 
6498  // [1.0f, 1.0f, 1.0f, 1.0f]
6499  const float32x4_t constant_1_f_32x4 = vdupq_n_f32(1.0f);
6500 
6501  const float32x4_t factorsBottom_f_32x4 = vdupq_n_f32(factorBottom);
6502  const float32x4_t factorsTop_f_32x4 = vsubq_f32(constant_1_f_32x4, factorsBottom_f_32x4); // factorTop = 1 - factorBottom
6503 
6504  for (unsigned int n = 0u; n < elements; n += 16u)
6505  {
6506  if (n + 16u > elements)
6507  {
6508  // the last iteration will not fit into the output frame,
6509  // so we simply shift x left by some elements (at most 15) and we will calculate some elements again
6510 
6511  ocean_assert(n >= 16u && elements > 16u);
6512  const unsigned int offset = n - (elements - 16u);
6513  ocean_assert(offset < 16u);
6514 
6515  sourceRowTop -= offset;
6516  sourceRowBottom -= offset;
6517  targetRow -= offset;
6518 
6519  // the for loop will stop after this iteration
6520  ocean_assert(!(n + 16u < elements));
6521  }
6522 
6523  // loading the next four 32 bit values from the top and bottom row
6524  const float32x4_t top_03_32x4 = vld1q_f32(sourceRowTop + 0);
6525  const float32x4_t top_47_32x4 = vld1q_f32(sourceRowTop + 4);
6526  const float32x4_t top_8B_32x4 = vld1q_f32(sourceRowTop + 8);
6527  const float32x4_t top_CF_32x4 = vld1q_f32(sourceRowTop + 12);
6528 
6529  const float32x4_t bottom_03_32x4 = vld1q_f32(sourceRowBottom + 0);
6530  const float32x4_t bottom_47_32x4 = vld1q_f32(sourceRowBottom + 4);
6531  const float32x4_t bottom_8B_32x4 = vld1q_f32(sourceRowBottom + 8);
6532  const float32x4_t bottom_CF_32x4 = vld1q_f32(sourceRowBottom + 12);
6533 
6534  // interpolatedRow_32x4 = top_32x4 * factorsTop + bottom_32x4 * factorsBottom
6535  float32x4_t interpolatedRow_03_32x4 = vmulq_f32(top_03_32x4, factorsTop_f_32x4);
6536  float32x4_t interpolatedRow_47_32x4 = vmulq_f32(top_47_32x4, factorsTop_f_32x4);
6537  float32x4_t interpolatedRow_8B_32x4 = vmulq_f32(top_8B_32x4, factorsTop_f_32x4);
6538  float32x4_t interpolatedRow_CF_32x4 = vmulq_f32(top_CF_32x4, factorsTop_f_32x4);
6539 
6540  interpolatedRow_03_32x4 = vmlaq_f32(interpolatedRow_03_32x4, bottom_03_32x4, factorsBottom_f_32x4);
6541  interpolatedRow_47_32x4 = vmlaq_f32(interpolatedRow_47_32x4, bottom_47_32x4, factorsBottom_f_32x4);
6542  interpolatedRow_8B_32x4 = vmlaq_f32(interpolatedRow_8B_32x4, bottom_8B_32x4, factorsBottom_f_32x4);
6543  interpolatedRow_CF_32x4 = vmlaq_f32(interpolatedRow_CF_32x4, bottom_CF_32x4, factorsBottom_f_32x4);
6544 
6545  // writing back the four interpolated 32 bit results
6546  vst1q_f32(targetRow + 0, interpolatedRow_03_32x4);
6547  vst1q_f32(targetRow + 4, interpolatedRow_47_32x4);
6548  vst1q_f32(targetRow + 8, interpolatedRow_8B_32x4);
6549  vst1q_f32(targetRow + 12, interpolatedRow_CF_32x4);
6550 
6551  sourceRowTop += 16;
6552  sourceRowBottom += 16;
6553  targetRow += 16;
6554  }
6555 }
6556 
6557 template <>
6558 inline void FrameInterpolatorBilinear::interpolateRowHorizontalNEON<float, 1u>(const float* extendedSourceRow, float* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const float* interpolationFactorsRight)
6559 {
6560  ocean_assert(extendedSourceRow != nullptr);
6561  ocean_assert(targetRow != nullptr);
6562  ocean_assert(targetWidth >= 8u);
6563  ocean_assert(interpolationLocations != nullptr);
6564  ocean_assert(interpolationFactorsRight != nullptr);
6565 
6566  ocean_assert(channels == 1u);
6567 
6568  // [1.0f, 1.0f, 1.0f, 1.0f]
6569  const float32x4_t constant_1_f_32x4 = vdupq_n_f32(1.0f);
6570 
6571  for (unsigned int x = 0; x < targetWidth; x += 8u)
6572  {
6573  if (x + 8u > targetWidth)
6574  {
6575  // the last iteration will not fit into the output frame,
6576  // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
6577 
6578  ocean_assert(x >= 8u && targetWidth > 8u);
6579  const unsigned int newX = targetWidth - 8u;
6580 
6581  ocean_assert(x > newX);
6582  const unsigned int offset = x - newX;
6583 
6584  targetRow -= offset;
6585  interpolationLocations -= offset;
6586  interpolationFactorsRight -= offset;
6587 
6588  x = newX;
6589 
6590  // the for loop will stop after this iteration
6591  ocean_assert(!(x + 8u < targetWidth));
6592  }
6593 
6594  // we load the left and the right pixels (for four resulting target pixels)
6595 
6596  const float32x2_t pixel_0_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[0]);
6597  const float32x2_t pixel_1_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[1]);
6598  const float32x4_t pixel_01_f_32x4 = vcombine_f32(pixel_0_f_32x2, pixel_1_f_32x2);
6599 
6600  const float32x2_t pixel_2_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[2]);
6601  const float32x2_t pixel_3_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[3]);
6602  const float32x4_t pixel_23_f_32x4 = vcombine_f32(pixel_2_f_32x2, pixel_3_f_32x2);
6603 
6604  const float32x2_t pixel_4_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[4]);
6605  const float32x2_t pixel_5_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[5]);
6606  const float32x4_t pixel_45_f_32x4 = vcombine_f32(pixel_4_f_32x2, pixel_5_f_32x2);
6607 
6608  const float32x2_t pixel_6_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[6]);
6609  const float32x2_t pixel_7_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[7]);
6610  const float32x4_t pixel_67_f_32x4 = vcombine_f32(pixel_6_f_32x2, pixel_7_f_32x2);
6611 
6612  const float32x4_t factorsRight_0123_f_32x4 = vld1q_f32(interpolationFactorsRight + 0);
6613  const float32x4_t factorsLeft_0123_f_32x4 = vsubq_f32(constant_1_f_32x4, factorsRight_0123_f_32x4);
6614  const float32x4x2_t factorsLeftRight_0123_f_32x4_2 = vzipq_f32(factorsLeft_0123_f_32x4, factorsRight_0123_f_32x4);
6615 
6616  const float32x4_t factorsRight_4567_f_32x4 = vld1q_f32(interpolationFactorsRight + 4);
6617  const float32x4_t factorsLeft_4567_f_32x4 = vsubq_f32(constant_1_f_32x4, factorsRight_4567_f_32x4);
6618  const float32x4x2_t factorsLeftRight_4567_f_32x4_2 = vzipq_f32(factorsLeft_4567_f_32x4, factorsRight_4567_f_32x4);
6619 
6620  const float32x4_t multiplied_01_f_32x4 = vmulq_f32(pixel_01_f_32x4, factorsLeftRight_0123_f_32x4_2.val[0]);
6621  const float32x4_t multiplied_23_f_32x4 = vmulq_f32(pixel_23_f_32x4, factorsLeftRight_0123_f_32x4_2.val[1]);
6622 
6623  const float32x4_t multiplied_45_f_32x4 = vmulq_f32(pixel_45_f_32x4, factorsLeftRight_4567_f_32x4_2.val[0]);
6624  const float32x4_t multiplied_67_f_32x4 = vmulq_f32(pixel_67_f_32x4, factorsLeftRight_4567_f_32x4_2.val[1]);
6625 
6626  const float32x2_t result_01_f_32x2 = vpadd_f32(vget_low_f32(multiplied_01_f_32x4), vget_high_f32(multiplied_01_f_32x4));
6627  const float32x2_t result_23_f_32x2 = vpadd_f32(vget_low_f32(multiplied_23_f_32x4), vget_high_f32(multiplied_23_f_32x4));
6628 
6629  const float32x2_t result_45_f_32x2 = vpadd_f32(vget_low_f32(multiplied_45_f_32x4), vget_high_f32(multiplied_45_f_32x4));
6630  const float32x2_t result_67_f_32x2 = vpadd_f32(vget_low_f32(multiplied_67_f_32x4), vget_high_f32(multiplied_67_f_32x4));
6631 
6632  const float32x4_t result_0123_f_32x4 = vcombine_f32(result_01_f_32x2, result_23_f_32x2);
6633  const float32x4_t result_4567_f_32x4 = vcombine_f32(result_45_f_32x2, result_67_f_32x2);
6634 
6635  vst1q_f32(targetRow + 0, result_0123_f_32x4);
6636  vst1q_f32(targetRow + 4, result_4567_f_32x4);
6637 
6638  targetRow += 8;
6639  interpolationLocations += 8;
6640  interpolationFactorsRight += 8;
6641  }
6642 }
6643 
6644 template <>
6645 inline void FrameInterpolatorBilinear::scaleSubset<float, float, 1u>(const float* source, float* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
6646 {
6647  ocean_assert(source != nullptr && target != nullptr);
6648  ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
6649  ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
6650  ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
6651 
6652  ocean_assert(sourceWidth != targetWidth || sourceHeight != targetHeight);
6653 
6654  const unsigned int sourceStrideElements = sourceWidth * 1u + sourcePaddingElements;
6655  const unsigned int targetStrideElements = targetWidth * 1u + targetPaddingElements;
6656 
6657  typedef void (*InterpolateRowVerticalFunction)(const float*, const float*, float*, const unsigned int, const float);
6658  typedef void (*InterpolateRowHorizontalFunction)(const float*, float*, const unsigned int, const unsigned int, const unsigned int*, const float*);
6659 
6660  InterpolateRowVerticalFunction interpolateRowVerticalFunction = interpolateRowVertical<float>;
6661  InterpolateRowHorizontalFunction interpolateRowHorizontalFunction = interpolateRowHorizontal<float, 1u>;
6662 
6663  if (sourceWidth * 1u >= 16u)
6664  {
6665  interpolateRowVerticalFunction = interpolateRowVerticalNEON<float>;
6666  }
6667 
6668  if (targetWidth >= 8u)
6669  {
6670  interpolateRowHorizontalFunction = interpolateRowHorizontalNEON<float, 1u>;
6671  }
6672 
6673  target += targetStrideElements * firstTargetRow;
6674 
6675  const float sourceX_T_targetX = float(sourceX_s_targetX);
6676  const float sourceY_T_targetY = float(sourceY_s_targetY);
6677 
6678  // See the generic template function for a detailed documentation regarding interpolation factors.
6679 
6680  Memory memoryIntermediateExtendedRow;
6681  Memory memoryHorizontalInterpolationLocations;
6682  Memory memoryHorizontalInterpolationFactorsRight;
6683 
6684  if (sourceWidth != targetWidth)
6685  {
6686  // in case we are scaling the width of the frame, we use an intermediate buffer and pre-calculated interpolation locations and factors
6687 
6688  memoryIntermediateExtendedRow = Memory::create<float>(sourceWidth + 1u); // one additional pixel
6689 
6690  memoryHorizontalInterpolationLocations = Memory::create<unsigned int>(targetWidth); // one offset for each target pixel
6691 
6692  memoryHorizontalInterpolationFactorsRight = Memory::create<float>(targetWidth); // one factors (right) for each target pixel
6693  }
6694 
6695  if (memoryHorizontalInterpolationLocations)
6696  {
6697  ocean_assert(memoryHorizontalInterpolationFactorsRight);
6698 
6699  if (targetWidth >= 4u)
6700  {
6701  const float32x4_t sourceX_T_targetX_f_32x4 = vdupq_n_f32(sourceX_T_targetX);
6702  const float32x4_t targetOffsetX_f_32x4 = vdupq_n_f32(sourceX_T_targetX * 0.5f - 0.5f);
6703 
6704  // [0.0f, 0.0f, 0.0f, 0.0f]
6705  const float32x4_t constant_0_f_32x4 = vdupq_n_f32(0);
6706 
6707  // [4.0f, 4.0f, 4.0f, 4.0f]
6708  const float32x4_t constant_4_f_32x4 = vdupq_n_f32(4.0f);
6709 
6710  // we store 4 integers: [sourceWidth - 1, sourceWidth - 1, sourceWidth - 1, sourceWidth - 1]
6711  const uint32x4_t sourceWidth_1_u_32x4 = vdupq_n_u32(sourceWidth - 1u);
6712 
6713  // [0.0f, 1.0f, 2.0f, 3.0f]
6714  const float f_0123[4] = {0.0f, 1.0f, 2.0f, 3.0f};
6715  float32x4_t x_0123_f_32x4 = vld1q_f32(f_0123);
6716 
6717  // we pre-calculate the interpolation factors and pixel locations in horizontal direction
6718 
6719  for (unsigned int x = 0u; x < targetWidth; x += 4u)
6720  {
6721  if (x + 4u > targetWidth)
6722  {
6723  // the last iteration will not fit into the output frame,
6724  // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
6725 
6726  ocean_assert(x >= 4u && targetWidth > 4u);
6727  const unsigned int newX = targetWidth - 4u;
6728 
6729  ocean_assert(x > newX);
6730  const unsigned int offset = x - newX;
6731 
6732  x = newX;
6733 
6734  x_0123_f_32x4 = vsubq_f32(x_0123_f_32x4, vdupq_n_f32(float(offset)));
6735 
6736  // the for loop will stop after this iteration
6737  ocean_assert(!(x + 4u < targetWidth));
6738  }
6739 
6740  // we calculate the four source locations for our four target locations
6741  const float32x4_t sourceX_0123_f_32x4 = vmaxq_f32(constant_0_f_32x4, vaddq_f32(targetOffsetX_f_32x4, vmulq_f32(sourceX_T_targetX_f_32x4, x_0123_f_32x4)));
6742 
6743  // now we determine the pixel/integer accurate source locations
6744  // left = min(floor(sourceX), sourceWidth - 1)
6745  uint32x4_t left_0123_u_32x4 = vminq_u32(vcvtq_u32_f32(sourceX_0123_f_32x4), sourceWidth_1_u_32x4); // no rounding here
6746 
6747  // we store the offsets we have calculated
6748  vst1q_u32(memoryHorizontalInterpolationLocations.data<unsigned int>() + x, left_0123_u_32x4);
6749 
6750  // factorRight = sourcceX - float(left)
6751  const float32x4_t factorsRight_f_32x4 = vsubq_f32(sourceX_0123_f_32x4, vcvtq_f32_u32(left_0123_u_32x4));
6752 
6753  vst1q_f32(memoryHorizontalInterpolationFactorsRight.data<float>() + x, factorsRight_f_32x4);
6754 
6755  // [x + 0, x + 1, x + 2, x + 3] + [4, 4, 4, 4]
6756  x_0123_f_32x4 = vaddq_f32(x_0123_f_32x4, constant_4_f_32x4);
6757  }
6758  }
6759  else
6760  {
6761  const float targetOffsetX = sourceX_T_targetX * 0.5f - 0.5f;
6762 
6763  // we pre-calculate the interpolation factors and pixel locations in horizontal direction
6764 
6765  for (unsigned int x = 0u; x < targetWidth; ++x)
6766  {
6767  const float sourceX = max(0.0f, targetOffsetX + float(x) * sourceX_T_targetX);
6768 
6769  const unsigned int left = min((unsigned int)sourceX, sourceWidth - 1u); // no rounding here
6770 
6771  memoryHorizontalInterpolationLocations.data<unsigned int>()[x] = left;
6772 
6773  const float factorRight = sourceX - float(left);
6774  ocean_assert(factorRight >= 0.0f && factorRight <= 1.0f);
6775 
6776  memoryHorizontalInterpolationFactorsRight.data<float>()[x] = factorRight;
6777  }
6778  }
6779  }
6780 
6781  const float targetOffsetY = sourceY_T_targetY * 0.5f - 0.5f;
6782 
6783  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6784  {
6785  const float sourceY = minmax<float>(0.0f, targetOffsetY + sourceY_T_targetY * float(y), float(sourceHeight) - 1.0f);
6786 
6787  const unsigned int sourceRowTop = (unsigned int)sourceY; // we must not round here
6788  const float factorBottom = sourceY - float(sourceRowTop);
6789  ocean_assert(factorBottom >= 0.0f && factorBottom <= 1.0f);
6790 
6791  const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
6792 
6793  const float* const sourceTopRow = source + sourceStrideElements * sourceRowTop;
6794  const float* const sourceBottomRow = source + sourceStrideElements * sourceRowBottom;
6795 
6796  float* targetRow = nullptr;
6797 
6798  if (sourceHeight == targetHeight)
6799  {
6800  ocean_assert(sourceWidth != targetWidth);
6801  ocean_assert(memoryIntermediateExtendedRow);
6802 
6803  // we do not need to interpolate two lines, thus we simply need to copy the row (as we need an additional pixel at the end)
6804  memcpy(memoryIntermediateExtendedRow.data<float>(), sourceTopRow, sourceWidth * sizeof(float));
6805  }
6806  else
6807  {
6808  // in case we do not scale the width of the frame, we can write the result to the target frame directly
6809  targetRow = memoryIntermediateExtendedRow.isNull() ? target : memoryIntermediateExtendedRow.data<float>();
6810 
6811  ocean_assert(targetRow != nullptr);
6812  ocean_assert(interpolateRowVerticalFunction != nullptr);
6813  interpolateRowVerticalFunction(sourceTopRow, sourceBottomRow, targetRow, sourceWidth * 1u, factorBottom);
6814  }
6815 
6816  if (memoryIntermediateExtendedRow) // sourceWidth != targetWidth
6817  {
6818  // we use an extended row (with one additional pixel at the end - equal to the last pixel)
6819  // so we have to copy the last pixel
6820  memoryIntermediateExtendedRow.data<float>()[sourceWidth] = memoryIntermediateExtendedRow.data<float>()[sourceWidth - 1u];
6821 
6822  interpolateRowHorizontalFunction(memoryIntermediateExtendedRow.data<float>(), target, targetWidth, 1u, memoryHorizontalInterpolationLocations.data<unsigned int>(), memoryHorizontalInterpolationFactorsRight.data<float>());
6823  }
6824 
6825  target += targetStrideElements;
6826  }
6827 }
6828 
6829 #endif // OCEAN_HARDWARE_NEON_VERSION >= 10
6830 
6831 template <typename T, typename TScale, unsigned int tChannels>
6832 void FrameInterpolatorBilinear::scaleSubset(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
6833 {
6834  static_assert((std::is_same<float, TScale>::value || std::is_same<double, TScale>::value), "Invalid TScale type");
6835 
6836  ocean_assert(source != nullptr && target != nullptr);
6837  ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
6838  ocean_assert_and_suppress_unused(targetWidth >= 1u && targetHeight >= 1u, targetHeight);
6839  ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
6840 
6841  const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
6842  const unsigned int targetStrideElements = targetWidth * tChannels + targetPaddingElements;
6843 
6844  const TScale sourceX_T_targetX = TScale(sourceX_s_targetX);
6845  const TScale sourceY_T_targetY = TScale(sourceY_s_targetY);
6846 
6847  /*
6848  * We determine the sub-pixel accurate source location for each target pixel as follows:
6849  *
6850  * Example with a downsampling by factor 4:
6851  * sourceRow with 12 pixels: | 0 1 2 3 4 5 6 7 8 9 A B |
6852  * targetRow with 3 pixels: | 0 1 2 |
6853  *
6854  * Thus, the source row can be separated into three blocks;
6855  * and we want to extract the color information from the center of the blocks:
6856  * sourceRow with 12 pixels: | 0 1 2 3 | 4 5 6 7 | 8 9 A B |
6857  * targetRow with 3 pixels: | 0 | 1 | 2 | (targetTSourceX = 4)
6858  *
6859  * Thus, we add 0.5 to each target coordinate before converting it to a source location;
6860  * and subtract 0.5 again afterwards:
6861  * sourceX = (targetX + 0.5) * targetTSourceX - 0.5
6862  *
6863  * e.g., (0 + 0.5) * 4 - 0.5 = 1.5
6864  * (1 + 0.5) * 4 - 0.5 = 5.5
6865  *
6866  *
6867  * Example with a downsampling by factor 3:
6868  * sourceRow with 9 pixels: | 0 1 2 3 4 5 6 7 8 |
6869  * targetRow with 3 pixels: | 0 1 2 |
6870  *
6871  * sourceRow with 9 pixels: | 0 1 2 | 3 4 5 | 6 7 8 |
6872  * targetRow with 3 pixels: | 0 | 1 | 2 | (targetTSourceX = 3)
6873  *
6874  * e.g., (0 + 0.5) * 3 - 0.5 = 1
6875  * (1 + 0.5) * 3 - 0.5 = 4
6876  *
6877  *
6878  * Example with a downsampling by factor 2:
6879  * sourceRow with 6 pixels: | 0 1 2 3 4 5 |
6880  * targetRow with 3 pixels: | 0 1 2 |
6881  *
6882  * sourceRow with 6 pixels: | 0 1 | 2 3 | 4 5 |
6883  * targetRow with 3 pixels: | 0 | 1 | 2 | (targetTSourceX = 2)
6884  *
6885  * e.g., (0 + 0.5) * 2 - 0.5 = 0.5
6886  * (1 + 0.5) * 2 - 0.5 = 2.5
6887  *
6888  *
6889  * we can simplify the calculation (as we have a constant term):
6890  * sourceX = (targetX * targetTSourceX) + (0.5 * targetTSourceX - 0.5)
6891  */
6892 
6893  const TScale sourceX_T_targetXOffset = sourceX_T_targetX * TScale(0.5) - TScale(0.5);
6894  const TScale sourceY_T_targetYOffset = sourceY_T_targetY * TScale(0.5) - TScale(0.5);
6895 
6896  const TScale sourceWidth_1 = TScale(sourceWidth - 1u);
6897  const TScale sourceHeight_1 = TScale(sourceHeight - 1u);
6898 
6899  target += targetStrideElements * firstTargetRow;
6900 
6901  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6902  {
6903  const TScale sy = minmax(TScale(0), sourceY_T_targetYOffset + sourceY_T_targetY * TScale(y), sourceHeight_1);
6904  ocean_assert(sy >= TScale(0) && sy < TScale(sourceHeight));
6905 
6906  const unsigned int sTop = (unsigned int)sy;
6907  ocean_assert(sy >= TScale(sTop));
6908 
6909  const TScale factorBottom = sy - TScale(sTop);
6910  ocean_assert(factorBottom >= TScale(0) && factorBottom <= TScale(1));
6911 
6912  const TScale factorTop = TScale(1) - factorBottom;
6913  ocean_assert(factorTop >= TScale(0) && factorTop <= TScale(1));
6914 
6915  const T* const sourceTop = source + sTop * sourceStrideElements;
6916  const T* const sourceBottom = (sTop + 1u < sourceHeight) ? sourceTop + sourceStrideElements : sourceTop;
6917 
6918  for (unsigned int x = 0; x < targetWidth; ++x)
6919  {
6920  const TScale sx = minmax(TScale(0), sourceX_T_targetXOffset + sourceX_T_targetX * TScale(x), sourceWidth_1);
6921  ocean_assert(sx >= TScale(0) && sx < TScale(sourceWidth));
6922 
6923  const unsigned int sLeft = (unsigned int)sx;
6924  ocean_assert(sx >= TScale(sLeft));
6925 
6926  const TScale factorRight = sx - TScale(sLeft);
6927  ocean_assert(factorRight >= TScale(0) && factorRight <= TScale(1));
6928 
6929  const TScale factorLeft = TScale(1) - factorRight;
6930  ocean_assert(factorLeft >= TScale(0) && factorLeft <= TScale(1));
6931 
6932  const unsigned int sourceRightOffset = sLeft + 1u < sourceWidth ? tChannels : 0u;
6933 
6934  const T* const sourceTopLeft = sourceTop + sLeft * tChannels;
6935  const T* const sourceBottomLeft = sourceBottom + sLeft * tChannels;
6936 
6937  const TScale factorTopLeft = factorTop * factorLeft;
6938  const TScale factorTopRight = factorTop * factorRight;
6939  const TScale factorBottomLeft = factorBottom * factorLeft;
6940  const TScale factorBottomRight = factorBottom * factorRight;
6941 
6942  for (unsigned int n = 0u; n < tChannels; ++n)
6943  {
6944  target[n] = T(TScale(sourceTopLeft[n]) * factorTopLeft + TScale(sourceTopLeft[sourceRightOffset + n]) * factorTopRight
6945  + TScale(sourceBottomLeft[n]) * factorBottomLeft + TScale(sourceBottomLeft[sourceRightOffset + n]) * factorBottomRight);
6946  }
6947 
6948  target += tChannels;
6949  }
6950 
6951  target += targetPaddingElements;
6952  }
6953 }
6954 
6955 template <unsigned int tChannels>
6956 void FrameInterpolatorBilinear::rotate8BitPerChannelSubset(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const uint8_t* borderColor, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
6957 {
6958  static_assert(tChannels != 0u, "Invalid channel number!");
6959 
6960  ocean_assert(firstTargetRow + numberTargetRows <= height);
6961 
6962  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
6963 
6964  const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
6965 
6966  uint8_t zeroColor[tChannels] = {uint8_t(0)};
6967  const PixelType bColor = borderColor ? *(const PixelType*)borderColor : *(const PixelType*)zeroColor;
6968 
6969  const SquareMatrix3 rotationMatrix3(Rotation(0, 0, 1, angle));
6970  const SquareMatrix2 rotationMatrix2(rotationMatrix3(0, 0), rotationMatrix3(1, 0), rotationMatrix3(0, 1), rotationMatrix3(1, 1));
6971 
6972  const Scalar width_1 = Scalar(width - 1u);
6973  const Scalar height_1 = Scalar(height - 1u);
6974  const Vector2 anchorPosition(horizontalAnchorPosition, verticalAnchorPosition);
6975 
6976  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6977  {
6978  PixelType* targetPixel = (PixelType*)(target + y * targetStrideElements);
6979 
6980  const Scalar floatY = Scalar(y);
6981 
6982  for (unsigned int x = 0; x < width; ++x)
6983  {
6984  const Vector2 sourceLocation(anchorPosition + rotationMatrix2 * (Vector2(Scalar(x), floatY) - anchorPosition));
6985 
6986  if (sourceLocation.x() >= 0 && sourceLocation.y() >= 0 && sourceLocation.x() <= width_1 && sourceLocation.y() <= height_1)
6987  {
6988  interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(source, width, height, sourcePaddingElements, sourceLocation, (uint8_t*)(targetPixel));
6989  }
6990  else
6991  {
6992  *targetPixel = bColor;
6993  }
6994 
6995  ++targetPixel;
6996  }
6997  }
6998 }
6999 
7000 } // namespace CV
7001 
7002 } // namespace Ocean
7003 
7004 #endif // META_OCEAN_CV_FRAME_INTERPOLATOR_BILINEAR_H
This class implements the abstract base class for all AnyCamera objects.
Definition: AnyCamera.h:130
virtual unsigned int width() const =0
Returns the width of the camera image.
virtual VectorT2< T > projectToImageIF(const VectorT3< T > &objectPoint) const =0
Projects a 3D object point into the camera frame.
virtual unsigned int height() const =0
Returns the height of the camera image.
virtual bool isValid() const =0
Returns whether this camera is valid.
virtual VectorT3< T > vector(const VectorT2< T > &distortedImagePoint, const bool makeUnitVector=true) const =0
Returns a vector starting at the camera's center and intersecting a given 2D point in the image.
Helper class allowing to determine the offset that is necessary to access the alpha channel.
Definition: FrameBlender.h:60
static constexpr unsigned int data()
Returns the offset that is applied to access the first data channel.
Definition: FrameBlender.h:1160
The following comfort class provides comfortable functions simplifying prototyping applications but a...
Definition: FrameInterpolatorBilinear.h:60
static bool homographies(const Frame &input, Frame &output, const SquareMatrix3 homographies[4], const Vector2 &outputQuadrantCenter, const uint8_t *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
static bool zoom(const Frame &source, Frame &target, const Scalar zoomFactor, Worker *worker=nullptr)
Zooms into a given input frame (or zooms out) and stores the zoomed image content in an output frame.
static bool homographyMask(const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 &input_H_output, Worker *worker=nullptr, const uint8_t maskValue=0xFF, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
static bool lookupMask(const Frame &input, Frame &output, Frame &outputMask, const LookupTable &input_LT_output, const bool offset, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given input frame into an output frame by application of an interpolation lookup table a...
static bool homographyWithCameraMask(const AnyCamera &inputCamera, const AnyCamera &outputCamera, const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 &homography, Worker *worker=nullptr, const uint8_t maskValue=0xFFu)
Transforms a given input frame into an output frame by application of a homography.
static bool rotate(const Frame &source, Frame &target, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, Worker *worker=nullptr, const uint8_t *borderColor=nullptr)
Rotates a given frame by a bilinear interpolation.
static bool resize(const Frame &source, Frame &target, Worker *worker=nullptr)
Resizes/rescales a given frame by application of a bilinear interpolation.
static bool homographiesMask(const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 *homographies, const Vector2 &outputQuadrantCenter, Worker *worker=nullptr, const uint8_t maskValue=0xFF, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
static bool interpolatePixel(const TSource *frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2< TScalar > &position, TTarget *result, const TIntermediate &resultBias=TIntermediate(0))
Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data ty...
Definition: FrameInterpolatorBilinear.h:1521
static bool resampleCameraImage(const Frame &sourceFrame, const AnyCamera &sourceCamera, const SquareMatrix3 &source_R_target, const AnyCamera &targetCamera, Frame &targetFrame, LookupCorner2< Vector2 > *source_OLT_target=nullptr, Worker *worker=nullptr, const unsigned int binSizeInPixel=8u, const void *borderColor=nullptr)
Re-samples a camera image which has been captured with a camera profile as if the image would have be...
static bool homographyWithCamera(const PinholeCamera &inputCamera, const PinholeCamera &outputCamera, const Frame &input, Frame &output, const SquareMatrix3 &homography, const bool useDistortionParameters, const uint8_t *borderColor=nullptr, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of a homography.
static bool lookup(const Frame &input, Frame &output, const LookupTable &input_LT_output, const bool offset, const void *borderColor, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
static bool affine(const Frame &source, Frame &target, const SquareMatrix3 &source_A_target, const uint8_t *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &targetOrigin=PixelPositionI(0, 0))
Applies an affine transformation to an image.
static bool interpolatePixel8BitPerChannel(const uint8_t *frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2< TScalar > &position, uint8_t *result)
Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
Definition: FrameInterpolatorBilinear.h:1434
static bool homography(const Frame &input, Frame &output, const SquareMatrix3 &input_H_output, const void *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
This class implements highly optimized interpolation functions with fixed properties.
Definition: FrameInterpolatorBilinear.h:341
static void resize400x400To256x256_8BitPerChannel(const uint8_t *const source, uint8_t *const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 256x256 ...
static void resize400x400To224x224_8BitPerChannel(const uint8_t *const source, uint8_t *const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 224x224 ...
This class implements bilinear frame interpolator functions.
Definition: FrameInterpolatorBilinear.h:44
static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelNEON(const uint8_t *source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const typename DataType< uint8_t, tChannels >::Type &borderColor, const uint32x4_t &m128_factorsRight, const uint32x4_t &m128_factorsBottom, typename DataType< uint8_t, tChannels >::Type *targetPositionPixels)
Interpolates 4 independent pixels concurrently based on already known locations (top-left,...
Definition: FrameInterpolatorBilinear.h:4285
static void homography(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, const T *borderColor, T *output, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a input frame with (almost) arbitrary pixel format into an output frame by application of ...
Definition: FrameInterpolatorBilinear.h:1733
static void resampleCameraImage(const T *sourceFrame, const AnyCamera &sourceCamera, const SquareMatrix3 &source_R_target, const AnyCamera &targetCamera, T *targetFrame, const unsigned int sourceFramePaddingElements, const unsigned int targetFramePaddingElements, LookupCorner2< Vector2 > *source_OLT_target=nullptr, Worker *worker=nullptr, const unsigned int binSizeInPixel=8u, const T *borderColor=nullptr)
Re-samples a camera image which has been captured with a camera profile as if the image would have be...
Definition: FrameInterpolatorBilinear.h:1893
static void interpolateRowVerticalNEON(const T *sourceRowTop, const T *sourceRowBottom, T *targetRow, const unsigned int elements, const float factorBottom)
Applies a (vertical) linear interpolation between two rows with arbitrary data types.
static void homographyWithCamera8BitPerChannel(const PinholeCamera &inputCamera, const PinholeCamera &outputCamera, const uint8_t *input, const SquareMatrix3 &homography, const bool useDistortionParameters, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition: FrameInterpolatorBilinear.h:1799
static void lookup(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable &input_LT_output, const bool offset, const T *borderColor, T *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
Definition: FrameInterpolatorBilinear.h:1833
static void interpolatePixel8BitPerChannel(const uint8_t *frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2< TScalar > &position, uint8_t *result)
Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
Definition: FrameInterpolatorBilinear.h:1960
static void affine8BitPerChannelSSESubset(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 *source_A_target, const uint8_t *borderColor, uint8_t *target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using SSE).
Definition: FrameInterpolatorBilinear.h:2464
static Scalar patchIntensitySum1Channel(const uint32_t *linedIntegralFrame, const unsigned int frameWidth, const unsigned int frameHeight, const unsigned int lineIntegralFramePaddingElements, const Vector2 &center, const CV::PixelCenter pixelCenter, const unsigned int patchWidth, const unsigned int patchHeight)
Interpolate the sum of intensity values of an image patch in a frame, while the frame is provided as ...
static void homographyWithCameraMask8BitPerChannel(const PinholeCamera &inputCamera, const PinholeCamera &outputCamera, const uint8_t *input, const unsigned int inputPaddingElements, const SquareMatrix3 &homography, uint8_t *output, uint8_t *outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition: FrameInterpolatorBilinear.h:1816
static void homographiesMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *homographies, uint8_t *output, uint8_t *outputMask, const uint8_t maskValue, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition: FrameInterpolatorBilinear.h:4507
static void homographiesMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], uint8_t *output, uint8_t *outputMask, const Vector2 &outputQuadrantCenter, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given 8 bit per channel input frame into an output frame by application of four homograp...
Definition: FrameInterpolatorBilinear.h:1786
static void interpolateRowHorizontal8BitPerChannel7BitPrecision(const uint8_t *extendedSourceRow, uint8_t *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const uint8_t *interpolationFactors)
Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
static void homography8BitPerChannelNEONSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition: FrameInterpolatorBilinear.h:3577
static void interpolateRowVertical8BitPerChannel7BitPrecisionNEON(const uint8_t *sourceRowTop, const uint8_t *sourceRowBottom, uint8_t *targetRow, const unsigned int elements, const unsigned int factorBottom)
Applies a (vertical) linear interpolation between two rows with 8 bit per channel.
static void homographySubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const T *borderColor, T *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms a frame with (almost) arbitrary pixel format using the given homography.
Definition: FrameInterpolatorBilinear.h:2380
static void homographyMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, uint8_t *output, uint8_t *outputMask, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition: FrameInterpolatorBilinear.h:1770
static OCEAN_FORCE_INLINE void interpolate8Pixels1Channel8BitNEON(const uint8x8_t &topLeft_u_8x8, const uint8x8_t &topRight_u_8x8, const uint8x8_t &bottomLeft_u_8x8, const uint8x8_t &bottomRight_u_8x8, const uint8x16_t &factorsRight_factorsBottom_128_u_8x16, uint8_t *targetPositionPixels)
Interpolates 8 independent pixels concurrently of a 1 channel frame, the source pixel locations must ...
Definition: FrameInterpolatorBilinear.h:3957
static void homographies8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *homographies, const uint8_t *borderColor, uint8_t *output, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homographies.
Definition: FrameInterpolatorBilinear.h:4351
static void homographyWithCamera8BitPerChannelSubset(const PinholeCamera *inputCamera, const PinholeCamera *outputCamera, const PinholeCamera::DistortionLookup *outputCameraDistortionLookup, const uint8_t *input, const SquareMatrix3 *normalizedHomography, const bool useDistortionParameters, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms an 8 bit per channel frame using the given homography.
Definition: FrameInterpolatorBilinear.h:4587
static void affine8BitPerChannel(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 &source_A_target, const uint8_t *borderColor, uint8_t *target, const PixelPositionI &targetOrigin, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Apply an affine transforms to a N-channel, 8-bit frame The target frame must have the same pixel form...
Definition: FrameInterpolatorBilinear.h:1657
static void homographyWithCameraMask8BitPerChannelSubset(const PinholeCamera *inputCamera, const PinholeCamera *outputCamera, const PinholeCamera::DistortionLookup *outputCameraDistortionLookup, const uint8_t *input, const unsigned int inputPaddingElements, const SquareMatrix3 *normalizedHomography, uint8_t *output, uint8_t *outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows)
Transforms an 8 bit per channel frame using the given homography.
Definition: FrameInterpolatorBilinear.h:4633
static void affine8BitPerChannelNEONSubset(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 *source_A_target, const uint8_t *borderColor, uint8_t *target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using NEON).
Definition: FrameInterpolatorBilinear.h:3327
static void lookup8BitPerChannelSubsetNEON(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame into an output frame by application of an interpolation lo...
Definition: FrameInterpolatorBilinear.h:4992
static void interpolateRowHorizontalNEON(const T *extendedSourceRow, T *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const float *interpolationFactorsRight)
Applies a (horizontal) linear interpolation for one row with arbitrary data type.
static void scale8BitPerChannelSubset7BitPrecisionNEON(const uint8_t *source, uint8_t *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int channels, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Rescales a subset of a given frame with 8 bit per channel by a bilinear interpolation.
Definition: FrameInterpolatorBilinear.h:5407
LookupCorner2< Vector2 > LookupTable
Definition of a lookup table for 2D vectors.
Definition: FrameInterpolatorBilinear.h:50
static OCEAN_FORCE_INLINE __m128i interpolate4Pixels8BitPerChannelSSE(const __m128i &m128_sourcesTopLeft, const __m128i &m128_sourcesTopRight, const __m128i &m128_sourcesBottomLeft, const __m128i &m128_sourcesBottomRight, const __m128i &m128_factorsTopLeft, const __m128i &m128_factorsTopRight, const __m128i &m128_factorsBottomLeft, const __m128i &m128_factorsBottomRight)
Interpolates 4 independent pixels concurrently based on already known locations (top-left,...
static void interpolateRowHorizontal(const T *extendedSourceRow, T *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const float *interpolationFactorsRight)
Applies a (horizontal) linear interpolation for one row with arbitrary data type.
Definition: FrameInterpolatorBilinear.h:5372
static void rotate8BitPerChannelSubset(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const uint8_t *borderColor, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Rotates a subset of a given frame by a bilinear interpolation.
Definition: FrameInterpolatorBilinear.h:6956
static void lookupMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable &input_LT_output, const bool offset, uint8_t *output, uint8_t *outputMask, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
Definition: FrameInterpolatorBilinear.h:1880
static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelSSE(const uint8_t *source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const typename DataType< uint8_t, tChannels >::Type &borderColor, const __m128i &m128_factorsRight, const __m128i &m128_factorsBottom, typename DataType< uint8_t, tChannels >::Type *targetPositionPixels)
Interpolates 4 independent pixels concurrently based on already known locations (top-left,...
Definition: FrameInterpolatorBilinear.h:3259
static void homographies8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], const uint8_t *borderColor, uint8_t *output, const Vector2 &outputQuadrantCenter, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of four homograp...
Definition: FrameInterpolatorBilinear.h:1757
static void lookup8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame with uint8_t as element type into an output frame by appli...
Definition: FrameInterpolatorBilinear.h:4681
static void resize(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Resizes a given frame with (almost) arbitrary data type (e.g., float, double, int) by using a bilinea...
Definition: FrameInterpolatorBilinear.h:1608
static bool coversHomographyInputFrame(const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int outputWidth, const unsigned int outputHeight, const SquareMatrix3 &input_H_output, const int outputOriginX=0, const int outputOriginY=0)
Checks whether the application of a given homography for a specified input frame and output frame cov...
static void scale(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rescales a given frame with arbitrary data type (e.g., float, double, int) by using a bilinear interp...
Definition: FrameInterpolatorBilinear.h:1621
static void lookupSubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, const T *borderColor, T *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame with arbitrary element type into an output frame by applic...
Definition: FrameInterpolatorBilinear.h:4735
static void scale8BitPerChannelSubset(const uint8_t *source, uint8_t *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Resizes a subset of a given frame with 8 bit per channel by a bilinear interpolation.
Definition: FrameInterpolatorBilinear.h:5231
static void rotate8BitPerChannel(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr, const uint8_t *borderColor=nullptr)
Rotates a given frame by a bilinear interpolation.
Definition: FrameInterpolatorBilinear.h:1942
static void interpolateRowVertical(const T *sourceRowTop, const T *sourceRowBottom, T *targetRow, const unsigned int elements, const float factorBottom)
Applies a (vertical) linear interpolation between two rows with arbitrary data types.
Definition: FrameInterpolatorBilinear.h:5352
static void homography8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, const uint8_t *borderColor, uint8_t *output, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition: FrameInterpolatorBilinear.h:1695
static void interpolate1PixelFullAlphaBorder8BitPerChannel(const uint8_t *frame, const unsigned int width, const unsigned int height, const Vector2 &position, uint8_t *result, const unsigned int framePaddingElements)
Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame wit...
Definition: FrameInterpolatorBilinear.h:2139
static void lookupMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, uint8_t *output, uint8_t *outputMask, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
Definition: FrameInterpolatorBilinear.h:5134
static void affine8BitPerChannelSubset(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 *source_A_target, const uint8_t *borderColor, uint8_t *target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Subset function to apply an affine transform to an N-channel, 8-bit unsigned image.
Definition: FrameInterpolatorBilinear.h:2228
static void homography8BitPerChannelSSESubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition: FrameInterpolatorBilinear.h:2649
static void interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON(const uint8_t *extendedSourceRow, uint8_t *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const uint8_t *interpolationFactors)
Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
static void scale8BitPerChannel(const uint8_t *source, uint8_t *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rescales a given frame with 8 bit per data channel by using a bilinear interpolation with user-defined scaling factors.
Definition: FrameInterpolatorBilinear.h:5187
static void homographyMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, uint8_t *output, uint8_t *outputMask, const uint8_t maskValue, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition: FrameInterpolatorBilinear.h:4432
static void scaleSubset(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Resizes a subset of a given frame with arbitrary data type by a bilinear interpolation.
Definition: FrameInterpolatorBilinear.h:6832
static void homography8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition: FrameInterpolatorBilinear.h:2303
static void interpolatePixel(const TSource *frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2< TScalar > &position, TTarget *result, const TIntermediate &resultBias=TIntermediate(0))
Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data type.
Definition: FrameInterpolatorBilinear.h:2053
This class implements a 2D pixel position with pixel precision.
Definition: PixelPosition.h:65
T y() const
Returns the vertical coordinate position of this object.
Definition: PixelPosition.h:470
T x() const
Returns the horizontal coordinate position of this object.
Definition: PixelPosition.h:458
static __m128i set128i(const unsigned long long high64, const unsigned long long low64)
Sets a 128i value by two 64 bit values.
Definition: SSE.h:3770
static Caller< void > createStatic(typename StaticFunctionPointerMaker< void, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass >::Type function)
Creates a new caller container for a static function with no function parameter.
Definition: Caller.h:2876
Template class allowing to define an array of data types.
Definition: DataType.h:27
This class implements Ocean's image class.
Definition: Frame.h:1760
bool isValid() const
Returns whether this frame is valid.
Definition: Frame.h:4416
Definition of a frame type composed by the frame dimension, pixel format and pixel origin.
Definition: Frame.h:30
size_t sizeY() const
Returns the vertical dimension of this lookup object.
Definition: Lookup2.h:947
size_t sizeX() const
Returns the horizontal dimension of this lookup object.
Definition: Lookup2.h:941
size_t binsY() const
Returns the number of vertical bins of this lookup object.
Definition: Lookup2.h:959
size_t binsX() const
Returns the number of horizontal bins of this lookup object.
Definition: Lookup2.h:953
This class implements a 2D lookup object with values at the bins' corners defining the individual lookup values.
Definition: Lookup2.h:636
Vector2 binTopLeftCornerPosition(const size_t binX, const size_t binY) const
Returns the corner position (the top left corner) of a specific bin in relation to the dimension of this lookup object.
Definition: Lookup2.h:1786
void setBinTopLeftCornerValue(const size_t binX, const size_t binY, const T &value)
Sets the value of one specific lookup bin's top left corner.
Definition: Lookup2.h:2128
void bilinearValues(const size_t y, TTarget *values) const
Applies a lookup for an entire row in this lookup object.
Definition: Lookup2.h:1864
This class implements an object able to allocate memory.
Definition: base/Memory.h:22
bool isNull() const
Returns whether this object does not hold any memory.
Definition: base/Memory.h:401
void * data()
Returns the pointer to the writable memory which is allocated by this object.
Definition: base/Memory.h:303
This class provides basic numeric functionalities.
Definition: Numeric.h:57
static constexpr T eps()
Returns a small epsilon.
static T floor(const T value)
Returns the largest integer value that is not greater than the given value.
Definition: Numeric.h:2026
static constexpr bool isEqualEps(const T value)
Returns whether a value is smaller than or equal to a small epsilon.
Definition: Numeric.h:2087
static constexpr bool isNotEqualEps(const T value)
Returns whether a value is not smaller than or equal to a small epsilon.
Definition: Numeric.h:2237
unsigned int width() const
Returns the width of the camera image.
Definition: PinholeCamera.h:1300
const SquareMatrixT3< T > & invertedIntrinsic() const
Returns the inverted intrinsic camera matrix.
Definition: PinholeCamera.h:1263
const SquareMatrixT3< T > & intrinsic() const
Returns the intrinsic camera matrix.
Definition: PinholeCamera.h:1257
unsigned int height() const
Returns the height of the camera image.
Definition: PinholeCamera.h:1306
VectorT2< T > normalizedImagePoint2imagePoint(const VectorT2< T > &normalizedImagePoint, const bool distortImagePoint) const
Calculates the image point corresponding to a given normalized image point.
Definition: PinholeCamera.h:1602
This class implements a 2x2 square matrix.
Definition: SquareMatrix2.h:73
bool isNull() const
Returns whether this matrix is a zero matrix.
Definition: SquareMatrix3.h:1333
const T * data() const
Returns a pointer to the internal values.
Definition: SquareMatrix3.h:1046
bool isOrthonormal(const T epsilon=NumericT< T >::eps()) const
Returns whether this matrix is an orthonormal matrix.
Definition: SquareMatrix3.h:1365
const T & x() const noexcept
Returns the x value.
Definition: Vector2.h:698
const T & y() const noexcept
Returns the y value.
Definition: Vector2.h:710
bool isEqual(const VectorT2< T > &vector, const T eps) const
Returns whether two vectors are equal up to a specified epsilon.
Definition: Vector2.h:746
const T & y() const noexcept
Returns the y value.
Definition: Vector3.h:812
const T & x() const noexcept
Returns the x value.
Definition: Vector3.h:800
const T & z() const noexcept
Returns the z value.
Definition: Vector3.h:824
This class implements a worker able to distribute function calls over different threads.
Definition: Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
T minmax(const T &lowerBoundary, const T &value, const T &upperBoundary)
This function fits a given parameter into a specified value range.
Definition: base/Utilities.h:903
PixelCenter
Definition of individual centers of pixels.
Definition: CV.h:117
PixelPositionT< int > PixelPositionI
Definition of a PixelPosition object with a data type allowing positive and negative coordinate values.
Definition: PixelPosition.h:41
@ PC_TOP_LEFT
The center of a pixel is in the upper-left corner of each pixel's square.
Definition: CV.h:133
@ PC_CENTER
The center of a pixel is located in the center of each pixel's square (with an offset of 0.5).
Definition: CV.h:150
SquareMatrixT3< Scalar > SquareMatrix3
Definition of the SquareMatrix3 object, depending on the OCEAN_MATH_USE_SINGLE_PRECISION either with single or double precision float data type.
Definition: SquareMatrix3.h:35
RotationT< Scalar > Rotation
Definition of the Rotation object, depending on the OCEAN_MATH_USE_SINGLE_PRECISION flag either with single or double precision float data type.
Definition: Rotation.h:31
float Scalar
Definition of a scalar type.
Definition: Math.h:128
VectorT3< Scalar > Vector3
Definition of a 3D vector.
Definition: Vector3.h:22
VectorT2< Scalar > Vector2
Definition of a 2D vector.
Definition: Vector2.h:21
The namespace covering the entire Ocean framework.
Definition: Accessor.h:15
Default definition of a type with tBytes bytes.
Definition: DataType.h:32
float Type
The 32 bit floating point data type for any data type T but 'double'.
Definition: DataType.h:373