/*
Copyright (c) Meta Platforms, Inc. and affiliates.

This source code is licensed under the MIT license found in the
LICENSE file in the root directory of this source tree.
*/
11 #include "ocean/cv/CV.h"
12 #include "ocean/cv/FrameBlender.h"
13 #include "ocean/cv/PixelPosition.h"
14 #include "ocean/cv/SSE.h"
16 #include "ocean/base/DataType.h"
17 #include "ocean/base/Frame.h"
18 #include "ocean/base/Memory.h"
19 #include "ocean/base/Worker.h"
23 #include "ocean/math/AnyCamera.h"
26 #include "ocean/math/Lookup2.h"
28 #include "ocean/math/Quaternion.h"
31 #include "ocean/math/Vector2.h"
33 namespace Ocean
34 {
36 namespace CV
37 {
39 /**
40  * This class implements bilinear frame interpolator functions.
41  * @ingroup cv
42  */
43 class OCEAN_CV_EXPORT FrameInterpolatorBilinear
44 {
45  public:
47  /**
48  * Definition of a lookup table for 2D vectors.
49  */
52  public:
54  /**
55  * The following comfort class provides comfortable functions simplifying prototyping applications but also increasing binary size of the resulting applications.
56  * Best practice is to avoid using these functions if binary size matters,<br>
57  * as for every comfort function a corresponding function exists with specialized functionality not increasing binary size significantly.<br>
58  */
59  class OCEAN_CV_EXPORT Comfort
60  {
61  public:
63  /**
64  * Resizes/rescales a given frame by application of a bilinear interpolation.
65  * @param source The source frame to resize, must be valid
66  * @param target Resulting target frame with identical frame pixel format and pixel origin as the source frame, must be valid
67  * @param worker Optional worker object used for load distribution
68  * @return True, if the frame could be resized
69  */
70  static bool resize(const Frame& source, Frame& target, Worker* worker = nullptr);
72  /**
73  * Resizes/rescales a given frame by application of a bilinear interpolation.
74  * @param frame The frame to resize, must be valid
75  * @param width The width of the resized frame in pixel, with range [1, infinity)
76  * @param height The height of the resized frame in pixel, with range [1, infinity)
77  * @param worker Optional worker object used for load distribution
78  * @return True, if the frame could be resized
79  */
80  static inline bool resize(Frame& frame, const unsigned int width, const unsigned int height, Worker* worker = nullptr);
82  /**
83  * Zooms into a given input frame (or zooms out) and stores the zoomed image content in an output frame.
84  * The resulting zoomed image will have the same frame type (frame resolution, pixel format, pixel origin) as the input image.<br>
85  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
86  * @param source The source frame for which the zoomed image content will be created, must be valid
87  * @param target The resulting target frame which will receive the zoomed image, will be set to the same frame type as the source frame, can be invalid
88  * @param zoomFactor The zoom factor to be applied, a factor < 1 will zoom out, a factor > 1 will zoom in, with range (0, infinity)
89  * @param worker Optional worker object to distribute the computation to several CPU cores
90  * @return True, if succeeded
91  */
92  static bool zoom(const Frame& source, Frame& target, const Scalar zoomFactor, Worker* worker = nullptr);
94  /**
95  * Transforms a given input frame into an output frame (with arbitrary frame dimension) by application of a homography.
96  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
97  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
98  * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).<br>
99  * Information: This function is the equivalent to OpenCV's cv::warpPerspective().
100  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
101  * @param input The input frame that will be transformed, must be valid
102  * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must be valid
103  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
104  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels and the data type of the pixel elements, nullptr to assign 0 to each channel
105  * @param worker Optional worker object to distribute the computational load
106  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
107  * @return True, if succeeded
108  */
109  static bool homography(const Frame& input, Frame& output, const SquareMatrix3& input_H_output, const void* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
111  /**
112  * Transforms a given input frame into an output frame (with arbitrary frame dimension) by application of four homographies.
113  * For each quadrant of the output frame an individual homography is applied while the final result is interpolated between the four homographies.<br>
114  * The quadrant order of the homographies is as follows: top left, top right, bottom left, bottom right.<br>
115  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
116  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
117  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
118  * @param input The input frame that will be transformed
119  * @param output The output frame resulting by application of the four given homographies, with same pixel format and pixel origin as the input frame, must have a valid dimension
120  * @param homographies Four homographies used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
121  * @param outputQuadrantCenter The center position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, output.width())x[0, output.height())
122  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
123  * @param worker Optional worker object to distribute the computational load
124  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
125  * @return True, if succeeded
126  */
127  static bool homographies(const Frame& input, Frame& output, const SquareMatrix3 homographies[4], const Vector2& outputQuadrantCenter, const uint8_t* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
129  /**
130  * Transforms a given input frame into an output frame (with arbitrary frame dimension) by application of a homography.
131  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
132  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
133  * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
134  * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).
135  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
136  * @param input The input frame that will be transformed, must be valid
137  * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must have a valid dimension
138  * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame, must be valid and must have the same frame dimension as the output frame
139  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
140  * @param worker Optional worker object to distribute the computational load
141  * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
142  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
143  * @return True, if succeeded
144  * @see coversHomographyInputFrame().
145  */
146  static bool homographyMask(const Frame& input, Frame& output, Frame& outputMask, const SquareMatrix3& input_H_output, Worker* worker = nullptr, const uint8_t maskValue = 0xFF, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
148  /**
149  * Transforms a given input frame into an output frame (with arbitrary frame dimension) by application of four homographies.
150  * For each quadrant of the output frame an individual homography is applied while the final result is interpolated between the four homographies.<br>
151  * The quadrant order of the homographies is as follows: top left, top right, bottom left, bottom right.<br>
152  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
153  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
154  * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
155  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
156  * @param input The input frame that will be transformed, must be valid
157  * @param output The output frame resulting by application of the four given homographies, with same pixel format and pixel origin as the input frame, must have a valid dimension
158  * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
159  * @param homographies Four homographies used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
160  * @param outputQuadrantCenter The center position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, output.width())x[0, output.height())
161  * @param worker Optional worker object to distribute the computational load
162  * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
163  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
164  * @return True, if succeeded
165  * @see coversHomographyInputFrame().
166  */
167  static bool homographiesMask(const Frame& input, Frame& output, Frame& outputMask, const SquareMatrix3* homographies, const Vector2& outputQuadrantCenter, Worker* worker = nullptr, const uint8_t maskValue = 0xFF, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
169  /**
170  * Transforms a given input frame into an output frame by application of a homography.
171  * This function also uses a camera profile to improve the interpolation accuracy.<br>
172  * The given homography is transformed into a homography for normalized image coordinates.<br>
173  * Thus, also distortion parameters of the camera profile can be applied.<br>
174  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
175  * @param inputCamera The pinhole camera profile to be applied for the input frame
176  * @param outputCamera The pinhole camera profile to be applied for the output frame
177  * @param input The input frame that will be transformed, the frame dimension must match the dimension of the input camera
178  * @param output The output frame resulting by application of the given homography, the frame dimension must match the dimension of the output camera
179  * @param homography The homography used to transform the given input frame, which includes both camera profiles: H = Ki * H' * Ko^-1
180  * @param useDistortionParameters True, to apply the distortion parameters of the camera profile
181  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
182  * @param worker Optional worker object to distribute the computational load
183  * @return True, if succeeded
184  * @see homographyWithCameraMask(), homography().
185  */
186  static bool homographyWithCamera(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const Frame& input, Frame& output, const SquareMatrix3& homography, const bool useDistortionParameters, const uint8_t* borderColor = nullptr, Worker* worker = nullptr);
188  /**
189  * Transforms a given input frame into an output frame by application of a homography.
190  * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
191  * This function also uses a camera profile to improve the interpolation accuracy.<br>
192  * The given homography is transformed into a homography for normalized image coordinates.<br>
193  * Thus, also distortion parameters of the camera profile can be applied.<br>
194  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
195  * @param inputCamera The camera profile to be applied for the input frame
196  * @param outputCamera The camera profile to be applied for the output frame
197  * @param input The input frame that will be transformed, the frame dimension must match the dimension of the input camera
198  * @param output The output frame resulting by application of the given homography, the frame dimension must match the dimension of the output camera
199  * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
200  * @param homography The homography used to transform the given input frame, which includes both camera profiles: H = Ki * H' * Ko^-1
201  * @param worker Optional worker object to distribute the computational load
202  * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
203  * @return True, if succeeded
204  * @see homographyWithCamera(), homography().
205  */
206  static bool homographyWithCameraMask(const AnyCamera& inputCamera, const AnyCamera& outputCamera, const Frame& input, Frame& output, Frame& outputMask, const SquareMatrix3& homography, Worker* worker = nullptr, const uint8_t maskValue = 0xFFu);
208  /**
209  * Transforms a given input frame into an output frame by application of an interpolation lookup table.
210  * The output frame must have the same pixel format and pixel origin as the input frame.<br>
211  * Information: This function is the equivalent to OpenCV's cv::remap().
212  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
213  * @param input The input frame that will be transformed
214  * @param output Resulting output frame, the dimension will be set to match the size of the lookup table, pixel format and pixel origin will be set to match the given input frame
215  * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
216  * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
217  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
218  * @param worker Optional worker object to distribute the computation
219  * @return True, if succeeded
220  */
221  static bool lookup(const Frame& input, Frame& output, const LookupTable& input_LT_output, const bool offset, const void* borderColor, Worker* worker = nullptr);
223  /**
224  * Transforms a given input frame into an output frame by application of an interpolation lookup table and creates an additional mask as output.
225  * The output frame must have the same pixel format and pixel origin as the input frame.<br>
226  * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
227  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
228  * @param input The input frame which will be transformed
229  * @param output Resulting output frame, the frame dimension will be set to match the size of the lookup table, pixel format and pixel origin will be set to match the given input frame
230  * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
231  * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
232  * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
233  * @param worker Optional worker object to distribute the computation
234  * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
235  * @return True, if succeeded
236  */
237  static bool lookupMask(const Frame& input, Frame& output, Frame& outputMask, const LookupTable& input_LT_output, const bool offset, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
239  /**
240  * Applies an affine transformation to an image.
241  * The target frame must have the same pixel format and pixel origin as the source frame, however the dimension (and position) of the target frame can be arbitrary.
242  * This function allows the creation of a target frame fully covering the source frame (if the position and dimension of the target frame covers the transformation of the affine transformation).
243  * The multiplication of the affine transformation with pixel location in the target image yield their location in the source image, i.e., sourcePoint = source_A_target * targetPoint.
244  * The parameter 'targetOrigin' applies an additional translation to the provided affine transformation i.e., source_A_target * create_translation_matrix3x3(targetOrigin.x(), targetOrigin.y()).
245  * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
246  * <pre>
247  * a c e
248  * b d f
249  * 0 0 1
250  * </pre>
251  * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
252  * Information: This function is the equivalent to OpenCV's cv::warpAffine().
253  * Note: For applications running on mobile devices, in order to keep the impact on binary size to a minimum please prefer a specialized transformation function (those that work on image pointers instead of Frame instances).
254  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
255  * @param source The source frame that will be transformed, must be valid
256  * @param target The resulting frame after applying the affine transformation to the source frame; pixel format and pixel origin must be identical to source frame; memory of target frame must be allocated by the caller
257  * @param source_A_target Affine transform used to transform the given source frame, transforming points defined in the target frame into points defined in the source frame
258  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
259  * @param worker Optional worker object to distribute the computational load
260  * @param targetOrigin The origin of the target frame defining the global position of the target frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
261  * @return True, if succeeded
262  */
263  static bool affine(const Frame& source, Frame& target, const SquareMatrix3& source_A_target, const uint8_t* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& targetOrigin = PixelPositionI(0, 0));
265  /**
266  * Rotates a given frame by a bilinear interpolation.
267  * The frame will be rotated around a specified anchor position (inside or outside the frame).<br>
268  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
269  * @param source The source frame to be rotated, must be valid
270  * @param target The target frame which will receive the rotated image, will be set to the same frame type as the source frame, can be invalid
271  * @param horizontalAnchorPosition Position of the rotation anchor in the horizontal direction, with range (-infinity, infinity)
272  * @param verticalAnchorPosition Position of the rotation anchor in the vertical direction, with range (-infinity, infinity)
273  * @param angle The counter clockwise rotation angle in radian, with range [0, 2PI)
274  * @param worker Optional worker object to distribute the computation to several CPU cores
275  * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use 0x00 for each channel
276  * @return True, if succeeded
277  */
278  static bool rotate(const Frame& source, Frame& target, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, Worker* worker = nullptr, const uint8_t* borderColor = nullptr);
280  /**
281  * Re-samples a camera image which has been captured with a camera profile as if the image would have been captured with a second camera profile.
282  * The function can be used e.g., to rectify a fisheye camera image into a pinhole camera image.
283  * @param sourceFrame The source image captured with the source camera profile, must be valid
284  * @param sourceCamera The source camera profile which has been used to capture the source image, with resolution sourceFrame.width() x sourceFrame.height(), must be valid
285  * @param source_R_target The rotation transforming 3D points defined in the coordinate system of the target camera image to 3D points defined in the coordinate system of the source camera image, must be valid
286  * @param targetCamera The camera profile of the target frame, must be valid
287  * @param targetFrame The resulting target image, with resolution targetCamera.width() x targetCamera.height(), must be valid
288  * @param source_OLT_target Optional resulting offset lookup table between target image points and source image points
289  * @param worker Optional worker object to distribute the computational load
290  * @param binSizeInPixel The size in pixel of the interpolation bins used for building the lookup table, with range [1, infinity)
291  * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use ElementType(0) for each channel
292  * @return True, if succeeded
293  * @see resampleCameraImageImage8BitPerChannel().
294  */
295  static bool resampleCameraImage(const Frame& sourceFrame, const AnyCamera& sourceCamera, const SquareMatrix3& source_R_target, const AnyCamera& targetCamera, Frame& targetFrame, LookupCorner2<Vector2>* source_OLT_target = nullptr, Worker* worker = nullptr, const unsigned int binSizeInPixel = 8u, const void* borderColor = nullptr);
297  /**
298  * Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
299  * This function uses an integer interpolation with a precision of 1/128.
300  * @param frame The frame to determine the pixel values from, must be valid
301  * @param channels Number of channels of the given frame, with range [1, 8]
302  * @param width The width of the frame in pixel, with range [1, infinity)
303  * @param height The height of the frame in pixel, with range [1, infinity)
304  * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
305  * @param pixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
306  * @param position The position for which the interpolated pixel will be determined, with ranges [0, width - 1]x[0, height - 1] for PC_TOP_LEFT, [0, width]x[0, height] for PC_CENTER
307  * @param result Resulting pixel values, must be valid
308  * @return True, if succeeded
309  * @tparam TScalar The scalar data type of the sub-pixel position
310  */
311  template <typename TScalar = Scalar>
312  static bool interpolatePixel8BitPerChannel(const uint8_t* frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2<TScalar>& position, uint8_t* result);
314  /**
315  * Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data type.
316  * This function uses floating point precision during interpolation.
317  * @param frame The frame to determine the pixel values from, must be valid
318  * @param channels Number of channels of the given frame, with range [1, 8]
319  * @param width The width of the frame in pixel, with range [1, infinity)
320  * @param height The height of the frame in pixel, with range [1, infinity)
321  * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
322  * @param pixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
323  * @param position The position to determine the interpolated pixel values for, with range [0, width)x[0, height)
324  * @param result Resulting interpolated pixel value(s), must be valid
325  * @param resultBias Optional bias value which will be added to the interpolation result e.g. to handle rounding, with range (-infinity, infinity), default is zero
326  * @return True, if succeeded
327  * @tparam TSource The data type of the provided pixel values in the (source) frame
328  * @tparam TTarget The data type of the resulting interpolated value(s)
329  * @tparam TScalar The data type of each coordinate of the provided interpolation location, should be either Scalar, float, or double
330  * @tparam TIntermediate The data type of the intermediate interpolation result before assigning the result
331  */
332  template <typename TSource, typename TTarget, typename TScalar = Scalar, typename TIntermediate = TScalar>
333  static bool interpolatePixel(const TSource* frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2<TScalar>& position, TTarget* result, const TIntermediate& resultBias = TIntermediate(0));
334  };
336  /**
337  * This class implements highly optimized interpolation functions with fixed properties.
338  * The functions can be significantly faster as these functions are tailored to the specific properties.
339  */
340  class OCEAN_CV_EXPORT SpecialCases
341  {
342  public:
344  /**
345  * Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 224x224 by using a bilinear interpolation.
346  * This function exploits the fact that lookup locations and interpolation factors repeat after 25 pixels (14 pixels in the target resolution, as 224 / 400 == 14 / 25).
347  * @param source The source frame buffer with resolution 400x400, must be valid
348  * @param target The target frame buffer receiving the resized image information, with resolution 224x224, must be valid
349  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
350  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
351  * @see FrameInterpolatorBilinear::resize<T, tChannels>().
352  */
353  static void resize400x400To224x224_8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
355  /**
356  * Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 256x256 by using a bilinear interpolation.
357  * This function exploits the fact that lookup locations and interpolation factors repeat after 25 pixels (16 pixels in the target resolution, as 256 / 400 == 16 / 25).
358  * @param source The source frame buffer with resolution 400x400, must be valid
359  * @param target The target frame buffer receiving the resized image information, with resolution 256x256, must be valid
360  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
361  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
362  * @see FrameInterpolatorBilinear::resize<T, tChannels>().
363  */
364  static void resize400x400To256x256_8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
365  };
367  /**
368  * Resizes a given frame with (almost) arbitrary data type (e.g., float, double, int) by using a bilinear interpolation.
369  * This function is a wrapper for scale<T, tChannels>().
370  * @param source The source frame buffer providing the image information to be resized, must be valid
371  * @param target The target frame buffer receiving the resized image information, must be valid
372  * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
373  * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
374  * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
375  * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
376  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
377  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
378  * @param worker Optional worker object to distribute the computation to several CPU cores, nullptr by default
379  * @tparam T Data type of each pixel channel, e.g., float, double, int
380  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
381  * @see scale<T, tChannels>().
382  */
383  template <typename T, unsigned int tChannels>
384  static inline void resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
386  /**
387  * Rescales a given frame with arbitrary data type (e.g., float, double, int) by using a bilinear interpolation with user-defined scaling factors.
388  * Beware: This function is not optimized for performance but supports arbitrary data types.<br>
389  * Use scale8BitPerChannel() instead whenever possible.
390  * @param source The source frame buffer providing the image information to be rescaled, must be valid
391  * @param target The target frame buffer receiving the rescaled image information, must be valid
392  * @param sourceWidth The width of the source frame in pixel, with range [1, infinity)
393  * @param sourceHeight The height of the source frame in pixel, with range [1, infinity)
394  * @param targetWidth The width of the target frame in pixel, with range [1, infinity)
395  * @param targetHeight The height of the target frame in pixel, with range [1, infinity)
396  * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
397  * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
398  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
399  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
400  * @param worker Optional worker object to distribute the computation to several CPU cores
401  * @tparam T The data type of each pixel channel, e.g., float, double, int
402  * @tparam tChannels The number of channels of the frame, with range [1, infinity)
403  * @see resize<T, tChannels>().
404  */
405  template <typename T, unsigned int tChannels>
406  static inline void scale(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
408  /**
409  * Rotates a given frame by using a bilinear interpolation.
410  * The frame will be rotated around a specified anchor position (inside or outside the frame).
411  * @param source The source frame to be rotated, must be valid
412  * @param target The target frame which will receive the rotated image, with same frame type as the source frame, must be valid
413  * @param width The width of the source and target frame in pixel, with range [1, infinity)
414  * @param height The height of the source and target frame in pixel, with range [1, infinity)
415  * @param horizontalAnchorPosition Position of the rotation anchor in the horizontal direction, with range (-infinity, infinity)
416  * @param verticalAnchorPosition Position of the rotation anchor in the vertical direction, with range (-infinity, infinity)
417  * @param angle The counter-clockwise rotation angle in radian, with range [0, 2PI)
418  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
419  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
420  * @param worker Optional worker object to distribute the computation to several CPU cores
421  * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use 0x00 for each channel
422  * @tparam tChannels The number of channels both frames have, with range [1, infinity)
423  */
424  template <unsigned int tChannels>
425  static inline void rotate8BitPerChannel(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr, const uint8_t* borderColor = nullptr);
427  /**
428  * Applies an affine transformation to an N-channel, 8-bit frame.
429  * The target frame must have the same pixel format and pixel origin as the source frame, however the dimension (and position) of the target frame can be arbitrary.<br>
430  * This function allows the creation of a target frame fully covering the source frame (if the position and dimension of the target frame covers the transformation of the affine transformation).<br>
431  * The 'targetOrigin' parameter simply applies an additional translation onto the provided affine transformation i.e., affine * create_translation_matrix3x3(targetOrigin.x(), targetOrigin.y()).
432  * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
433  * <pre>
434  * a c e
435  * b d f
436  * 0 0 1
437  * </pre>
438  * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
439  * @param source Input frame that will be transformed, must be valid
440  * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
441  * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
442  * @param source_A_target Affine transformation, such that: sourcePoint = source_A_target * targetPoint
443  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
444  * @param target The target frame using the given affine transform, must be valid
445  * @param targetOrigin The origin of the target frame defining the global position of the target frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
446  * @param targetWidth The width of the target image in pixel, with range [1, infinity)
447  * @param targetHeight The height of the target image in pixel, with range [1, infinity)
448  * @param sourcePaddingElements The number of padding elements at the end of each source frame row, in elements, with range [0, infinity)
449  * @param targetPaddingElements The number of padding elements at the end of each target frame row, in elements, with range [0, infinity)
450  * @param worker Optional worker object to distribute the computational load
451  * @tparam tChannels Number of channels of the frame
452  * @see homographyMask8BitPerChannel(), homographyWithCamera8BitPerChannel(), homography().
453  */
454  template <unsigned int tChannels>
455  static inline void affine8BitPerChannel(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3& source_A_target, const uint8_t* borderColor, uint8_t* target, const PixelPositionI& targetOrigin, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
457  /**
458  * Transforms an input frame with (almost) arbitrary pixel format into an output frame by application of a homography.
459  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
460  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
461  * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).
462  * @param input The input frame that will be transformed, must be valid
463  * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
464  * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
465  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
466  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
467  * @param output The output frame using the given homography, must be valid
468  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
469  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
470  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
471  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
472  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
473  * @param worker Optional worker object to distribute the computational load
474  * @tparam T Data type of each pixel channel, e.g., float, double, int
475  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
476  * @see homographyMask8BitPerChannel(), homographyWithCamera8BitPerChannel().
477  */
478  template <typename T, unsigned int tChannels>
479  static inline void homography(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const T* borderColor, T* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
481  /**
482  * Transforms a given 8 bit per channel input frame into an output frame by application of four homographies.
483  * For each quadrant of the output frame an individual homography is applied while the final result is interpolated between the four homographies.<br>
484  * The quadrant order of the homographies is as follows: top left, top right, bottom left, bottom right.<br>
485  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
486  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homographies).<br>
487  * @param input The input frame that will be transformed
488  * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
489  * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
490  * @param homographies Four homographies used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
491  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
492  * @param output The output frame using the given homographies
493  * @param outputQuadrantCenter The center position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputWidth)x[0, outputHeight)
494  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
495  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
496  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
497  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
498  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
499  * @param worker Optional worker object to distribute the computational load
500  * @tparam tChannels Number of channels of the frame
501  * @see homographyMask8BitPerChannel(), homographyWithCamera8BitPerChannel().
502  */
503  template <unsigned int tChannels>
504  static inline void homographies8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], const uint8_t* borderColor, uint8_t* output, const Vector2& outputQuadrantCenter, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
506  /**
507  * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
508  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
509  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
510  * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).
511  * @param input The input frame that will be transformed, must be valid
512  * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
513  * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
514  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
515  * @param output The output frame using the given homography, must be valid
516  * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame, must be valid
517  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
518  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
519  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
520  * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue); this parameter has no default value and must be provided explicitly
521  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
522  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
523  * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
524  * @param worker Optional worker object to distribute the computational load
525  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
526  * @see homography(), homographyWithCamera8BitPerChannel().
527  */
528  template <unsigned int tChannels>
529  static inline void homographyMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, uint8_t* output, uint8_t* outputMask, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const uint8_t maskValue /* = 0xFF*/, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr);
531  /**
532  * Transforms a given 8 bit per channel input frame into an output frame by application of four homographies.
533  * For each quadrant of the output frame an individual homography is applied while the final result is interpolated between the four homographies.<br>
534  * The quadrant order of the homographies is as follows: top left, top right, bottom left, bottom right.<br>
535  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
536  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homographies).<br>
537  * @param input The input frame that will be transformed
538  * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
539  * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
540  * @param homographies Four homographies used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
541  * @param output The output frame using the given homographies
542  * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
543  * @param outputQuadrantCenter The center position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputWidth)x[0, outputHeight)
544  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
545  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
546  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
547  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
548  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
549  * @param outputMaskPaddingElements The number of padding elements at the end of each row of the output mask, in elements, with range [0, infinity)
550  * @param worker Optional worker object to distribute the computational load
551  * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
552  * @tparam tChannels Number of channels of the frame
553  * @see homography(), homographyWithCamera8BitPerChannel().
554  */
555  template <unsigned int tChannels>
556  static inline void homographiesMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], uint8_t* output, uint8_t* outputMask, const Vector2& outputQuadrantCenter, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
558  /**
559  * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
560  * This function also uses a camera profile to improve the interpolation accuracy.<br>
561  * The given homography is transformed into a homography for normalized image coordinates.<br>
562  * Thus, the distortion parameters of the camera profile can be applied as well.<br>
563  * @param inputCamera The pinhole camera profile to be applied for the input frame
564  * @param outputCamera The pinhole camera profile to be applied for the output frame
565  * @param input The input frame that will be transformed
566  * @param homography The homography used to transform the given input frame by following equation: inputPoint = homography * outputPoint
567  * @param useDistortionParameters True, to apply the distortion parameters of the camera profile
568  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
569  * @param output The output frame using the given homography
570  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
571  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
572  * @param worker Optional worker object to distribute the computational load
573  * @tparam tChannels Number of channels of the frame
574  * @see homography().
575  */
576  template <unsigned int tChannels>
577  static inline void homographyWithCamera8BitPerChannel(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const uint8_t* input, const SquareMatrix3& homography, const bool useDistortionParameters, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
579  /**
580  * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
581  * Input frame pixels lying outside the frame will be masked in the resulting output mask frame.<br>
582  * This function also uses a camera profile to improve the interpolation accuracy.<br>
583  * The given homography is transformed into a homography for normalized image coordinates.<br>
584  * Thus, the distortion parameters of the camera profile can be applied as well.
585  * @param inputCamera The pinhole camera profile to be applied for the input frame, must be valid
586  * @param outputCamera The pinhole camera profile to be applied for the output frame, must be valid
587  * @param input The input frame that will be transformed, must be valid
588  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
589  * @param homography The homography used to transform the given input frame by following equation: inputPoint = homography * outputPoint
590  * @param output The output frame using the given homography
591  * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
592  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
593  * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
594  * @param worker Optional worker object to distribute the computational load
595  * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
596  * @tparam tChannels Number of channels of the frame
597  */
598  template <unsigned int tChannels>
599  static inline void homographyWithCameraMask8BitPerChannel(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const uint8_t* input, const unsigned int inputPaddingElements, const SquareMatrix3& homography, uint8_t* output, uint8_t* outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
601  /**
602  * Transforms a given input frame into an output frame by application of an interpolation lookup table.
603  * The frame must have a 1-plane pixel format.<br>
604  * The output frame must have the same pixel format and pixel origin as the input frame.
605  * @param input The input frame which will be transformed, must be valid
606  * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
607  * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
608  * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
609  * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
610  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign T(0) to each channel
611  * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
612  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
613  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
614  * @param worker Optional worker object to distribute the computation
615  * @tparam T Data type of each pixel channel, e.g., float, double, int
616  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
617  */
618  template <typename T, unsigned int tChannels>
619  static inline void lookup(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& input_LT_output, const bool offset, const T* borderColor, T* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
621  /**
622  * Transforms a given input frame into an output frame by application of an interpolation lookup table.
623  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).<br>
624  * The output frame must have the same pixel format and pixel origin as the input frame.<br>
625  * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
626  * @param input The input frame which will be transformed
627  * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
628  * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
629  * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
630  * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
631  * @param output Resulting output frame with frame dimension equal to the size of the given lookup table
632  * @param outputMask Resulting mask frame with 8 bits per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
633  * @param inputPaddingElements The number of padding elements at the end of each row of `input`, in elements, with range [0, infinity)
634  * @param outputPaddingElements The number of padding elements at the end of each row of `output`, in elements, with range [0, infinity)
635  * @param outputMaskPaddingElements The number of padding elements at the end of each row of `outputMask`, in elements, with range [0, infinity)
636  * @param worker Optional worker object to distribute the computation
637  * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
638  * @tparam tChannels Number of channels of the frame
639  */
640  template <unsigned int tChannels>
641  static inline void lookupMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& input_LT_output, const bool offset, uint8_t* output, uint8_t* outputMask, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
643  /**
644  * Re-samples a camera image which has been captured with a camera profile as if the image would have been captured with a second camera profile.
645  * The function can be used e.g., to rectify a fisheye camera image into a pinhole camera image.
646  * @param sourceFrame The source image captured with the source camera profile, must be valid
647  * @param sourceCamera The source camera profile which has been used to capture the source image, with resolution sourceFrame.width() x sourceFrame.height(), must be valid
648  * @param source_R_target The rotation transforming 3D points defined in the coordinate system of the target camera image to 3D points defined in the coordinate system of the source camera image, must be valid
649  * @param targetCamera The camera profile of the target frame, must be valid
650  * @param targetFrame The resulting target image, with resolution targetCamera.width() x targetCamera.height(), must be valid
651  * @param sourceFramePaddingElements The number of padding elements at the end of each source frame row, in elements, with range [0, infinity)
652  * @param targetFramePaddingElements The number of padding elements at the end of each target frame row, in elements, with range [0, infinity)
653  * @param source_OLT_target Optional resulting offset lookup table between target image points and source image points
654  * @param worker Optional worker object to distribute the computational load
655  * @param binSizeInPixel The size in pixel of the interpolation bins used for building the lookup table, with range [1, infinity)
656  * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use T(0) for each channel
657  * @tparam T Data type of each pixel channel, e.g., uint8_t, int16_t, float, double
658  * @tparam tChannels The number of frame channels, with range [1, infinity)
659  * @see Comfort::resampleCameraImage().
660  */
661  template <typename T, unsigned int tChannels>
662  static void resampleCameraImage(const T* sourceFrame, const AnyCamera& sourceCamera, const SquareMatrix3& source_R_target, const AnyCamera& targetCamera, T* targetFrame, const unsigned int sourceFramePaddingElements, const unsigned int targetFramePaddingElements, LookupCorner2<Vector2>* source_OLT_target = nullptr, Worker* worker = nullptr, const unsigned int binSizeInPixel = 8u, const T* borderColor = nullptr);
664  /**
665  * Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
666  * This function uses an integer interpolation with a precision of 1/128.
667  * @param frame The frame to determine the pixel values from, must be valid
668  * @param width The width of the frame in pixel, with range [1, infinity)
669  * @param height The height of the frame in pixel, with range [1, infinity)
670  * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
671  * @param position The position for which the interpolated pixel will be determined, with ranges [0, width - 1]x[0, height - 1] for PC_TOP_LEFT, [0, width]x[0, height] for PC_CENTER
672  * @param result Resulting interpolated pixel values, must be valid
673  * @tparam tChannels Number of channels of the given frame, with range [1, infinity)
674  * @tparam tPixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
675  * @tparam TScalar The scalar data type of the sub-pixel position
676  * @see interpolatePixel().
677  */
678  template <unsigned int tChannels, PixelCenter tPixelCenter = PC_TOP_LEFT, typename TScalar = Scalar>
679  static inline void interpolatePixel8BitPerChannel(const uint8_t* frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2<TScalar>& position, uint8_t* result);
681  /**
682  * Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data type.
683  * This function uses floating point precision during interpolation.
684  * @param frame The frame to determine the pixel values from, must be valid
685  * @param width The width of the frame in pixel, with range [1, infinity)
686  * @param height The height of the frame in pixel, with range [1, infinity)
687  * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
688  * @param position The position for which the interpolated pixel will be determined, with ranges [0, width - 1]x[0, height - 1] for PC_TOP_LEFT, [0, width]x[0, height] for PC_CENTER
689  * @param result Resulting interpolated pixel value(s), must be valid
690  * @param resultBias Optional bias value which will be added to the interpolation result, e.g., to handle rounding, with range (-infinity, infinity), default is zero
691  * @tparam TSource The data type of the provided pixel values in the (source) frame
692  * @tparam TTarget The data type of the resulting interpolated value(s)
693  * @tparam tChannels Number of channels of the given frame, with range [1, infinity)
694  * @tparam tPixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
695  * @tparam TScalar The data type of each coordinate of the provided interpolation location, should be either Scalar, float, or double
696  * @tparam TIntermediate The data type of the intermediate interpolation result before assigning the result, also the data type of 'resultBias'
697  * @see interpolatePixel8BitPerChannel().
698  */
699  template <typename TSource, typename TTarget, unsigned int tChannels, PixelCenter tPixelCenter = PC_TOP_LEFT, typename TScalar = Scalar, typename TIntermediate = TScalar>
700  static inline void interpolatePixel(const TSource* frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2<TScalar>& position, TTarget* result, const TIntermediate& resultBias = TIntermediate(0));
702  /**
703  * Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame with alpha channel.
704  * The center of each pixel is located with an offset of (0.5, 0.5) in relation to the real pixel position.<br>
705  * The given frame is virtually extended by a fully transparent border so that this function supports arbitrary interpolation positions.<br>
706  * If the given position lies inside the frame area of (-0.5, -0.5) -> (width + 0.5, height + 0.5), the interpolation result will contain color information of the frame, otherwise a fully transparent interpolation result is provided.<br>
707  * @param frame The frame to determine the pixel values from, must be valid
708  * @param width The width of the frame in pixel, with range [1, infinity)
709  * @param height The height of the frame in pixel, with range [1, infinity)
710  * @param position The position to determine the interpolated pixel values for, with range (-infinity, infinity)x(-infinity, infinity)
711  * @param result Resulting pixel values, must be valid
712  * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
713  * @tparam tChannels Number of channels of the given frame, with range [1, infinity)
714  * @tparam tAlphaAtFront True, if the alpha channel is in the front of the data channels
715  * @tparam tTransparentIs0xFF True, if 0xFF is interpreted as fully transparent
716  */
717  template <unsigned int tChannels, bool tAlphaAtFront, bool tTransparentIs0xFF>
718  static inline void interpolate1PixelFullAlphaBorder8BitPerChannel(const uint8_t* frame, const unsigned int width, const unsigned int height, const Vector2& position, uint8_t* result, const unsigned int framePaddingElements);
720  /**
721  * Interpolates the sum of intensity values of an image patch in a frame, while the frame is provided as lined integral frame.
722  * @param linedIntegralFrame The lined integral image created from the actual gray-scale image for which the patch intensity sum will be determined, must be valid
723  * @param frameWidth Width of the original frame in pixel (not the width of the lined-integral frame), with range [1, infinity)
724  * @param frameHeight Height of the original frame in pixel (not the height of the lined-integral frame), with range [1, infinity)
725  * @param lineIntegralFramePaddingElements The number of padding elements at the end of each integral image row, in elements, with range [0, infinity)
726  * @param center 2D coordinates of the center point of the patch, with range [patchWidth/2, frameWidth - patchWidth/2)x[patchHeight/2, frameHeight - patchHeight/2) for PC_CENTER
727  * @param pixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
728  * @param patchWidth Width of the calculated patch in pixel, with range [1, frameWidth - 1]
729  * @param patchHeight Height of the calculated patch in pixel, with range [1, frameHeight - 1]
730  * @return The resulting sum of the pixel intensities
731  */
732  static Scalar patchIntensitySum1Channel(const uint32_t* linedIntegralFrame, const unsigned int frameWidth, const unsigned int frameHeight, const unsigned int lineIntegralFramePaddingElements, const Vector2& center, const CV::PixelCenter pixelCenter, const unsigned int patchWidth, const unsigned int patchHeight);
734  /**
735  * Checks whether the application of a given homography for a specified input frame and output frame covers the entire image information (which is necessary for the bilinear interpolation) or whether the homography relies on missing image information.
736  * @param inputWidth The width of the input frame in pixel, with range [1, infinity)
737  * @param inputHeight The height of the input frame in pixel, with range [1, infinity)
738  * @param outputWidth The width of the output frame in pixel, with range [1, infinity)
739  * @param outputHeight The height of the output frame in pixel, with range [1, infinity)
740  * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
741  * @param outputOriginX The horizontal origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity), default is zero
742  * @param outputOriginY The vertical origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity), default is zero
743  * @return True, if the homography covers the entire input image information (if all output pixels will receive valid data from the input frame); False, otherwise
744  */
745  static bool coversHomographyInputFrame(const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int outputWidth, const unsigned int outputHeight, const SquareMatrix3& input_H_output, const int outputOriginX = 0, const int outputOriginY = 0);
747  private:
749  /**
750  * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
751  * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
752  * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
753  * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).
754  * @param input The input frame that will be transformed, must be valid
755  * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
756  * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
757  * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
758  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
759  * @param output The output frame using the given homography, must be valid
760  * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
761  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
762  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
763  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
764  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
765  * @param worker Optional worker object to distribute the computational load
766  * @tparam tChannels Number of channels of the frame
767  * @see homographyMask8BitPerChannel(), homographyWithCamera8BitPerChannel(), homography().
768  */
769  template <unsigned int tChannels>
770  static inline void homography8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const uint8_t* borderColor, uint8_t* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
772  /**
773  * Rescales a given frame with 8 bit per data channel by using a bilinear interpolation with user-defined scaling factors.
774  * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).<br>
775  * Information: This function is the equivalent of OpenCV's cv::resize().
776  * @param source The source frame buffer providing the image information to be resized, must be valid
777  * @param target The target frame buffer receiving the rescaled image information, must be valid
778  * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
779  * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
780  * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
781  * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
782  * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
783  * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
784  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
785  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
786  * @param worker Optional worker object to distribute the computation to several CPU cores
787  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
788  */
789  template <unsigned int tChannels>
790  static inline void scale8BitPerChannel(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
792  /**
793  * Resizes a subset of a given frame with 8 bit per channel by a bilinear interpolation.
794  * @param source The image data of the source frame to be resized, must be valid
795  * @param target The target frame buffer receiving the interpolated (resized) source frame, must be valid
796  * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
797  * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
798  * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
799  * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
800  * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
801  * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
802  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
803  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
804  * @param firstTargetRow The first target row to be handled, with range [0, targetHeight)
805  * @param numberTargetRows The number of target rows to be handled, with range [1, targetHeight - firstTargetRow]
806  * @tparam tChannels Number of frame channels, with range [1, infinity)
807  */
808  template <unsigned int tChannels>
809  static void scale8BitPerChannelSubset(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
811  /**
812  * Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
813  * This function uses interpolation factors with 7 bit precision and does not apply any SIMD instructions.
814  * @param extendedSourceRow The source row extended with a copy of the last pixel for which the interpolation will be applied, must be valid
815  * @param targetRow The target row receiving the interpolation result, must be valid
816  * @param targetWidth The width of the target row in pixel, with range [8, infinity)
817  * @param channels The number of frame channels, possible values are 1, 4
818  * @param interpolationLocations The successive locations within the source row defining the location of the left pixels to be interpolated (specified in elements !not! in pixels - e.g., interpolationLocations[0] = firstInterpolationPixel * channels), one for each target pixel, with range [0, (targetWidth - 1) * channels]
819  * @param interpolationFactors The two successive (left !and! right) interpolation factors for each left and right source pixel, with range [0, 128]
820  * @see interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON<tChannels>().
821  */
822  static void interpolateRowHorizontal8BitPerChannel7BitPrecision(const uint8_t* extendedSourceRow, uint8_t* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const uint8_t* interpolationFactors);
824  /**
825  * Applies a (vertical) linear interpolation between two rows with arbitrary data types.
826  * This function does not apply any SIMD instructions.<br>
827  * The length of both source rows is identical with the length of the target row.
828  * @param sourceRowTop The top source row to be used for interpolation, must be valid
829  * @param sourceRowBottom The bottom source row to be used for interpolation, must be valid
830  * @param targetRow The target row receiving the interpolation result, must be valid
831  * @param elements The number of elements in the row (width * channels), with range [1, infinity)
832  * @param factorBottom The interpolation factor for all elements of the bottom row, with factorTop = 1 - factorBottom, with range [0, 1]
833  * @tparam T The data type of each element, should be 'float'
834  */
835  template <typename T>
836  static void interpolateRowVertical(const T* sourceRowTop, const T* sourceRowBottom, T* targetRow, const unsigned int elements, const float factorBottom);
838  /**
839  * Applies a (horizontal) linear interpolation for one row with arbitrary data type.
840  * This function does not apply any SIMD instructions.
841  * @param extendedSourceRow The source row extended with a copy of the last pixel for which the interpolation will be applied, must be valid
842  * @param targetRow The target row receiving the interpolation result, must be valid
843  * @param targetWidth The width of the target row in pixel, with range [8, infinity)
844  * @param channels The number of frame channels, must be identical with 'tChannels', possible values are 1, 4
845  * @param interpolationLocations The successive locations within the source row defining the location of the left pixels to be interpolated (specified in elements !not! in pixels - e.g., interpolationLocations[0] = firstInterpolationPixel * channels), one for each target pixel, with range [0, (targetWidth - 1) * channels]
846  * @param interpolationFactorsRight The right interpolation factors for each right source pixel, with range [0, 1]
847  * @tparam T The data type of each element, should be 'float'
848  * @tparam tChannels The number of frame channels this function can handle, should be 1
849  * @see interpolateRowHorizontal8BitPerChannel7BitPrecision().
850  */
851  template <typename T, unsigned int tChannels>
852  static void interpolateRowHorizontal(const T* extendedSourceRow, T* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const float* interpolationFactorsRight);
856  /**
857  * Applies a (vertical) linear interpolation between two rows with 8 bit per channel.
858  * This function applies NEON instructions and uses interpolation factors with 7 bit precision.<br>
859  * The length of both source rows is identical with the length of the target row.
860  * @param sourceRowTop The top source row to be used for interpolation, must be valid
861  * @param sourceRowBottom The bottom source row to be used for interpolation, must be valid
862  * @param targetRow The target row receiving the interpolation result, must be valid
863  * @param elements The number of elements in the row (width * channels), with range [16, infinity)
864  * @param factorBottom The interpolation factor for all elements of the bottom row, with factorTop = 128 - factorBottom, with range [0, 128]
865  */
866  static void interpolateRowVertical8BitPerChannel7BitPrecisionNEON(const uint8_t* sourceRowTop, const uint8_t* sourceRowBottom, uint8_t* targetRow, const unsigned int elements, const unsigned int factorBottom);
868  /**
869  * Applies a (vertical) linear interpolation between two rows with arbitrary data types.
870  * This function applies NEON instructions.<br>
871  * The length of both source rows is identical with the length of the target row.
872  * @param sourceRowTop The top source row to be used for interpolation, must be valid
873  * @param sourceRowBottom The bottom source row to be used for interpolation, must be valid
874  * @param targetRow The target row receiving the interpolation result, must be valid
875  * @param elements The number of elements in the row (width * channels), with range [16, infinity)
876  * @param factorBottom The interpolation factor for all elements of the bottom row, with factorTop = 1 - factorBottom, with range [0, 1]
877  * @tparam T The data type of each element, should be 'float'
878  */
879  template <typename T>
880  static void interpolateRowVerticalNEON(const T* sourceRowTop, const T* sourceRowBottom, T* targetRow, const unsigned int elements, const float factorBottom);
882  /**
883  * Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
884  * This function applies NEON instructions and uses interpolation factors with 7 bit precision.
885  * @param extendedSourceRow The source row extended with a copy of the last pixel for which the interpolation will be applied, must be valid
886  * @param targetRow The target row receiving the interpolation result, must be valid
887  * @param targetWidth The width of the target row in pixel, with range [8, infinity)
888  * @param channels The number of frame channels, must be identical with 'tChannels', possible values are 1, 4
889  * @param interpolationLocations The successive locations within the source row defining the location of the left pixels to be interpolated (specified in elements !not! in pixels - e.g., interpolationLocations[0] = firstInterpolationPixel * channels), one for each target pixel, with range [0, (targetWidth - 1) * channels]
890  * @param interpolationFactors The two successive (left !and! right) interpolation factors for each left and right source pixel, with range [0, 128]
891  * @tparam tChannels The number of frame channels this function can handle, possible values are 1, 4
892  * @see interpolateRowHorizontal8BitPerChannel7BitPrecision().
893  */
894  template <unsigned int tChannels>
895  static void interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON(const uint8_t* extendedSourceRow, uint8_t* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const uint8_t* interpolationFactors);
897  /**
898  * Applies a (horizontal) linear interpolation for one row with arbitrary data type.
899  * This function applies NEON instructions.
900  * @param extendedSourceRow The source row extended with a copy of the last pixel for which the interpolation will be applied, must be valid
901  * @param targetRow The target row receiving the interpolation result, must be valid
902  * @param targetWidth The width of the target row in pixel, with range [8, infinity)
903  * @param channels The number of frame channels, must be identical with 'tChannels', possible values are 1, 4
904  * @param interpolationLocations The successive locations within the source row defining the location of the left pixels to be interpolated (specified in elements !not! in pixels - e.g., interpolationLocations[0] = firstInterpolationPixel * channels), one for each target pixel, with range [0, (targetWidth - 1) * channels]
905  * @param interpolationFactorsRight The right interpolation factors for each right source pixel, with range [0, 1]
906  * @tparam T The data type of each element, should be 'float'
907  * @tparam tChannels The number of frame channels this function can handle, should be 1
908  * @see interpolateRowHorizontal8BitPerChannel7BitPrecision().
909  */
910  template <typename T, unsigned int tChannels>
911  static void interpolateRowHorizontalNEON(const T* extendedSourceRow, T* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const float* interpolationFactorsRight);
913  /**
914  * Rescales a subset of a given frame with 8 bit per channel by a bilinear interpolation.
915  * This function applies NEON instructions and uses interpolation factors with 7 bit precision.
916  * @param source The image data of the source frame to be resized, must be valid
917  * @param target The target frame buffer receiving the interpolated (resized) source frame, must be valid
918  * @param sourceWidth Width of the source frame in pixel, with range [2, 65535]
919  * @param sourceHeight Height of the source frame in pixel, with range [1, 65535]
920  * @param targetWidth Width of the target frame in pixel, with range [tMinimalTargetWidth, 65535]
921  * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
922  * @param channels The number of channels both frames have, with range [1, infinity)
923  * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
924  * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
925  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
926  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
927  * @param firstTargetRow The first target row to be handled, with range [0, targetHeight)
928  * @param numberTargetRows The number of target rows to be handled, with range [1, targetHeight - firstTargetRow]
929  * @see interpolateRowVertical8BitPerChannel7BitPrecisionNEON(), interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON().
930  */
931  static void scale8BitPerChannelSubset7BitPrecisionNEON(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int channels, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
935  /**
936  * Resizes a subset of a given frame with arbitrary data type by a bilinear interpolation.
937  * @param source The image data of the source frame to be resized, must be valid
938  * @param target The target frame buffer receiving the interpolated (resized) source frame, must be valid
939  * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
940  * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
941  * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
942  * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
943  * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
944  * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
945  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
946  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
947  * @param firstTargetRow The first target row to be handled, with range [0, targetHeight)
948  * @param numberTargetRows The number of target rows to be handled, with range [1, targetHeight - firstTargetRow]
949  * @tparam T The data type of each pixel channel, e.g., float, double, int, short, ...
950  * @tparam TScale The data type of the internal scaling factors to be used, should be 'float' or 'double'
951  * @tparam tChannels Number of frame channels, with range [1, infinity)
952  */
953  template <typename T, typename TScale, unsigned int tChannels>
954  static void scaleSubset(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
956  /**
957  * Rotates a subset of a given frame by a bilinear interpolation.
958  * @param source The source frame to be rotated, must be valid
959  * @param target The target frame which will receive the rotated image, with same frame type as the source frame, must be valid
960  * @param width The width of the source and target frame in pixel, with range [1, infinity)
961  * @param height The height of the source and target frame in pixel, with range [1, infinity)
962  * @param horizontalAnchorPosition Position of the rotation anchor in the horizontal direction, with range (-infinity, infinity)
963  * @param verticalAnchorPosition Position of the rotation anchor in the vertical direction, with range (-infinity, infinity)
964  * @param angle The counter clockwise rotation angle in radian, with range [0, 2PI)
965  * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use 0x00 for each channel
966  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
967  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
968  * @param firstTargetRow The first row of the target frame to be handled, with range [0, height)
969  * @param numberTargetRows The number of rows in the target frame to be handled, with range [1, height - firstTargetRow]
970  * @tparam tChannels Number of frame channels, with range [1, infinity)
971  */
972  template <unsigned int tChannels>
973  static void rotate8BitPerChannelSubset(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const uint8_t* borderColor, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
975  /**
976  * Subset function to apply an affine transform to an N-channel, 8-bit unsigned image.
977  * The affine transform must be provided in the following form: `sourcePoint = source_A_target * targetPoint`
978  * This function does not apply SIMD instructions and can be used for any frame dimensions.
979  * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
980  * <pre>
981  * a c e
982  * b d f
983  * 0 0 1
984  * </pre>
985  * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
986  * @param source Input frame that will be transformed
987  * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
988  * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
989  * @param source_A_target Affine transformation which is applied to the source frame.
990  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
991  * @param target Output frame using the given affine transform
992  * @param targetWidth The width of the target image in pixel, with range [1, infinity)
993  * @param targetHeight The height of the target image in pixel, with range [1, infinity)
994  * @param firstTargetRow The first target row to be handled
995  * @param numberTargetRows Number of target rows to be handled
996  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
997  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
998  * @tparam tChannels Number of frame channels, with range [1, infinity)
999  * @see affine8BitPerChannelSSESubset(), affine8BitPerChannelNEONSubset()
1000  */
1001  template <unsigned int tChannels>
1002  static inline void affine8BitPerChannelSubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
1004  /**
1005  * Transforms an 8 bit per channel frame using the given homography.
1006  * The homography must provide the following transformation: inputPoint = input_H_output * outputPoint
1007  * This function does not apply SIMD instructions and can be used for any frame dimensions.
1008  * @param input The input frame that will be transformed
1009  * @param inputWidth Width of the input image in pixel, with range [1, infinity)
1010  * @param inputHeight Height of the input image in pixel, with range [1, infinity)
1011  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1012  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1013  * @param output The output frame using the given homography
1014  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
1015  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1016  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
1017  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
1018  * @param firstOutputRow The first output row to be handled, with range [0, outputHeight - 1]
1019  * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
1020  * @tparam tChannels Number of frame channels, with range [1, infinity)
1021  * @see homography8BitPerChannelSSESubset(), homography8BitPerChannelNEONSubset()
1022  */
1023  template <unsigned int tChannels>
1024  static inline void homography8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1026  /**
1027  * Transforms a frame with (almost) arbitrary pixel format using the given homography.
1028  * This function does not apply SIMD instructions and can be used for any frame dimensions.
1029  * @param input The input frame that will be transformed
1030  * @param inputWidth Width of the input image in pixel, with range [1, infinity)
1031  * @param inputHeight Height of the input image in pixel, with range [1, infinity)
1032  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1033  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1034  * @param output The output frame using the given homography
1035  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
1036  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1037  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
1038  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
1039  * @param firstOutputRow The first output row to be handled, with range [0, outputHeight - 1]
1040  * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
1041  * @tparam T Data type of each pixel channel, e.g., float, double, int
1042  * @tparam tChannels Number of frame channels, with range [1, infinity)
1043  * @see homography8BitPerChannelSSESubset().
1044  */
1045  template <typename T, unsigned int tChannels>
1046  static inline void homographySubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1050  /**
1051  * Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using SSE).
1052  * This function applies SSE instructions.<br>
1053  * Beware: The target width 'targetWidth' must be >= 4, use affine8BitPerChannelSubset for small target frames
1054  * This function has the property: sourcePoint = source_A_target * targetPoint
1055  * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should be of the form:
1056  * <pre>
1057  * a c e
1058  * b d f
1059  * 0 0 1
1060  * </pre>
1061  * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
1062  * @param source Input frame that will be transformed
1063  * @param sourceWidth Width of the source image in pixel, with range [1, infinity)
1064  * @param sourceHeight Height of the source image in pixel, with range [1, infinity)
1065  * @param source_A_target Affine transformation which is applied to the source frame.
1066  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1067  * @param target The target frame where the result of the transformation will be stored
1068  * @param targetWidth The width of the target image in pixel, with range [4, infinity)
1069  * @param targetHeight The height of the target image in pixel, with range [1, infinity)
1070  * @param firstTargetRow The first target row to be handled
1071  * @param numberTargetRows Number of target rows to be handled
1072  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
1073  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1074  * @tparam tChannels Number of frame channels
1075  * @see affine8BitPerChannelSubset(), affine8BitPerChannelNEONSubset().
1076  */
1077  template <unsigned int tChannels>
1078  static inline void affine8BitPerChannelSSESubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
1080  /**
1081  * Transforms an 8 bit per channel frame using the given homography.
1082  * This function applies SSE instructions.<br>
1083  * Beware: The output width 'outputWidth' must be >= 4, use homography8BitPerChannelSubset for small output frames
1084  * @param input The input frame that will be transformed, must be valid
1085  * @param inputWidth Width of the input image in pixel, with range [1, infinity)
1086  * @param inputHeight Height of the input image in pixel, with range [1, infinity)
1087  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1088  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1089  * @param output The output frame using the given homography, must be valid
1090  * @param outputWidth The width of the output image in pixel, with range [4, infinity)
1091  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1092  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
1093  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
1094  * @param firstOutputRow The first output row to be handled, with range [0, outputHeight - 1]
1095  * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
1096  * @tparam tChannels Number of frame channels, with range [1, infinity)
1097  * @see homography8BitPerChannelSubset().
1098  */
1099  template <unsigned int tChannels>
1100  static inline void homography8BitPerChannelSSESubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1102  /**
1103  * Interpolates 4 independent pixels concurrently based on already known locations (top-left, top-right, bottom-left, and bottom-right) and interpolation factors for the source pixels.
1104  * This function also supports to interpolate the pixel values for a subset of the four pixels only, valid pixels will be interpolated, invalid pixels receive a defined border color.
1105  * @param source The source image in which the four independent pixels are located, must be valid
1106  * @param offsetsTopLeft The four offsets within the source image for the four top-left pixels used for the interpolation, with range [0, (width * tChannels + sourcePaddingElements) * (height - 1) + width * tChannels), or the corresponding validPixel information is 0x00000000
1107  * @param offsetsTopRight The four offsets within the source image for the four top-right pixels used for the interpolation, with range [0, (width * tChannels + sourcePaddingElements) * (height - 1) + width * tChannels), or the corresponding validPixel information is 0x00000000
1108  * @param offsetsBottomLeft The four offsets within the source image for the four bottom-left pixels used for the interpolation, with range [0, (width * tChannels + sourcePaddingElements) * (height - 1) + width * tChannels), or the corresponding validPixel information is 0x00000000
1109  * @param offsetsBottomRight The four offsets within the source image for the four bottom-right pixels used for the interpolation, with range [0, (width * tChannels + sourcePaddingElements) * (height - 1) + width * tChannels), or the corresponding validPixel information is 0x00000000
1110  * @param validPixels Four boolean states specifying which of the given four pixels will be interpolated and which ones will receive the defined border color, 0x00000000 for invalid pixels, everything else for valid pixels (e.g., 0xFFFFFFFF)
1111  * @param borderColor The border color that will be assigned to each resulting pixel that is invalid (for which the corresponding validPixel information is 0x00000000)
1112  * @param m128_factorsRight The horizontal interpolation factors for right pixels, with range [0, 128], 128 to use the color information of the right pixels only, 0 to use the color information of the left pixels only
1113  * @param m128_factorsBottom The vertical interpolation factors for bottom pixels, with range [0, 128], 128 to use the color information of the bottom pixels only, 0 to use the color information of the top pixels only
1114  * @param targetPositionPixels The buffer that will receive the interpolated color values, must be valid
1115  * @tparam tChannels The number of frame channels, with range [1, infinity)
1116  */
1117  template <unsigned int tChannels>
1118  static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelSSE(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const typename DataType<uint8_t, tChannels>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, tChannels>::Type* targetPositionPixels);
1120  /**
1121  * Interpolates 4 independent pixels concurrently based on already known locations (top-left, top-right, bottom-left, and bottom-right) and interpolation factors for the source pixels.
1122  * This function also supports to interpolate the pixel values for a subset of the four pixels only, valid pixels will be interpolated, invalid pixels receive a defined border color.
1123  * @param m128_sourcesTopLeft The pixel values of the four top left pixels, starting at the first byte, may contain unused bytes at the end, e.g., RGBARGBARGBARGBA or YUVYUVYUVYUV----
1124  * @param m128_sourcesTopRight The pixel values of the four top right pixels, starting at the first byte, may contain unused bytes at the end
1125  * @param m128_sourcesBottomLeft The pixel values of the four bottom left pixels, starting at the first byte, may contain unused bytes at the end
1126  * @param m128_sourcesBottomRight The pixel values of the four bottom right pixels, starting at the first byte, may contain unused bytes at the end
1127  * @param m128_factorsTopLeft The four interpolation factors of the four top left pixels, with ranges [0, 128 * 128], so that (m128_factorsTopLeft + m128_factorsTopRight + m128_factorsBottomLeft + m128_factorsBottomRight) == (128 * 128)
1128  * @param m128_factorsTopRight The four interpolation factors of the four top right pixels, with ranges [0, 128 * 128], so that (m128_factorsTopLeft + m128_factorsTopRight + m128_factorsBottomLeft + m128_factorsBottomRight) == (128 * 128)
1129  * @param m128_factorsBottomLeft The four interpolation factors of the four bottom left pixels, with ranges [0, 128 * 128], so that (m128_factorsTopLeft + m128_factorsTopRight + m128_factorsBottomLeft + m128_factorsBottomRight) == (128 * 128)
1130  * @param m128_factorsBottomRight The four interpolation factors of the four bottom right pixels, with ranges [0, 128 * 128], so that (m128_factorsTopLeft + m128_factorsTopRight + m128_factorsBottomLeft + m128_factorsBottomRight) == (128 * 128)
1131  * @return The resulting interpolated pixel values, starting at the first byte, may contain unused bytes at the end, e.g., RGBARGBARGBARGBA or YUVYUVYUVYUV----
1132  * @tparam tChannels The number of frame channels, with range [3, 4]
1133  */
1134  template <unsigned int tChannels>
1135  static OCEAN_FORCE_INLINE __m128i interpolate4Pixels8BitPerChannelSSE(const __m128i& m128_sourcesTopLeft, const __m128i& m128_sourcesTopRight, const __m128i& m128_sourcesBottomLeft, const __m128i& m128_sourcesBottomRight, const __m128i& m128_factorsTopLeft, const __m128i& m128_factorsTopRight, const __m128i& m128_factorsBottomLeft, const __m128i& m128_factorsBottomRight);
1141  /**
1142  * Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using NEON).
1143  * This function applies NEON instructions.<br>
1144  * This one has the property: sourcePoint = source_A_target * targetPoint
1145  * Beware: The target width 'targetWidth' must be >= 4, use affine8BitPerChannelSubset for small target frames
1146  * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should be of the form:
1147  * <pre>
1148  * a c e
1149  * b d f
1150  * 0 0 1
1151  * </pre>
1152  * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
1153  * @param source The source frame that will be transformed
1154  * @param sourceWidth Width of the source image in pixel, with range [1, infinity)
1155  * @param sourceHeight Height of the source image in pixel, with range [1, infinity)
1156  * @param source_A_target Affine transform used to transform the given source frame.
1157  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1158  * @param target The target frame using the given affine transform
1159  * @param targetWidth The width of the target image in pixel, with range [4, infinity)
1160  * @param targetHeight The height of the target image in pixel, with range [1, infinity)
1161  * @param firstTargetRow The first target row to be handled
1162  * @param numberTargetRows Number of target rows to be handled
1163  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
1164  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1165  * @tparam tChannels Number of frame channels, with range [1, infinity)
1166  * @see affine8BitPerChannelSubset(), affine8BitPerChannelSSESubset().
1167  */
1168  template <unsigned int tChannels>
1169  static inline void affine8BitPerChannelNEONSubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
1171  /**
1172  * Transforms an 8 bit per channel frame using the given homography.
1173  * This function applies NEON instructions.<br>
1174  * Beware: The output width 'outputWidth' must be >= 4, use homography8BitPerChannelSubset for small output frames.
1175  * @param input The input frame that will be transformed
1176  * @param inputWidth Width of the input image in pixel, with range [1, infinity)
1177  * @param inputHeight Height of the input image in pixel, with range [1, infinity)
1178  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1179  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1180  * @param output The output frame using the given homography
1181  * @param outputWidth The width of the output image in pixel, with range [4, infinity)
1182  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1183  * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
1184  * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
1185  * @param firstOutputRow The first output row to be handled, with range [0, outputHeight - 1]
1186  * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
1187  * @tparam tChannels Number of frame channels, with range [1, infinity)
1188  * @see homography8BitPerChannelSubset().
1189  */
1190  template <unsigned int tChannels>
1191  static inline void homography8BitPerChannelNEONSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1193  /**
1194  * Interpolates 4 independent pixels concurrently based on already known locations (top-left, top-right, bottom-left, and bottom-right) and interpolation factors for the source pixels.
1195  * This function also supports to interpolate the pixel values for a subset of the four pixels only, valid pixels will be interpolated, invalid pixels receive a defined border color.
1196  * @param source The source image in which the four independent pixels are located, must be valid
1197  * @param offsetsTopLeftElements The four offsets within the source image for the four top-left pixels used for the interpolation, in elements, with ranges [0, strideElements * height), or the corresponding validPixel information is 0x00000000
1198  * @param offsetsTopRightElements The four offsets within the source image for the four top-right pixels used for the interpolation, in elements, with ranges [0, strideElements * height), or the corresponding validPixel information is 0x00000000
1199  * @param offsetsBottomLeftElements The four offsets within the source image for the four bottom-left pixels used for the interpolation, in elements, with ranges [0, strideElements * height), or the corresponding validPixel information is 0x00000000
1200  * @param offsetsBottomRightElements The four offsets within the source image for the four bottom-right pixels used for the interpolation, in elements, with ranges [0, strideElements * height), or the corresponding validPixel information is 0x00000000
1201  * @param validPixels Four boolean states specifying which of the given four pixels will be interpolated and which ones will receive the defined border color, 0x00000000 for invalid pixels, everything else for valid pixels (e.g., 0xFFFFFFFF)
1202  * @param borderColor The border color that will be assigned to each resulting pixel that is invalid (for which the corresponding validPixel information is 0x00000000)
1203  * @param m128_factorsRight The horizontal interpolation factors for right pixels, with range [0, 128], 128 to use the color information of the right pixels only, 0 to use the color information of the left pixels only
1204  * @param m128_factorsBottom The vertical interpolation factors for bottom pixels, with range [0, 128], 128 to use the color information of the bottom pixels only, 0 to use the color information of the top pixels only
1205  * @param targetPositionPixels The buffer that will receive the interpolated color values, must be valid
1206  * @tparam tChannels The number of frame channels, with range [1, infinity)
1207  */
1208  template <unsigned int tChannels>
1209  static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelNEON(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const typename DataType<uint8_t, tChannels>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, tChannels>::Type* targetPositionPixels);
1211  /**
1212  * Interpolates 8 independent pixels of a 1 channel frame concurrently; the source pixel locations (top-left, top-right, bottom-left, and bottom-right) and the interpolation factors must be known already.
1213  * @param topLeft_u_8x8 The 8 top left pixel values to be used for interpolation
1214  * @param topRight_u_8x8 The 8 top right pixel values to be used for interpolation
1215  * @param bottomLeft_u_8x8 The 8 bottom left pixel values to be used for interpolation
1216  * @param bottomRight_u_8x8 The 8 bottom right pixel values to be used for interpolation
1217  * @param factorsRight_factorsBottom_128_u_8x16 The eight horizontal interpolation factors for right pixels, and the eight vertical interpolation factors for the bottom pixels, with range [0, 128], 128 to use the color information of the right pixels only, 0 to use the color information of the left pixels only
1218  * @param targetPositionPixels The buffer that will receive the interpolated color values, must be valid
1219  */
1220  static OCEAN_FORCE_INLINE void interpolate8Pixels1Channel8BitNEON(const uint8x8_t& topLeft_u_8x8, const uint8x8_t& topRight_u_8x8, const uint8x8_t& bottomLeft_u_8x8, const uint8x8_t& bottomRight_u_8x8, const uint8x16_t& factorsRight_factorsBottom_128_u_8x16, uint8_t* targetPositionPixels);
1224  /**
1225  * Transforms an 8 bit per channel frame using the given homographies.
1226  * @param input The input frame that will be transformed
1227  * @param inputWidth Width of the input image in pixel, with range [1, infinity)
1228  * @param inputHeight Height of the input image in pixel, with range [1, infinity)
1229  * @param homographies Homographies used to transform the given input frame
1230  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1231  * @param output The output frame using the given homographies
1232  * @param outputQuadrantCenterX The horizontal position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputWidth)
1233  * @param outputQuadrantCenterY The vertical position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputHeight)
1234  * @param outputOriginX The horizontal coordinate of the output frame's origin
1235  * @param outputOriginY The vertical coordinate of the output frame's origin
1236  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
1237  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1238  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1239  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1240  * @param firstOutputRow The first output row to be handled
1241  * @param numberOutputRows Number of output rows to be handled
1242  * @tparam tChannels Number of frame channels
1243  */
1244  template <unsigned int tChannels>
1245  static inline void homographies8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* homographies, const uint8_t* borderColor, uint8_t* output, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1247  /**
1248  * Transforms an 8 bit per channel frame using the given homography.
1249  * @param input The input frame that will be transformed, must be valid
1250  * @param inputWidth Width of the input image in pixel, with range [1, infinity)
1251  * @param inputHeight Height of the input image in pixel, with range [1, infinity)
1252  * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1253  * @param output The output frame resulting by application of the given homography, must be valid
1254  * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
1255  * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
1256  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
1257  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1258  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1259  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1260  * @param outputMaskPaddingElements The number of padding elements at the end of each row of the output mask, in elements, with range [0, infinity)
1261  * @param firstOutputRow The first output row to be handled
1262  * @param numberOutputRows Number of output rows to be handled
1263  * @tparam tChannels Number of frame channels, with range [1, infinity)
1264  */
1265  template <unsigned int tChannels>
1266  static inline void homographyMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1268  /**
1269  * Transforms an 8 bit per channel frame using the given homographies.
1270  * @param input The input frame that will be transformed
1271  * @param inputWidth Width of the input image in pixel, with range [1, infinity)
1272  * @param inputHeight Height of the input image in pixel, with range [1, infinity)
1273  * @param homographies Homographies used to transform the given input frame
1274  * @param output The output frame resulting by application of the given homographies
1275  * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
1276  * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
1277  * @param outputQuadrantCenterX The horizontal position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputWidth)
1278  * @param outputQuadrantCenterY The vertical position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputHeight)
1279  * @param outputOriginX The horizontal coordinate of the output frame's origin
1280  * @param outputOriginY The vertical coordinate of the output frame's origin
1281  * @param outputWidth The width of the output image in pixel, with range [1, infinity)
1282  * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1283  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1284  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1285  * @param outputMaskPaddingElements The number of padding elements at the end of each row of the output mask, in elements, with range [0, infinity)
1286  * @param firstOutputRow The first output row to be handled
1287  * @param numberOutputRows Number of output rows to be handled
1288  * @tparam tChannels Number of frame channels
1289  */
1290  template <unsigned int tChannels>
1291  static inline void homographiesMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* homographies, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1293  /**
1294  * Transforms a subset of the rows of an 8 bit per channel frame using the given normalized homography and the given camera profiles.
1295  * @param inputCamera The pinhole camera profile to be applied for the input frame
1296  * @param outputCamera The pinhole camera profile to be applied for the output frame
1297  * @param outputCameraDistortionLookup The distortion lookup table of the output camera
1298  * @param input The input frame that will be transformed
1299  * @param normalizedHomography The homography used to transform the given input frame specified in normalized camera coordinates
1300  * @param useDistortionParameters True, to apply the distortion parameters of the camera profile
1301  * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1302  * @param output The output frame resulting by application of the given homography
1303  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1304  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1305  * @param firstRow The first row to be handled
1306  * @param numberRows Number of rows to be handled
1307  * @tparam tChannels Number of frame channels
1308  */
1309  template <unsigned int tChannels>
1310  static void homographyWithCamera8BitPerChannelSubset(const PinholeCamera* inputCamera, const PinholeCamera* outputCamera, const PinholeCamera::DistortionLookup* outputCameraDistortionLookup, const uint8_t* input, const SquareMatrix3* normalizedHomography, const bool useDistortionParameters, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1312  /**
1313  * Transforms a subset of the rows of an 8 bit per channel frame using the given normalized homography and the given camera profiles, and determines the corresponding output mask.
1314  * @param inputCamera The pinhole camera profile to be applied for the input frame
1315  * @param outputCamera The pinhole camera profile to be applied for the output frame
1316  * @param outputCameraDistortionLookup The distortion lookup table of the output camera
1317  * @param input The input frame that will be transformed, must be valid
1318  * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1319  * @param normalizedHomography The homography used to transform the given input frame specified in normalized camera coordinates
1320  * @param output The output frame resulting by application of the given homography
1321  * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
1322  * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1323  * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
1324  * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
1325  * @param firstRow The first row to be handled
1326  * @param numberRows Number of rows to be handled
1327  * @tparam tChannels Number of frame channels
1328  */
1329  template <unsigned int tChannels>
1330  static void homographyWithCameraMask8BitPerChannelSubset(const PinholeCamera* inputCamera, const PinholeCamera* outputCamera, const PinholeCamera::DistortionLookup* outputCameraDistortionLookup, const uint8_t* input, const unsigned int inputPaddingElements, const SquareMatrix3* normalizedHomography, uint8_t* output, uint8_t* outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows);
1332  /**
1333  * Transforms a subset of a given input frame with uint8_t as element type into an output frame by application of an interpolation lookup table.
1334  * The output frame must have the same pixel format and pixel origin as the input frame.<br>
1335  * @param input The input frame which will be transformed, must be valid
1336  * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
1337  * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
1338  * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
1339  * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
1340  * @param borderColor Color of undefined pixel positions, the size of the buffer must match the number of channels, nullptr to assign 0x00 to each channel
1341  * @param output Resulting output frame with frame dimension equal to the size of the given lookup table
1342  * @param inputPaddingElements Number of padding elements at the end of each input row, in elements, with range [0, infinity)
1343  * @param outputPaddingElements Number of padding elements at the end of each output row, in elements, with range [0, infinity)
1344  * @param firstRow First row to be handled, with range [0, input_LT_output->sizeY())
1345  * @param numberRows Number of rows to be handled, with range [1, input_LT_output->sizeY() - firstRow]
1346  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
1347  */
1348  template <unsigned int tChannels>
1349  static void lookup8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1351  /**
1352  * Transforms a subset of a given input frame with arbitrary element type into an output frame by application of an interpolation lookup table.
1353  * The output frame must have the same pixel format and pixel origin as the input frame.<br>
1354  * @param input The input frame which will be transformed, must be valid
1355  * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
1356  * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
1357  * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
1358  * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
1359  * @param borderColor Color of undefined pixel positions, the size of the buffer must match the number of channels, nullptr to assign T(0) to each channel
1360  * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
1361  * @param inputPaddingElements Number of padding elements at the end of each input row, in elements, with range [0, infinity)
1362  * @param outputPaddingElements Number of padding elements at the end of each output row, in elements, with range [0, infinity)
1363  * @param firstRow First row to be handled, with range [0, input_LT_output->sizeY())
1364  * @param numberRows Number of rows to be handled, with range [1, input_LT_output->sizeY() - firstRow]
1365  * @tparam T Data type of each pixel channel, must not be 'uint8_t'
1366  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
1367  */
1368  template <typename T, unsigned int tChannels>
1369  static void lookupSubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const T* borderColor, T* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1373  /**
1374  * Transforms a subset of a given input frame into an output frame by application of an interpolation lookup table and uses NEON instructions.
1375  * The output frame must have the same pixel format and pixel origin as the input frame.<br>
1376  * @param input The input frame which will be transformed, must be valid
1377  * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
1378  * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
1379  * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), with table width >= 4, must be valid
1380  * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
1381  * @param borderColor Color of undefined pixel positions, the size of the buffer must match the number of channels, nullptr to assign 0x00 to each channel
1382  * @param output Resulting output frame with frame dimension equal to the size of the given lookup table
1383  * @param inputPaddingElements Number of padding elements at the end of each input row, in elements, with range [0, infinity)
1384  * @param outputPaddingElements Number of padding elements at the end of each output row, in elements, with range [0, infinity)
1385  * @param firstRow First row to be handled, with range [0, input_LT_output->sizeY())
1386  * @param numberRows Number of rows to be handled, with range [1, input_LT_output->sizeY() - firstRow]
1387  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
1388  */
1389  template <unsigned int tChannels>
1390  static void lookup8BitPerChannelSubsetNEON(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1394  /**
1395  * Transforms a given input frame into an output frame by application of an interpolation lookup table.
1396  * The output frame must have the same pixel format and pixel origin as the input frame.<br>
1397  * Input frame pixels lying outside the frame will be masked in the resulting output mask frame; furthermore, these pixels remain untouched in the output frame.<br>
1398  * @param input The input frame which will be transformed
1399  * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
1400  * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
1401  * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
1402  * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
1403  * @param output Resulting output frame with frame dimension equal to the size of the given lookup table
1404  * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
1405  * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
1406  * @param inputPaddingElements The number of padding elements at the end of each row of `input`, in elements, with range [0, infinity)
1407  * @param outputPaddingElements The number of padding elements at the end of each row of `output`, in elements, with range [0, infinity)
1408  * @param outputMaskPaddingElements The number of padding elements at the end of each row of `outputMask`, in elements, with range [0, infinity)
1409  * @param firstRow First row to be handled
1410  * @param numberRows Number of rows to be handled
1411  * @tparam tChannels Number of channels of the frame
1412  */
1413  template <unsigned int tChannels>
1414  static void lookupMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1415 };
1417 inline bool FrameInterpolatorBilinear::Comfort::resize(Frame& frame, const unsigned int width, const unsigned int height, Worker* worker)
1418 {
1419  ocean_assert(frame.isValid());
1420  ocean_assert(width >= 1u && height >= 1u);
1422  Frame target(FrameType(frame, width, height));
1424  if (!resize(frame, target, worker))
1425  {
1426  return false;
1427  }
1429  frame = std::move(target);
1430  return true;
1431 }
1433 template <typename TScalar>
1434 bool FrameInterpolatorBilinear::Comfort::interpolatePixel8BitPerChannel(const uint8_t* frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2<TScalar>& position, uint8_t* result)
1435 {
1436  ocean_assert(frame != nullptr);
1437  ocean_assert(channels >= 1u && channels <= 8u);
1439  if (pixelCenter == PC_TOP_LEFT)
1440  {
1441  switch (channels)
1442  {
1443  case 1u:
1444  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<1u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1445  return true;
1447  case 2u:
1448  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<2u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1449  return true;
1451  case 3u:
1452  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<3u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1453  return true;
1455  case 4u:
1456  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<4u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1457  return true;
1459  case 5u:
1460  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<5u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1461  return true;
1463  case 6u:
1464  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<6u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1465  return true;
1467  case 7u:
1468  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<7u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1469  return true;
1471  case 8u:
1472  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<8u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1473  return true;
1474  }
1475  }
1476  else
1477  {
1478  ocean_assert(pixelCenter == PC_CENTER);
1480  switch (channels)
1481  {
1482  case 1u:
1483  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<1u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1484  return true;
1486  case 2u:
1487  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<2u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1488  return true;
1490  case 3u:
1491  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<3u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1492  return true;
1494  case 4u:
1495  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<4u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1496  return true;
1498  case 5u:
1499  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<5u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1500  return true;
1502  case 6u:
1503  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<6u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1504  return true;
1506  case 7u:
1507  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<7u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1508  return true;
1510  case 8u:
1511  FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<8u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1512  return true;
1513  }
1514  }
1516  ocean_assert(false && "Invalid channel number");
1517  return false;
1518 }
1520 template <typename TSource, typename TTarget, typename TScalar, typename TIntermediate>
1521 bool FrameInterpolatorBilinear::Comfort::interpolatePixel(const TSource* frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2<TScalar>& position, TTarget* result, const TIntermediate& resultBias)
1522 {
1523  ocean_assert(frame != nullptr);
1524  ocean_assert(channels >= 1u && channels <= 8u);
1526  if (pixelCenter == PC_TOP_LEFT)
1527  {
1528  switch (channels)
1529  {
1530  case 1u:
1531  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 1u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1532  return true;
1534  case 2u:
1535  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 2u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1536  return true;
1538  case 3u:
1539  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 3u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1540  return true;
1542  case 4u:
1543  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 4u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1544  return true;
1546  case 5u:
1547  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 5u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1548  return true;
1550  case 6u:
1551  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 6u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1552  return true;
1554  case 7u:
1555  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 7u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1556  return true;
1558  case 8u:
1559  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 8u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1560  return true;
1561  }
1562  }
1563  else
1564  {
1565  ocean_assert(pixelCenter == PC_CENTER);
1567  switch (channels)
1568  {
1569  case 1u:
1570  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 1u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1571  return true;
1573  case 2u:
1574  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 2u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1575  return true;
1577  case 3u:
1578  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 3u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1579  return true;
1581  case 4u:
1582  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 4u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1583  return true;
1585  case 5u:
1586  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 5u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1587  return true;
1589  case 6u:
1590  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 6u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1591  return true;
1593  case 7u:
1594  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 7u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1595  return true;
1597  case 8u:
1598  FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 8u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1599  return true;
1600  }
1601  }
1603  ocean_assert(false && "Invalid channel number");
1604  return false;
1605 }
1607 template <typename T, unsigned int tChannels>
1608 inline void FrameInterpolatorBilinear::resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
1609 {
1610  ocean_assert(source != nullptr && target != nullptr);
1611  ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
1612  ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
1614  const double sourceX_s_targetX = double(sourceWidth) / double(targetWidth);
1615  const double sourceY_s_targetY = double(sourceHeight) / double(targetHeight);
1617  scale<T, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, worker);
1618 }
1620 template <typename T, unsigned int tChannels>
1621 inline void FrameInterpolatorBilinear::scale(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
1622 {
1623  ocean_assert(source != nullptr && target != nullptr);
1624  ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
1625  ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
1626  ocean_assert(sourceX_s_targetX > 0.0);
1627  ocean_assert(sourceY_s_targetY > 0.0);
1629  if (sourceWidth == targetWidth && sourceHeight == targetHeight)
1630  {
1631  FrameConverter::subFrame<T>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, 0u, 0u, 0u, 0u, sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements);
1632  return;
1633  }
1635  if (std::is_same<T, uint8_t>::value)
1636  {
1637  // we have a SIMD-based optimized version for 'uint8_t' data types
1639  scale8BitPerChannel<tChannels>((const uint8_t*)source, (uint8_t*)target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, worker);
1640  }
1641  else
1642  {
1643  typedef typename FloatTyper<T>::Type TScale;
1645  if (worker)
1646  {
1647  worker->executeFunction(Worker::Function::createStatic(&scaleSubset<T, TScale, tChannels>, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
1648  }
1649  else
1650  {
1651  scaleSubset<T, TScale, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
1652  }
1653  }
1654 }
/**
 * Applies the transformation 'source_A_target' (combined with 'targetOrigin') to create the 8 bit per channel target frame.
 * With a worker, the target rows [0, targetHeight) are handed to Worker::executeFunction(); without a worker, all rows are handled with one direct call.
 * NOTE(review): the '#if'/'#elif' lines belonging to the two '#endif' directives below are not visible in this listing; presumably they select the SSE or the NEON implementation at compile time - confirm against the complete file.
 */
1656 template <unsigned int tChannels>
1657 inline void FrameInterpolatorBilinear::affine8BitPerChannel(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3& source_A_target, const uint8_t* borderColor, uint8_t* target, const CV::PixelPositionI& targetOrigin, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
1658 {
1659  // The transformation is multiplied (from the right) with a matrix built from 'targetOrigin', so that the subset functions receive one combined matrix (presumably a translation by the target origin - confirm against the SquareMatrix3 constructor).
1660  const SquareMatrix3 adjustedAffineTransform = source_A_target * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(targetOrigin.x()), Scalar(targetOrigin.y()), 1));
1662  if (worker)
1663  {
      // The '0u, 0u' placeholders are the function parameters with (zero-based) indices 8 and 9, matching the '8u, 9u' handed to executeFunction();
      // presumably the worker replaces them with the first row and the number of rows of each subset, and '20u' is the minimal number of rows per subset - TODO confirm against Worker::executeFunction().
1664  if (targetWidth >= 4u)
1665  {
      // the SIMD-based subset functions are used for targets with at least 4 pixels per row only
1667  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::affine8BitPerChannelSSESubset<tChannels>, source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, 0u, sourcePaddingElements, targetPaddingElements), 0, targetHeight, 8u, 9u, 20u);
1668  return;
1670  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::affine8BitPerChannelNEONSubset<tChannels>, source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, 0u, sourcePaddingElements, targetPaddingElements), 0, targetHeight, 8u, 9u, 20u);
1671  return;
1672 #endif
1673  }
      // fallback without SIMD instructions
1675  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::affine8BitPerChannelSubset<tChannels>, source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, 0u, sourcePaddingElements, targetPaddingElements), 0, targetHeight, 8u, 9u, 20u);
1676  }
1677  else
1678  {
      // no worker: each call below covers the entire row range [0, targetHeight)
1679  if (targetWidth >= 4u)
1680  {
1682  affine8BitPerChannelSSESubset<tChannels>(source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, targetHeight, sourcePaddingElements, targetPaddingElements);
1683  return;
1685  affine8BitPerChannelNEONSubset<tChannels>(source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, targetHeight, sourcePaddingElements, targetPaddingElements);
1686  return;
1687 #endif
1688  }
      // fallback without SIMD instructions
1690  affine8BitPerChannelSubset<tChannels>(source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, targetHeight, sourcePaddingElements, targetPaddingElements);
1691  }
1692 }
/**
 * Applies the homography 'input_H_output' (combined with 'outputOrigin') to create the 8 bit per channel output frame.
 * With a worker, the output rows [0, outputHeight) are handed to Worker::executeFunction(); without a worker, all rows are handled with one direct call.
 * NOTE(review): the '#if'/'#elif' lines belonging to the two '#endif' directives below are not visible in this listing; presumably they select the SSE or the NEON implementation at compile time - confirm against the complete file.
 */
1694 template <unsigned int tChannels>
1695 inline void FrameInterpolatorBilinear::homography8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const uint8_t* borderColor, uint8_t* output, const CV::PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1696 {
1697  // we adjust the homography to address 'outputOrigin': the homography is multiplied (from the right) with a matrix built from the origin's coordinates
1698  const SquareMatrix3 input_H_shiftedOutput = input_H_output * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
1700  if (worker)
1701  {
      // The '0u, 0u' placeholders are the function parameters with (zero-based) indices 10 and 11, matching the '10u, 11u' handed to executeFunction();
      // presumably the worker replaces them with the first row and the number of rows of each subset, and '20u' is the minimal number of rows per subset - TODO confirm against Worker::executeFunction().
1702  if (outputWidth >= 4u)
1703  {
      // the SIMD-based subset functions are used for outputs with at least 4 pixels per row only
1705  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homography8BitPerChannelSSESubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1706  return;
1708  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homography8BitPerChannelNEONSubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1709  return;
1710 #endif
1711  }
      // fallback without SIMD instructions
1713  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homography8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1714  }
1715  else
1716  {
      // no worker: each call below covers the entire row range [0, outputHeight)
1717  if (outputWidth >= 4u)
1718  {
1720  homography8BitPerChannelSSESubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1721  return;
1723  homography8BitPerChannelNEONSubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1724  return;
1725 #endif
1726  }
      // fallback without SIMD instructions
1728  homography8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1729  }
1730 }
1732 template <typename T, unsigned int tChannels>
1733 inline void FrameInterpolatorBilinear::homography(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const T* borderColor, T* output, const CV::PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1734 {
1735  if (std::is_same<T, uint8_t>::value)
1736  {
1737  homography8BitPerChannel<tChannels>((const uint8_t*)input, inputWidth, inputHeight, input_H_output, (const uint8_t*)borderColor, (uint8_t*)output, outputOrigin, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, worker);
1738  return;
1739  }
1740  else
1741  {
1742  // we adjust the homography to address 'outputOrigin'
1743  const SquareMatrix3 input_H_shiftedOutput = input_H_output * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
1745  if (worker)
1746  {
1747  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographySubset<T, tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1748  }
1749  else
1750  {
1751  homographySubset<T, tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1752  }
1753  }
1754 }
1756 template <unsigned int tChannels>
1757 inline void FrameInterpolatorBilinear::homographies8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], const uint8_t* borderColor, uint8_t* output, const Vector2& outputQuadrantCenter, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1758 {
1759  if (worker)
1760  {
1761  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographies8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, homographies, borderColor, output, outputQuadrantCenter.x(), outputQuadrantCenter.y(), outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 14u, 15u, 20u);
1762  }
1763  else
1764  {
1765  homographies8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, homographies, borderColor, output, outputQuadrantCenter.x(), outputQuadrantCenter.y(), outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1766  }
1767 }
1769 template <unsigned int tChannels>
1770 inline void FrameInterpolatorBilinear::homographyMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, uint8_t* output, uint8_t* outputMask, const CV::PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker)
1771 {
1772  // we adjust the homography to address 'outputOrigin'
1773  const SquareMatrix3 input_H_shiftedOutput = input_H_output * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
1775  if (worker)
1776  {
1777  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographyMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, output, outputMask, maskValue, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, 0u), 0, outputHeight, 12u, 13u, 20u);
1778  }
1779  else
1780  {
1781  homographyMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, output, outputMask, maskValue, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, outputHeight);
1782  }
1783 }
1785 template <unsigned int tChannels>
1786 inline void FrameInterpolatorBilinear::homographiesMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], uint8_t* output, uint8_t* outputMask, const Vector2& outputQuadrantCenter, const CV::PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker, const uint8_t maskValue)
1787 {
1788  if (worker)
1789  {
1790  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographiesMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, homographies, output, outputMask, maskValue, outputQuadrantCenter.x(), outputQuadrantCenter.y(), outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, 0u), 0, outputHeight);
1791  }
1792  else
1793  {
1794  homographiesMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, homographies, output, outputMask, maskValue, outputQuadrantCenter.x(), outputQuadrantCenter.y(), outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, outputHeight);
1795  }
1796 }
1798 template <unsigned int tChannels>
1799 inline void FrameInterpolatorBilinear::homographyWithCamera8BitPerChannel(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const uint8_t* input, const SquareMatrix3& homography, const bool useDistortionParameters, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1800 {
1801  const SquareMatrix3 normalizedHomography(inputCamera.invertedIntrinsic() * homography * outputCamera.intrinsic());
1803  const PinholeCamera::DistortionLookup outputCameraDistortionLookup(outputCamera, 10u);
1805  if (worker)
1806  {
1807  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographyWithCamera8BitPerChannelSubset<tChannels>, &inputCamera, &outputCamera, &outputCameraDistortionLookup, input, &normalizedHomography, useDistortionParameters, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputCamera.height());
1808  }
1809  else
1810  {
1811  homographyWithCamera8BitPerChannelSubset<tChannels>(&inputCamera, &outputCamera, &outputCameraDistortionLookup, input, &normalizedHomography, useDistortionParameters, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, outputCamera.height());
1812  }
1813 }
1815 template <unsigned int tChannels>
1816 inline void FrameInterpolatorBilinear::homographyWithCameraMask8BitPerChannel(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const uint8_t* input, const unsigned int inputPaddingElements, const SquareMatrix3& homography, uint8_t* output, uint8_t* outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker, const uint8_t maskValue)
1817 {
1818  const SquareMatrix3 normalizedHomography(inputCamera.invertedIntrinsic() * homography * outputCamera.intrinsic());
1820  const PinholeCamera::DistortionLookup outputCameraDistortionLookup(outputCamera, 10u);
1822  if (worker)
1823  {
1824  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographyWithCameraMask8BitPerChannelSubset<tChannels>, &inputCamera, &outputCamera, &outputCameraDistortionLookup, input, inputPaddingElements, &normalizedHomography, output, outputMask, outputPaddingElements, outputMaskPaddingElements, maskValue, 0u, 0u), 0, outputCamera.height(), 11u, 12u, 10u);
1825  }
1826  else
1827  {
1828  homographyWithCameraMask8BitPerChannelSubset<tChannels>(&inputCamera, &outputCamera, &outputCameraDistortionLookup, input, inputPaddingElements, &normalizedHomography, output, outputMask, outputPaddingElements, outputMaskPaddingElements, maskValue, 0u, outputCamera.height());
1829  }
1830 }
1832 template <typename T, unsigned int tChannels>
1833 inline void FrameInterpolatorBilinear::lookup(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& input_LT_output, const bool offset, const T* borderColor, T* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1834 {
1835  if constexpr (std::is_same<T, uint8_t>::value)
1836  {
1838  if ((tChannels >= 1u && input_LT_output.sizeX() >= 8) || (tChannels >= 2u && input_LT_output.sizeX() >= 4))
1839  {
1840  // NEON implementation for 1 channel: min width 8; for 2+ channels: min width 4
1842  if (worker)
1843  {
1844  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::lookup8BitPerChannelSubsetNEON<tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (unsigned int)(input_LT_output.sizeY()), 9u, 10u, 20u);
1845  }
1846  else
1847  {
1848  lookup8BitPerChannelSubsetNEON<tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (unsigned int)(input_LT_output.sizeY()));
1849  }
1851  return;
1852  }
1855  if (worker)
1856  {
1857  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::lookup8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (unsigned int)input_LT_output.sizeY(), 9u, 10u, 20u);
1858  }
1859  else
1860  {
1861  lookup8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (unsigned int)(input_LT_output.sizeY()));
1862  }
1863  }
1864  else
1865  {
1866  ocean_assert((!std::is_same<T, uint8_t>::value));
1868  if (worker)
1869  {
1870  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::lookupSubset<T, tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (unsigned int)(input_LT_output.sizeY()), 9u, 10u, 20u);
1871  }
1872  else
1873  {
1874  lookupSubset<T, tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (unsigned int)(input_LT_output.sizeY()));
1875  }
1876  }
1877 }
1879 template <unsigned int tChannels>
1880 inline void FrameInterpolatorBilinear::lookupMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& input_LT_output, const bool offset, uint8_t* output, uint8_t* outputMask, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker, const uint8_t maskValue)
1881 {
1882  if (worker)
1883  {
1884  worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::lookupMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, output, outputMask, maskValue, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, 0u), 0u, (unsigned int)(input_LT_output.sizeY()), 11u, 12u, 20u);
1885  }
1886  else
1887  {
1888  lookupMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, output, outputMask, maskValue, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, (unsigned int)(input_LT_output.sizeY()));
1889  }
1890 }
1892 template <typename T, unsigned int tChannels>
1893 void FrameInterpolatorBilinear::resampleCameraImage(const T* sourceFrame, const AnyCamera& sourceCamera, const SquareMatrix3& source_R_target, const AnyCamera& targetCamera, T* targetFrame, const unsigned int sourceFramePaddingElements, const unsigned int targetFramePaddingElements, LookupCorner2<Vector2>* source_OLT_target, Worker* worker, const unsigned int binSizeInPixel, const T* borderColor)
1894 {
1895  static_assert(tChannels >= 1u, "Invalid channel number!");
1897  ocean_assert(sourceFrame != nullptr);
1898  ocean_assert(sourceCamera.isValid());
1899  ocean_assert(source_R_target.isOrthonormal());
1900  ocean_assert(targetCamera.isValid());
1901  ocean_assert(targetFrame != nullptr);
1902  ocean_assert(binSizeInPixel >= 1u);
1904  const size_t binsX = std::max(1u, targetCamera.width() / binSizeInPixel);
1905  const size_t binsY = std::max(1u, targetCamera.height() / binSizeInPixel);
1906  CV::FrameInterpolatorBilinear::LookupTable lookupTable(targetCamera.width(), targetCamera.height(), binsX, binsY);
1908  for (size_t yBin = 0; yBin <= lookupTable.binsY(); ++yBin)
1909  {
1910  for (size_t xBin = 0; xBin <= lookupTable.binsX(); ++xBin)
1911  {
1912  const Vector2 cornerPosition = lookupTable.binTopLeftCornerPosition(xBin, yBin);
1914  constexpr bool makeUnitVector = false; // we don't need a unit/normalized vector as we project the vector into the camera again
1916  const Vector3 rayI = source_R_target * targetCamera.vector(cornerPosition, makeUnitVector);
1917  const Vector3 rayIF = Vector3(rayI.x(), -rayI.y(), -rayI.z());
1919  if (rayIF.z() > Numeric::eps())
1920  {
1921  const Vector2 projectedPoint = sourceCamera.projectToImageIF(rayIF);
1923  lookupTable.setBinTopLeftCornerValue(xBin, yBin, projectedPoint - cornerPosition);
1924  }
1925  else
1926  {
1927  // simply a coordinate far outside the input
1928  lookupTable.setBinTopLeftCornerValue(xBin, yBin, Vector2(Scalar(sourceCamera.width() * 10u), Scalar(sourceCamera.height() * 10u)));
1929  }
1930  }
1931  }
1933  lookup<T, tChannels>(sourceFrame, sourceCamera.width(), sourceCamera.height(), lookupTable, true /*offset*/, borderColor, targetFrame, sourceFramePaddingElements, targetFramePaddingElements, worker);
1935  if (source_OLT_target)
1936  {
1937  *source_OLT_target = std::move(lookupTable);
1938  }
1939 }
1941 template <unsigned int tChannels>
1942 void FrameInterpolatorBilinear::rotate8BitPerChannel(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker, const uint8_t* borderColor)
1943 {
1944  static_assert(tChannels != 0u, "Invalid channel number!");
1946  ocean_assert(source != nullptr && target != nullptr);
1947  ocean_assert(width >= 1u && height >= 1u);
1949  if (worker)
1950  {
1951  worker->executeFunction(Worker::Function::createStatic(&rotate8BitPerChannelSubset<tChannels>, source, target, width, height, horizontalAnchorPosition, verticalAnchorPosition, angle, borderColor, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height);
1952  }
1953  else
1954  {
1955  rotate8BitPerChannelSubset<tChannels>(source, target, width, height, horizontalAnchorPosition, verticalAnchorPosition, angle, borderColor, sourcePaddingElements, targetPaddingElements, 0u, height);
1956  }
1957 }
1959 template <unsigned int tChannels, PixelCenter tPixelCenter, typename TScalar>
1960 inline void FrameInterpolatorBilinear::interpolatePixel8BitPerChannel(const uint8_t* frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2<TScalar>& position, uint8_t* result)
1961 {
1962  static_assert(tChannels != 0u, "Invalid channel number!");
1963  static_assert(tPixelCenter == PC_TOP_LEFT || tPixelCenter == PC_CENTER, "Invalid pixel center!");
1965  ocean_assert(frame != nullptr && result != nullptr);
1966  ocean_assert(width != 0u && height != 0u);
1968  const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
1970  ocean_assert(position.x() >= TScalar(0));
1971  ocean_assert(position.y() >= TScalar(0));
1973  if constexpr (tPixelCenter == PC_TOP_LEFT)
1974  {
1975  ocean_assert(position.x() <= TScalar(width - 1u));
1976  ocean_assert(position.y() <= TScalar(height - 1u));
1978  const unsigned int left = (unsigned int)(position.x());
1979  const unsigned int top = (unsigned int)(position.y());
1980  ocean_assert(left < width && top < height);
1982  const TScalar tx = position.x() - TScalar(left);
1983  ocean_assert(tx >= 0 && tx <= 1);
1984  const unsigned int txi = (unsigned int)(tx * TScalar(128) + TScalar(0.5));
1985  const unsigned int txi_ = 128u - txi;
1987  const TScalar ty = position.y() - TScalar(top);
1988  ocean_assert(ty >= 0 && ty <= 1);
1989  const unsigned int tyi = (unsigned int)(ty * TScalar(128) + TScalar(0.5));
1990  const unsigned int tyi_ = 128u - tyi;
1992  const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
1993  const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
1995  const uint8_t* const topLeft = frame + top * frameStrideElements + tChannels * left;
1997  const unsigned int txty = txi * tyi;
1998  const unsigned int txty_ = txi * tyi_;
1999  const unsigned int tx_ty = txi_ * tyi;
2000  const unsigned int tx_ty_ = txi_ * tyi_;
2002  for (unsigned int n = 0u; n < tChannels; ++n)
2003  {
2004  result[n] = uint8_t((topLeft[n] * tx_ty_ + topLeft[rightOffset + n] * txty_ + topLeft[bottomOffset + n] * tx_ty + topLeft[bottomOffset + rightOffset + n] * txty + 8192u) >> 14u);
2005  }
2006  }
2007  else
2008  {
2009  ocean_assert(tPixelCenter == PC_CENTER);
2011  ocean_assert(position.x() <= TScalar(width));
2012  ocean_assert(position.y() <= TScalar(height));
2014  const TScalar xShifted = std::max(TScalar(0.0), position.x() - TScalar(0.5));
2015  const TScalar yShifted = std::max(TScalar(0.0), position.y() - TScalar(0.5));
2017  const unsigned int left = (unsigned int)(xShifted);
2018  const unsigned int top = (unsigned int)(yShifted);
2020  ocean_assert(left < width);
2021  ocean_assert(top < height);
2023  const TScalar tx = xShifted - TScalar(left);
2024  const TScalar ty = yShifted - TScalar(top);
2026  ocean_assert(tx >= 0 && tx <= 1);
2027  ocean_assert(ty >= 0 && ty <= 1);
2029  const unsigned int txi = (unsigned int)(tx * TScalar(128) + TScalar(0.5));
2030  const unsigned int txi_ = 128u - txi;
2032  const unsigned int tyi = (unsigned int)(ty * TScalar(128) + TScalar(0.5));
2033  const unsigned int tyi_ = 128u - tyi;
2035  const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2036  const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2038  const uint8_t* const topLeft = frame + top * frameStrideElements + left * tChannels;
2040  const unsigned int txty = txi * tyi;
2041  const unsigned int txty_ = txi * tyi_;
2042  const unsigned int tx_ty = txi_ * tyi;
2043  const unsigned int tx_ty_ = txi_ * tyi_;
2045  for (unsigned int n = 0u; n < tChannels; ++n)
2046  {
2047  result[n] = uint8_t((topLeft[n] * tx_ty_ + topLeft[rightOffset + n] * txty_ + topLeft[bottomOffset + n] * tx_ty + topLeft[bottomOffset + rightOffset + n] * txty + 8192u) >> 14u);
2048  }
2049  }
2050 }
2052 template <typename TSource, typename TTarget, unsigned int tChannels, PixelCenter tPixelCenter, typename TScalar, typename TIntermediate>
2053 inline void FrameInterpolatorBilinear::interpolatePixel(const TSource* frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2<TScalar>& position, TTarget* result, const TIntermediate& resultBias)
2054 {
2055  static_assert(tChannels != 0u, "Invalid channel number!");
2056  static_assert(tPixelCenter == PC_TOP_LEFT || tPixelCenter == PC_CENTER, "Invalid pixel center!");
2058  ocean_assert(frame != nullptr && result != nullptr);
2059  ocean_assert(width != 0u && height != 0u);
2061  const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
2063  ocean_assert(position.x() >= TScalar(0));
2064  ocean_assert(position.y() >= TScalar(0));
2066  if constexpr (tPixelCenter == PC_TOP_LEFT)
2067  {
2068  ocean_assert(position.x() <= TScalar(width - 1u));
2069  ocean_assert(position.y() <= TScalar(height - 1u));
2071  const unsigned int left = (unsigned int)(position.x());
2072  const unsigned int top = (unsigned int)(position.y());
2074  const TScalar tx = position.x() - TScalar(left);
2075  ocean_assert(tx >= 0 && tx <= 1);
2077  const TScalar ty = position.y() - TScalar(top);
2078  ocean_assert(ty >= 0 && ty <= 1);
2080  const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2081  const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2083  const TSource* const topLeft = frame + top * frameStrideElements + tChannels * left;
2085  const TIntermediate txty = TIntermediate(tx) * TIntermediate(ty);
2086  const TIntermediate txty_ = TIntermediate(tx) * (TIntermediate(1) - TIntermediate(ty));
2087  const TIntermediate tx_ty = (TIntermediate(1) - TIntermediate(tx)) * TIntermediate(ty);
2088  const TIntermediate tx_ty_ = (TIntermediate(1) - TIntermediate(tx)) * (TIntermediate(1) - TIntermediate(ty));
2090  ocean_assert_accuracy(NumericT<TIntermediate>::isEqual(txty + txty_ + tx_ty + tx_ty_, TIntermediate(1)));
2092  for (unsigned int n = 0u; n < tChannels; ++n)
2093  {
2094  result[n] = TTarget(TIntermediate(topLeft[n]) * tx_ty_ + TIntermediate(topLeft[rightOffset + n]) * txty_ + TIntermediate(topLeft[bottomOffset + n]) * tx_ty + TIntermediate(topLeft[bottomOffset + rightOffset + n]) * txty + resultBias);
2095  }
2096  }
2097  else
2098  {
2099  ocean_assert(tPixelCenter == PC_CENTER);
2101  ocean_assert(position.x() <= TScalar(width));
2102  ocean_assert(position.y() <= TScalar(height));
2104  const TScalar xShifted = std::max(TScalar(0.0), position.x() - TScalar(0.5));
2105  const TScalar yShifted = std::max(TScalar(0.0), position.y() - TScalar(0.5));
2107  const unsigned int left = (unsigned int)(xShifted);
2108  const unsigned int top = (unsigned int)(yShifted);
2110  ocean_assert(left < width);
2111  ocean_assert(top < height);
2113  const TScalar tx = xShifted - TScalar(left);
2114  const TScalar ty = yShifted - TScalar(top);
2116  ocean_assert(tx >= 0 && tx <= 1);
2117  ocean_assert(ty >= 0 && ty <= 1);
2119  const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2120  const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2122  const TSource* const topLeft = frame + top * frameStrideElements + tChannels * left;
2124  const TIntermediate txty = TIntermediate(tx) * TIntermediate(ty);
2125  const TIntermediate txty_ = TIntermediate(tx) * (TIntermediate(1) - TIntermediate(ty));
2126  const TIntermediate tx_ty = (TIntermediate(1) - TIntermediate(tx)) * TIntermediate(ty);
2127  const TIntermediate tx_ty_ = (TIntermediate(1) - TIntermediate(tx)) * (TIntermediate(1) - TIntermediate(ty));
2129  ocean_assert_accuracy(NumericT<TIntermediate>::isEqual(txty + txty_ + tx_ty + tx_ty_, TIntermediate(1)));
2131  for (unsigned int n = 0u; n < tChannels; ++n)
2132  {
2133  result[n] = TTarget(TIntermediate(topLeft[n]) * tx_ty_ + TIntermediate(topLeft[rightOffset + n]) * txty_ + TIntermediate(topLeft[bottomOffset + n]) * tx_ty + TIntermediate(topLeft[bottomOffset + rightOffset + n]) * txty + resultBias);
2134  }
2135  }
2136 }
2138 template <unsigned int tChannels, bool tAlphaAtFront, bool tTransparentIs0xFF>
2139 inline void FrameInterpolatorBilinear::interpolate1PixelFullAlphaBorder8BitPerChannel(const uint8_t* frame, const unsigned int width, const unsigned int height, const Vector2& position, uint8_t* result, const unsigned int framePaddingElements)
2140 {
2141  static_assert(tChannels != 0u, "Invalid channel number!");
2143  ocean_assert(frame && result);
2145  const Vector2 pos(position.x() - Scalar(0.5), position.y() - Scalar(0.5));
2147  // check whether the position is outside the frame and will therefore be 100% transparent
2148  if (pos.x() <= Scalar(-1) || pos.y() <= Scalar(-1) || pos.x() >= Scalar(width) || pos.y() >= Scalar(height))
2149  {
2150  for (unsigned int n = 0u; n < tChannels - 1u; ++n)
2151  {
2153  }
2155  result[FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] = FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2157  return;
2158  }
2160  const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
2162  const int left = int(Numeric::floor(pos.x()));
2163  const int top = int(Numeric::floor(pos.y()));
2165  ocean_assert(left >= -1 && left < int(width));
2166  ocean_assert(top >= -1 && top < int(height));
2168  if ((unsigned int)left < width - 1u && (unsigned int)top < height - 1u)
2169  {
2170  // we have a valid pixel position for the left, top, right and bottom pixel
2172  const unsigned int txi = (unsigned int)((pos.x() - Scalar(left)) * Scalar(128) + Scalar(0.5));
2173  const unsigned int txi_ = 128u - txi;
2175  const unsigned int tyi = (unsigned int)((pos.y() - Scalar(top)) * Scalar(128) + Scalar(0.5));
2176  const unsigned int tyi_ = 128u - tyi;
2178  const uint8_t* const topLeft = frame + top * frameStrideElements + left * tChannels;
2180  const unsigned int txty = txi * tyi;
2181  const unsigned int txty_ = txi * tyi_;
2182  const unsigned int tx_ty = txi_ * tyi;
2183  const unsigned int tx_ty_ = txi_ * tyi_;
2185  for (unsigned int n = 0u; n < tChannels; ++n)
2186  {
2187  result[n] = (topLeft[n] * tx_ty_ + topLeft[tChannels + n] * txty_
2188  + topLeft[frameStrideElements + n] * tx_ty + topLeft[frameStrideElements + tChannels + n] * txty + 8192u) >> 14u;
2189  }
2190  }
2191  else
2192  {
2193  // we do not have a valid pixel for all 4-neighborhood pixels
2195  const unsigned int txi = (unsigned int)((pos.x() - Scalar(left)) * Scalar(128) + Scalar(0.5));
2196  const unsigned int txi_ = 128u - txi;
2198  const unsigned int tyi = (unsigned int)((pos.y() - Scalar(top)) * Scalar(128) + Scalar(0.5));
2199  const unsigned int tyi_ = 128u - tyi;
2201  const unsigned int rightOffset = (left >= 0 && left + 1u < width) ? tChannels : 0u;
2202  const unsigned int bottomOffset = (top >= 0 && top + 1u < height) ? frameStrideElements : 0u;
2204  ocean_assert(left < int(width) && top < int(height));
2205  const uint8_t* const topLeft = frame + max(0, top) * frameStrideElements + max(0, left) * tChannels;
2207  const unsigned int txty = txi * tyi;
2208  const unsigned int txty_ = txi * tyi_;
2209  const unsigned int tx_ty = txi_ * tyi;
2210  const unsigned int tx_ty_ = txi_ * tyi_;
2212  for (unsigned int n = FrameBlender::SourceOffset<tAlphaAtFront>::data(); n < tChannels + FrameBlender::SourceOffset<tAlphaAtFront>::data() - 1u; ++n)
2213  {
2214  result[n] = (topLeft[n] * tx_ty_ + topLeft[rightOffset + n] * txty_
2215  + topLeft[bottomOffset + n] * tx_ty + topLeft[bottomOffset + rightOffset + n] * txty + 8192u) >> 14u;
2216  }
2218  const uint8_t alphaTopLeft = (left >= 0 && top >= 0) ? topLeft[FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] : FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2219  const uint8_t alphaTopRight = (left + 1u < width && top >= 0) ? topLeft[rightOffset + FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] : FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2220  const uint8_t alphaBottomLeft = (left >= 0 && top + 1u < height) ? topLeft[bottomOffset + FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] : FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2221  const uint8_t alphaBottomRight = (left + 1u < width && top + 1u < height) ? topLeft[bottomOffset + rightOffset + FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] : FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2223  result[FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] = (alphaTopLeft * tx_ty_ + alphaTopRight * txty_ + alphaBottomLeft * tx_ty + alphaBottomRight * txty + 8192u) >> 14u;
2224  }
2225 }
2227 template <unsigned int tChannels>
2228 void FrameInterpolatorBilinear::affine8BitPerChannelSubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberOutputRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
2229 {
2230  static_assert(tChannels >= 1u, "Invalid channel number!");
2232  ocean_assert(source != nullptr && target != nullptr);
2233  ocean_assert(sourceWidth > 0u && sourceHeight > 0u);
2234  ocean_assert_and_suppress_unused(targetWidth > 0u && targetHeight > 0u, targetHeight);
2235  ocean_assert(source_A_target);
2236  ocean_assert(!source_A_target->isNull() && Numeric::isEqualEps((*source_A_target)[2]) && Numeric::isEqualEps((*source_A_target)[5]));
2238  ocean_assert(firstTargetRow + numberOutputRows <= targetHeight);
2240  const unsigned int targetStrideElements = tChannels * targetWidth + targetPaddingElements;
2242  const Scalar scalarSourceWidth_1 = Scalar(sourceWidth - 1u);
2243  const Scalar scalarSourceHeight_1 = Scalar(sourceHeight - 1u);
2245  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
2247  uint8_t zeroColor[tChannels] = {uint8_t(0)};
2248  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
2250  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberOutputRows; ++y)
2251  {
2252  PixelType* targetRow = (PixelType*)(target + y * targetStrideElements);
2254  /*
2255  * We can slightly optimize the 3x3 matrix multiplication:
2256  *
2257  * | X0 Y0 Z0 | | x |
2258  * | X1 Y1 Z1 | * | y |
2259  * | 0 0 1 | | 1 |
2260  *
2261  * | xx | | X0 * x | | Y0 * y + Z0 |
2262  * | yy | = | X1 * x | + | Y1 * y + Z1 |
2263  *
2264  * As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
2265  *
2266  * C0 = Y0 * y + Z0
2267  * C1 = Y1 * y + Z1
2268  *
2269  * So the computation becomes:
2270  *
2271  * | x' | | X0 * x | | C0 |
2272  * | y' | = | X1 * x | + | C1 |
2273  */
2275  const Vector2 X(source_A_target->data() + 0);
2276  const Vector2 c(Vector2(source_A_target->data() + 3) * Scalar(y) + Vector2(source_A_target->data() + 6));
2278  for (unsigned int x = 0u; x < targetWidth; ++x)
2279  {
2280  const Vector2 sourcePosition = X * Scalar(x) + c;
2282 #ifdef OCEAN_DEBUG
2283  const Scalar debugSourceX = (*source_A_target)[0] * Scalar(x) + (*source_A_target)[3] * Scalar(y) + (*source_A_target)[6];
2284  const Scalar debugSourceY = (*source_A_target)[1] * Scalar(x) + (*source_A_target)[4] * Scalar(y) + (*source_A_target)[7];
2285  ocean_assert(sourcePosition.isEqual(Vector2(debugSourceX, debugSourceY), Scalar(0.01)));
2286 #endif
2288  if (sourcePosition.x() < Scalar(0) || sourcePosition.x() > scalarSourceWidth_1 || sourcePosition.y() < Scalar(0) || sourcePosition.y() > scalarSourceHeight_1)
2289  {
2290  *targetRow = *bColor;
2291  }
2292  else
2293  {
2294  interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(source, sourceWidth, sourceHeight, sourcePaddingElements, sourcePosition, (uint8_t*)(targetRow));
2295  }
2297  targetRow++;
2298  }
2299  }
2300 }
2302 template <unsigned int tChannels>
2303 void FrameInterpolatorBilinear::homography8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
2304 {
2305  static_assert(tChannels >= 1u, "Invalid channel number!");
2307  ocean_assert(input != nullptr && output != nullptr);
2308  ocean_assert(inputWidth > 0u && inputHeight > 0u);
2309  ocean_assert(outputWidth > 0u && outputHeight > 0u);
2310  ocean_assert(input_H_output != nullptr);
2312  ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
2314  const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
2316  const Scalar scalarInputWidth_1 = Scalar(inputWidth - 1u);
2317  const Scalar scalarInputHeight_1 = Scalar(inputHeight - 1u);
2319  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
2321  uint8_t zeroColor[tChannels] = {uint8_t(0)};
2322  const PixelType bColor = borderColor ? *(PixelType*)borderColor : *(PixelType*)zeroColor;
2324  for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
2325  {
2326  /*
2327  * We can slightly optimize the 3x3 matrix multiplication:
2328  *
2329  * | X0 Y0 Z0 | | x |
2330  * | X1 Y1 Z1 | * | y |
2331  * | X2 Y2 Z2 | | 1 |
2332  *
2333  * | xx | | X0 * x | | Y0 * y + Z0 |
2334  * | yy | = | X1 * x | + | Y1 * y + Z1 |
2335  * | zz | | X2 * x | | Y2 * y + Z2 |
2336  *
2337  * | xx | | X0 * x | | C0 |
2338  * | yy | = | X1 * x | + | C1 |
2339  * | zz | | X2 * x | | C2 |
2340  *
2341  * As y is constant within the inner loop, we can pre-calculate the following terms:
2342  *
2343  * | x' | | (X0 * x + C0) / (X2 * x + C2) |
2344  * | y' | = | (X1 * x + C1) / (X2 * x + C2) |
2345  */
2347  const Vector2 X(input_H_output->data() + 0);
2348  const Vector2 c(Vector2(input_H_output->data() + 3) * Scalar(y) + Vector2(input_H_output->data() + 6));
2350  const Scalar X2 = (*input_H_output)(2, 0);
2351  const Scalar constValue2 = (*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2);
2353  PixelType* outputRowPixel = (PixelType*)(output + y * outputStrideElements);
2355  for (unsigned int x = 0u; x < outputWidth; ++x)
2356  {
2357  ocean_assert_accuracy(Numeric::isNotEqualEps((X2 * Scalar(x) + constValue2)));
2358  const Vector2 inputPosition((X * Scalar(x) + c) / (X2 * Scalar(x) + constValue2));
2360 #ifdef OCEAN_DEBUG
2361  const Vector2 debugInputPosition(*input_H_output * Vector2(Scalar(x), Scalar(y)));
2362  ocean_assert(inputPosition.isEqual(debugInputPosition, Scalar(0.01)));
2363 #endif
2365  if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
2366  {
2367  *outputRowPixel = bColor;
2368  }
2369  else
2370  {
2371  interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputRowPixel));
2372  }
2374  ++outputRowPixel;
2375  }
2376  }
2377 }
2379 template <typename T, unsigned int tChannels>
2380 void FrameInterpolatorBilinear::homographySubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
2381 {
2382  static_assert(tChannels >= 1u, "Invalid channel number!");
2384  ocean_assert(input != nullptr && output != nullptr);
2385  ocean_assert(inputWidth > 0u && inputHeight > 0u);
2386  ocean_assert_and_suppress_unused(outputWidth > 0u && outputHeight > 0u, outputHeight);
2387  ocean_assert(input_H_output != nullptr);
2389  ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
2391  const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
2393  const Scalar scalarInputWidth1 = Scalar(inputWidth - 1u);
2394  const Scalar scalarInputHeight1 = Scalar(inputHeight - 1u);
2396  // we need to find a best matching floating point data type for the intermediate interpolation results
2397  typedef typename FloatTyper<T>::Type TIntermediate;
2399  typedef typename DataType<T, tChannels>::Type PixelType;
2401  constexpr T zeroColor[tChannels] = {T(0)};
2402  const PixelType* const bColor = borderColor ? (PixelType*)(borderColor) : (PixelType*)(zeroColor);
2404  constexpr TIntermediate bias = TIntermediate(0);
2406  for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
2407  {
2408  /*
2409  * We can slightly optimize the 3x3 matrix multiplication:
2410  *
2411  * | X0 Y0 Z0 | | x |
2412  * | X1 Y1 Z1 | * | y |
2413  * | X2 Y2 Z2 | | 1 |
2414  *
2415  * | xx | | X0 * x | | Y0 * y + Z0 |
2416  * | yy | = | X1 * x | + | Y1 * y + Z1 |
2417  * | zz | | X2 * x | | Y2 * y + Z2 |
2418  *
2419  * | xx | | X0 * x | | C0 |
2420  * | yy | = | X1 * x | + | C1 |
2421  * | zz | | X2 * x | | C3 |
2422  *
2423  * As y is constant within the inner loop, we can pre-calculate the following terms:
2424  *
2425  * | x' | | (X0 * x + C0) / (X2 * x + C2) |
2426  * | y' | = | (X1 * x + C1) / (X2 * x + C2) |
2427  */
2429  const Vector2 X(input_H_output->data() + 0);
2430  const Vector2 c(Vector2(input_H_output->data() + 3) * Scalar(y) + Vector2(input_H_output->data() + 6));
2432  const Scalar X2 = (*input_H_output)(2, 0);
2433  const Scalar constValue2 = (*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2);
2435  PixelType* outputRowPixel = (PixelType*)(output + y * outputStrideElements);
2437  for (unsigned int x = 0u; x < outputWidth; ++x)
2438  {
2439  ocean_assert_accuracy(Numeric::isNotEqualEps((X2 * Scalar(x) + constValue2)));
2440  const Vector2 inputPosition((X * Scalar(x) + c) / (X2 * Scalar(x) + constValue2));
2442 #ifdef OCEAN_DEBUG
2443  const Vector2 debugInputPosition(*input_H_output * Vector2(Scalar(x), Scalar(y)));
2444  ocean_assert((std::is_same<float, Scalar>::value) || inputPosition.isEqual(debugInputPosition, Scalar(0.01)));
2445 #endif
2447  if (inputPosition.x() >= Scalar(0) && inputPosition.x() <= scalarInputWidth1 && inputPosition.y() >= Scalar(0) && inputPosition.y() <= scalarInputHeight1)
2448  {
2449  interpolatePixel<T, T, tChannels, CV::PC_TOP_LEFT, Scalar, TIntermediate>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (T*)(outputRowPixel), bias);
2450  }
2451  else
2452  {
2453  *outputRowPixel = *bColor;
2454  }
2456  ++outputRowPixel;
2457  }
2458  }
2459 }
2463 template <unsigned int tChannels>
2464 inline void FrameInterpolatorBilinear::affine8BitPerChannelSSESubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
2465 {
2466  static_assert(tChannels >= 1u, "Invalid channel number!");
2468  ocean_assert(source && target);
2469  ocean_assert(sourceWidth > 0u && sourceHeight > 0u);
2470  ocean_assert(targetWidth >= 4u && targetHeight > 0u);
2471  ocean_assert(source_A_target);
2472  ocean_assert(!source_A_target->isNull() && Numeric::isEqualEps((*source_A_target)[2]) && Numeric::isEqualEps((*source_A_target)[5]));
2474  ocean_assert_and_suppress_unused(firstTargetRow + numberTargetRows <= targetHeight, targetHeight);
2476  const unsigned int sourceStrideElements = tChannels * sourceWidth + sourcePaddingElements;
2477  const unsigned int targetStrideElements = tChannels * targetWidth + targetPaddingElements;
2479  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
2481  uint8_t zeroColor[tChannels] = {uint8_t(0)};
2482  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
2484  OCEAN_ALIGN_DATA(16) unsigned int validPixels[4];
2486  OCEAN_ALIGN_DATA(16) unsigned int topLeftOffsets[4];
2487  OCEAN_ALIGN_DATA(16) unsigned int topRightOffsets[4];
2488  OCEAN_ALIGN_DATA(16) unsigned int bottomLeftOffsets[4];
2489  OCEAN_ALIGN_DATA(16) unsigned int bottomRightOffsets[4];
2491  // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
2492  const __m128 m128_f_X0 = _mm_set_ps1(float((*source_A_target)(0, 0)));
2493  const __m128 m128_f_X1 = _mm_set_ps1(float((*source_A_target)(1, 0)));
2495  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
2496  {
2497  PixelType* targetRow = (PixelType*)(target + y * targetStrideElements);
2499  /*
2500  * We can slightly optimize the 3x3 matrix multiplication:
2501  *
2502  * | X0 Y0 Z0 | | x |
2503  * | X1 Y1 Z1 | * | y |
2504  * | 0 0 1 | | 1 |
2505  *
2506  * | xx | | X0 * x | | Y0 * y + Z0 |
2507  * | yy | = | X1 * x | + | Y1 * y + Z1 |
2508  *
2509  * As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
2510  *
2511  * C0 = Y0 * y + Z0
2512  * C1 = Y1 * y + Z1
2513  *
2514  * So the computation becomes:
2515  *
2516  * | x' | | X0 * x | | C0 |
2517  * | y' | = | X1 * x | + | C1 |
2518  */
2520  // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
2521  const __m128 m128_f_C0 = _mm_set_ps1(float((*source_A_target)(0, 1) * Scalar(y) + (*source_A_target)(0, 2)));
2522  const __m128 m128_f_C1 = _mm_set_ps1(float((*source_A_target)(1, 1) * Scalar(y) + (*source_A_target)(1, 2)));
2524  // we store 4 floats: [0.0f, 0.0f, 0.0f, 0.0f]
2525  const __m128 m128_f_zero = _mm_setzero_ps();
2527  // we store 4 integers: [tChannels, tChannels, tChannels, tChannels]
2528  const __m128i m128_i_channels = _mm_set1_epi32(tChannels);
2530  // we store 4 integers: [sourceStrideElements, sourceStrideElements, sourceStrideElements, sourceStrideElements]
2531  const __m128i m128_i_sourceStrideElements = _mm_set1_epi32(sourceStrideElements);
2533  // we store 4 integers: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
2534  const __m128i m128_i_sourceWidth_1 = _mm_set1_epi32(int(sourceWidth) - 1);
2535  const __m128i m128_i_sourceHeight_1 = _mm_set1_epi32(int(sourceHeight) - 1);
2537  // we store 4 floats: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
2538  const __m128 m128_f_sourceWidth_1 = _mm_set_ps1(float(sourceWidth - 1u));
2539  const __m128 m128_f_sourceHeight_1 = _mm_set_ps1(float(sourceHeight - 1u));
2541  for (unsigned int x = 0u; x < targetWidth; x += 4u)
2542  {
2543  if (x + 4u > targetWidth)
2544  {
2545  // the last iteration will not fit into the output frame,
2546  // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
2548  ocean_assert(x >= 4u && targetWidth > 4u);
2549  const unsigned int newX = targetWidth - 4u;
2551  ocean_assert(x > newX);
2552  targetRow -= x - newX;
2554  x = newX;
2556  // the for loop will stop after this iteration
2557  ocean_assert(!(x + 4u < targetWidth));
2558  }
2561  // we need four successive x coordinate floats:
2562  // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
2563  const __m128 m128_f_x_0123 = _mm_set_ps(float(x + 3u), float(x + 2u), float(x + 1u), float(x + 0u));
2565  // we calculate xx and yy for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
2566  const __m128 m128_f_sourceX = _mm_add_ps(_mm_mul_ps(m128_f_X0, m128_f_x_0123), m128_f_C0);
2567  const __m128 m128_f_sourceY = _mm_add_ps(_mm_mul_ps(m128_f_X1, m128_f_x_0123), m128_f_C1);
2569  // now we check whether we are inside the input frame
2570  const __m128 m128_f_validPixelX = _mm_and_ps(_mm_cmple_ps(m128_f_sourceX, m128_f_sourceWidth_1), _mm_cmpge_ps(m128_f_sourceX, m128_f_zero)); // inputPosition.x() <= (inputWidth - 1) && inputPosition.x() >= 0 ? 0xFFFFFFFF : 0x00000000
2571  const __m128 m128_f_validPixelY = _mm_and_ps(_mm_cmple_ps(m128_f_sourceY, m128_f_sourceHeight_1), _mm_cmpge_ps(m128_f_sourceY, m128_f_zero)); // inputPosition.y() <= (inputHeight - 1) && inputPosition.y() >= 0 ? 0xFFFFFFFF : 0x00000000
2573  const __m128i m128_i_validPixel = _mm_castps_si128(_mm_and_ps(m128_f_validPixelX, m128_f_validPixelY)); // is_inside_input_frame(inputPosition) ? 0xFFFFFFFF : 0x00000000
2575  // we can stop here if all pixels are invalid
2576  if (_mm_test_all_zeros(m128_i_validPixel, _mm_set1_epi32(0xFFFFFFFF)))
2577  {
2578 #ifdef OCEAN_DEBUG
2579  OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
2580  _mm_store_si128((__m128i*)debugValidPixels, m128_i_validPixel);
2581  ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
2582 #endif
2584  targetRow[0] = *bColor;
2585  targetRow[1] = *bColor;
2586  targetRow[2] = *bColor;
2587  targetRow[3] = *bColor;
2589  targetRow += 4;
2591  continue;
2592  }
2594  // we store the result
2595  _mm_store_si128((__m128i*)validPixels, m128_i_validPixel);
2596  ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
2599  // now we determine the left, top, right and bottom pixel used for the interpolation
2600  const __m128 m128_f_tx_floor = _mm_floor_ps(m128_f_sourceX);
2601  const __m128 m128_f_ty_floor = _mm_floor_ps(m128_f_sourceY);
2603  // left = floor(x); top = floor(y)
2604  const __m128i m128_i_left = _mm_cvtps_epi32(m128_f_tx_floor);
2605  const __m128i m128_i_top = _mm_cvtps_epi32(m128_f_ty_floor);
2607  // right = min(left + 1, width - 1); bottom = min(top + 1; height - 1)
2608  const __m128i m128_i_right = _mm_min_epu32(_mm_add_epi32(m128_i_left, _mm_set1_epi32(1)), m128_i_sourceWidth_1);
2609  const __m128i m128_i_bottom = _mm_min_epu32(_mm_add_epi32(m128_i_top, _mm_set1_epi32(1)), m128_i_sourceHeight_1);
2611  // offset = (y * sourceStrideElements + tChannels * x)
2612  const __m128i m128_i_topLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left)); // topleftOffset = (top * sourceStrideElements + tChannels * left)
2613  const __m128i m128_i_topRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right)); // toprightOffset = (top * sourceStrideElements + tChannels * right)
2614  const __m128i m128_i_bottomLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left)); // ...
2615  const __m128i m128_i_bottomRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right));
2617  // we store the offsets
2618  _mm_store_si128((__m128i*)topLeftOffsets, m128_i_topLeftOffset);
2619  _mm_store_si128((__m128i*)topRightOffsets, m128_i_topRightOffset);
2620  _mm_store_si128((__m128i*)bottomLeftOffsets, m128_i_bottomLeftOffset);
2621  _mm_store_si128((__m128i*)bottomRightOffsets, m128_i_bottomRightOffset);
2624  // now we need to determine the interpolation factors tx, tx_ and ty, ty_: (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
2626  // we determine the fractional portions of the x' and y':
2627  // e.g., [43.1231, -12.5543, -34.123, 99.2]
2628  // [ 0.1231, 0.4457, 0.877, 0.2] // note the result for negative value - but we will not process negative values anyway due to 'validPixel'
2629  __m128 m128_f_tx = _mm_sub_ps(m128_f_sourceX, m128_f_tx_floor);
2630  __m128 m128_f_ty = _mm_sub_ps(m128_f_sourceY, m128_f_ty_floor);
2632  // we use integer interpolation [0.0, 1.0] -> [0, 128]
2633  m128_f_tx = _mm_mul_ps(m128_f_tx, _mm_set_ps1(128.0f));
2634  m128_f_ty = _mm_mul_ps(m128_f_ty, _mm_set_ps1(128.0f));
2636  m128_f_tx = _mm_round_ps(m128_f_tx, _MM_FROUND_TO_NEAREST_INT);
2637  m128_f_ty = _mm_round_ps(m128_f_ty, _MM_FROUND_TO_NEAREST_INT);
2639  const __m128i m128_i_tx = _mm_cvtps_epi32(m128_f_tx);
2640  const __m128i m128_i_ty = _mm_cvtps_epi32(m128_f_ty);
2642  interpolate4Pixels8BitPerChannelSSE<tChannels>(source, topLeftOffsets, topRightOffsets, bottomLeftOffsets, bottomRightOffsets, validPixels, *bColor, m128_i_tx, m128_i_ty, targetRow);
2643  targetRow += 4;
2644  }
2645  }
2646 }
2648 template <unsigned int tChannels>
2649 inline void FrameInterpolatorBilinear::homography8BitPerChannelSSESubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
2650 {
2651  static_assert(tChannels >= 1u, "Invalid channel number!");
2653  ocean_assert(input != nullptr && output != nullptr);
2654  ocean_assert(inputWidth > 0u && inputHeight > 0u);
2655  ocean_assert(outputWidth >= 4u && outputHeight > 0u);
2656  ocean_assert(input_H_output != nullptr);
2658  ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
2660  const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
2661  const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
2663  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
2665  uint8_t zeroColor[tChannels] = {uint8_t(0)};
2666  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
2668  OCEAN_ALIGN_DATA(16) unsigned int validPixels[4];
2670  OCEAN_ALIGN_DATA(16) unsigned int topLeftOffsets[4];
2671  OCEAN_ALIGN_DATA(16) unsigned int topRightOffsets[4];
2672  OCEAN_ALIGN_DATA(16) unsigned int bottomLeftOffsets[4];
2673  OCEAN_ALIGN_DATA(16) unsigned int bottomRightOffsets[4];
2675  // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
2676  const __m128 m128_f_X0 = _mm_set_ps1(float((*input_H_output)(0, 0)));
2677  const __m128 m128_f_X1 = _mm_set_ps1(float((*input_H_output)(1, 0)));
2678  const __m128 m128_f_X2 = _mm_set_ps1(float((*input_H_output)(2, 0)));
2680  // we store 4 floats: [0.0f, 0.0f, 0.0f, 0.0f]
2681  const __m128 m128_f_zero = _mm_setzero_ps();
2683  // we store 4 integers: [tChannels, tChannels, tChannels, tChannels]
2684  const __m128i m128_i_channels = _mm_set1_epi32(tChannels);
2686  // we store 4 integers: [inputStrideElements, inputStrideElements, inputStrideElements, inputStrideElements]
2687  const __m128i m128_i_inputStrideElements = _mm_set1_epi32(inputStrideElements);
2689  // we store 4 integers: [inputWidth - 1, inputWidth - 1, inputWidth -1, inputWidth -1], and same with inputHeight
2690  const __m128i m128_i_inputWidth_1 = _mm_set1_epi32(int(inputWidth) - 1);
2691  const __m128i m128_i_inputHeight_1 = _mm_set1_epi32(int(inputHeight) - 1);
2693  // we store 4 floats: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
2694  const __m128 m128_f_inputWidth_1 = _mm_set_ps1(float(inputWidth - 1u));
2695  const __m128 m128_f_inputHeight_1 = _mm_set_ps1(float(inputHeight - 1u));
2697  for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
2698  {
2699  PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
2701  /*
2702  * We can slightly optimize the 3x3 matrix multiplication:
2703  *
2704  * | X0 Y0 Z0 | | x |
2705  * | X1 Y1 Z1 | * | y |
2706  * | X2 Y2 Z2 | | 1 |
2707  *
2708  * | xx | | X0 * x | | Y0 * y + Z0 |
2709  * | yy | = | X1 * x | + | Y1 * y + Z1 |
2710  * | zz | | X2 * x | | Y2 * y + Z2 |
2711  *
2712  * | xx | | X0 * x | | C0 |
2713  * | yy | = | X1 * x | + | C1 |
2714  * | zz | | X2 * x | | C2 |
2715  *
2716  * As y is constant within the inner loop, we can pre-calculate the following terms:
2717  *
2718  * | x' | | (X0 * x + C0) / (X2 * x + C2) |
2719  * | y' | = | (X1 * x + C1) / (X2 * x + C2) |
2720  */
2722  // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
2723  const __m128 m128_f_C0 = _mm_set_ps1(float((*input_H_output)(0, 1) * Scalar(y) + (*input_H_output)(0, 2)));
2724  const __m128 m128_f_C1 = _mm_set_ps1(float((*input_H_output)(1, 1) * Scalar(y) + (*input_H_output)(1, 2)));
2725  const __m128 m128_f_C2 = _mm_set_ps1(float((*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2)));
2727  for (unsigned int x = 0u; x < outputWidth; x += 4u)
2728  {
2729  if (x + 4u > outputWidth)
2730  {
2731  // the last iteration will not fit into the output frame,
2732  // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
2734  ocean_assert(x >= 4u && outputWidth > 4u);
2735  const unsigned int newX = outputWidth - 4u;
2737  ocean_assert(x > newX);
2738  outputPixelData -= x - newX;
2740  x = newX;
2742  // the for loop will stop after this iteration
2743  ocean_assert(!(x + 4u < outputWidth));
2744  }
2747  // we need four successive x coordinate floats:
2748  // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
2749  const __m128 m128_f_x_0123 = _mm_set_ps(float(x + 3u), float(x + 2u), float(x + 1u), float(x + 0u));
2751  // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
2752  const __m128 m128_f_xx = _mm_add_ps(_mm_mul_ps(m128_f_X0, m128_f_x_0123), m128_f_C0);
2753  const __m128 m128_f_yy = _mm_add_ps(_mm_mul_ps(m128_f_X1, m128_f_x_0123), m128_f_C1);
2754  const __m128 m128_f_zz = _mm_add_ps(_mm_mul_ps(m128_f_X2, m128_f_x_0123), m128_f_C2);
2756 #ifdef USE_APPROXIMATED_INVERSE_OF_ZZ // (not defined by default)
2758  // we calculate the (approximated) inverse of zz,
2759  // the overall performance will be approx. 5% better while the accuracy will be slightly worse:
2760  // [1/zz3, 1/zz2, 1/zz1, 1/zz0]
2761  const __m128 inv_zz_128 = _mm_rcp_ps(m128_f_zz);
2763  // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
2764  const __m128 m128_f_inputX = _mm_mul_ps(m128_f_xx, inv_zz_128);
2765  const __m128 m128_f_inputY = _mm_mul_ps(m128_f_yy, inv_zz_128);
2767 #else
2769  // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
2770  const __m128 m128_f_inputX = _mm_div_ps(m128_f_xx, m128_f_zz);
2771  const __m128 m128_f_inputY = _mm_div_ps(m128_f_yy, m128_f_zz);
2776  // now we check whether we are inside the input frame
2777  const __m128 m128_f_validPixelX = _mm_and_ps(_mm_cmple_ps (m128_f_inputX, m128_f_inputWidth_1), _mm_cmpge_ps(m128_f_inputX, m128_f_zero)); // inputPosition.x() <= (inputWidth-1) && inputPosition.x() >= 0 ? 0xFFFFFF : 0x000000
2778  const __m128 m128_f_validPixelY = _mm_and_ps(_mm_cmple_ps (m128_f_inputY, m128_f_inputHeight_1), _mm_cmpge_ps(m128_f_inputY, m128_f_zero)); // inputPosition.y() <= (inputHeight-1) && inputPosition.y() >= 0 ? 0xFFFFFF : 0x000000
2780  const __m128i m128_i_validPixel = _mm_castps_si128(_mm_and_ps(m128_f_validPixelX, m128_f_validPixelY)); // is_inside_input_frame(inputPosition) ? 0xFFFFFF : 0x000000
2782  // we can stop here if all pixels are invalid
2783  if (_mm_test_all_zeros(m128_i_validPixel, _mm_set1_epi32(0xFFFFFFFF)))
2784  {
2785 #ifdef OCEAN_DEBUG
2786  OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
2787  _mm_store_si128((__m128i*)debugValidPixels, m128_i_validPixel);
2788  ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
2789 #endif
2791  outputPixelData[0] = *bColor;
2792  outputPixelData[1] = *bColor;
2793  outputPixelData[2] = *bColor;
2794  outputPixelData[3] = *bColor;
2796  outputPixelData += 4;
2798  continue;
2799  }
2801  // we store the result
2802  _mm_store_si128((__m128i*)validPixels, m128_i_validPixel);
2803  ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
2806  // now we determine the left, top, right and bottom pixel used for the interpolation
2807  const __m128 m128_f_tx_floor = _mm_floor_ps(m128_f_inputX);
2808  const __m128 m128_f_ty_floor = _mm_floor_ps(m128_f_inputY);
2810  // left = floor(x); top = floor(y)
2811  const __m128i m128_i_left = _mm_cvtps_epi32(m128_f_tx_floor);
2812  const __m128i m128_i_top = _mm_cvtps_epi32(m128_f_ty_floor);
2814  // right = min(left + 1, width - 1); bottom = min(top + 1; height - 1)
2815  const __m128i m128_i_right = _mm_min_epu32(_mm_add_epi32(m128_i_left, _mm_set1_epi32(1)), m128_i_inputWidth_1);
2816  const __m128i m128_i_bottom = _mm_min_epu32(_mm_add_epi32(m128_i_top, _mm_set1_epi32(1)), m128_i_inputHeight_1);
2818  // offset = (y * inputStrideElements + tChannels * x)
2819  const __m128i m128_i_topLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left)); // topleftOffset = (top * inputStrideElements + tChannels * left)
2820  const __m128i m128_i_topRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right)); // toprightOffset = (top * inputStrideElements + tChannels * right)
2821  const __m128i m128_i_bottomLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left)); // ...
2822  const __m128i m128_i_bottomRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right));
2824  // we store the offsets
2825  _mm_store_si128((__m128i*)topLeftOffsets, m128_i_topLeftOffset);
2826  _mm_store_si128((__m128i*)topRightOffsets, m128_i_topRightOffset);
2827  _mm_store_si128((__m128i*)bottomLeftOffsets, m128_i_bottomLeftOffset);
2828  _mm_store_si128((__m128i*)bottomRightOffsets, m128_i_bottomRightOffset);
2831  // now we need to determine the interpolation factors tx, tx_ and ty, ty_: (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
2833  // we determine the fractional portions of the x' and y':
2834  // e.g., [43.1231, -12.5543, -34.123, 99.2]
2835  // [ 0.1231, 0.4457, 0.877, 0.2] // note the result for negative value - but we will not process negative values anyway due to 'validPixel'
2836  __m128 m128_f_tx = _mm_sub_ps(m128_f_inputX, m128_f_tx_floor);
2837  __m128 m128_f_ty = _mm_sub_ps(m128_f_inputY, m128_f_ty_floor);
2839  // we use integer interpolation [0.0, 1.0] -> [0, 128]
2840  m128_f_tx = _mm_mul_ps(m128_f_tx, _mm_set_ps1(128.0f));
2841  m128_f_ty = _mm_mul_ps(m128_f_ty, _mm_set_ps1(128.0f));
2843  m128_f_tx = _mm_round_ps(m128_f_tx, _MM_FROUND_TO_NEAREST_INT);
2844  m128_f_ty = _mm_round_ps(m128_f_ty, _MM_FROUND_TO_NEAREST_INT);
2846  const __m128i m128_i_tx = _mm_cvtps_epi32(m128_f_tx);
2847  const __m128i m128_i_ty = _mm_cvtps_epi32(m128_f_ty);
2849  interpolate4Pixels8BitPerChannelSSE<tChannels>(input, topLeftOffsets, topRightOffsets, bottomLeftOffsets, bottomRightOffsets, validPixels, *bColor, m128_i_tx, m128_i_ty, outputPixelData);
2850  outputPixelData += 4;
2851  }
2852  }
2853 }
2855 template <>
2856 OCEAN_FORCE_INLINE __m128i FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<3u>(const __m128i& sourcesTopLeft, const __m128i& sourcesTopRight, const __m128i& sourcesBottomLeft, const __m128i& sourcesBottomRight, const __m128i& factorsTopLeft, const __m128i& factorsTopRight, const __m128i& factorsBottomLeft, const __m128i& factorsBottomRight)
2857 {
2858  // sourcesTopLeft stores the three color values of 4 (independent) pixels (the upper left pixels):
2859  // FEDC BA98 7654 3210
2860  // ---- VUYV UYVU YVUY
2861  // sourcesTopRight, sourcesBottomLeft, sourcesBottomRight have the same pattern
2863  // factorsTopLeft stores the 32 bit interpolation values for 4 pixels:
2864  // FEDC BA98 7654 3210
2865  // 3 2 1 0 (32 bit interpolation values, fitting into 16 bit)
2868  // we will simply extract each channel from the source pixels,
2869  // each extracted channel will be multiplied by the corresponding interpolation factor
2870  // and all interpolation results will be accumulated afterwards
2872  // FEDC BA98 7654 3210
2873  const __m128i mask32_Channel0 = SSE::set128i(0xFFFFFF09FFFFFF06ull, 0xFFFFFF03FFFFFF00ull); // ---9 ---6 ---3 ---0
2874  const __m128i mask32_Channel1 = SSE::set128i(0xFFFFFF0AFFFFFF07ull, 0xFFFFFF04FFFFFF01ull); // ---A ---7 ---4 ---1
2875  const __m128i mask32_Channel2 = SSE::set128i(0xFFFFFF0BFFFFFF08ull, 0xFFFFFF05FFFFFF02ull); // ---B ---8 ---5 ---2
2878  // we extract the first channel from the top left pixel values and multiply the channel with the interpolation factors
2879  // FEDC BA98 7654 3210
2880  // ---9 ---6 ---3 ---0
2881  // *
2882  // FTL3 FTL2 FTL1 FTL0
2883  __m128i multiplication_channel0 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel0));
2885  // we the same multiplication for the second channel
2886  __m128i multiplication_channel1 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel1));
2888  // and third channel
2889  __m128i multiplication_channel2 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel2));
2892  // now we repeat the process for the top right pixel values
2893  multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel0)));
2894  multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel1)));
2895  multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel2)));
2898  // and for the bottom left pixel values
2899  multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel0)));
2900  multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel1)));
2901  multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel2)));
2904  // and for the bottom right pixel values
2905  multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel0)));
2906  multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel1)));
2907  multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel2)));
2910  const __m128i m128_i_8192 = _mm_set1_epi32(8192);
2912  // we add 8192 for rounding and shift the result by 14 bits (division by 128*128) // TODO if using 256 we should be able to avoid the shifting by 14 bits (simply by using shuffle operations)
2914  // in addition to rounding and shifting, we need to move the interpolation results to the correct channel:
2915  // target data: ---9 ---6 ---3 ---0
2916  // shufflet target: ---- --9- -6-- 3--0
2917  // mask location: ---C ---8 ---4 ---0
2918  // mask: ---- --C- -8-- 4--0
2919  __m128i interpolation_channel0 = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication_channel0, m128_i_8192), 14), SSE::set128i(0xFFFFFFFFFFFF0CFFull, 0xFF08FFFF04FFFF00ull));
2921  // target data: ---A ---7 ---4 ---1
2922  // shufflet target: ---- -A-- 7--4 --1-
2923  // mask location: ---C ---8 ---4 ---0
2924  // mask: ---- -C-- 8--4 --0-
2925  __m128i interpolation_channel1 = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication_channel1, m128_i_8192), 14), SSE::set128i(0xFFFFFFFFFF0CFFFFull, 0x08FFFF04FFFF00FFull));
2927  // target data: ---B ---8 ---5 ---2
2928  // shufflet target: ---- B--8 --5- -2--
2929  // mask location: ---C ---8 ---4 ---0
2930  // mask: ---- C--8 --4- -0--
2931  __m128i interpolation_channel2 = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication_channel2, m128_i_8192), 14), SSE::set128i(0xFFFFFFFF0CFFFF08ull, 0xFFFF04FFFF00FFFFull));
2934  // finally, we simply blend all interpolation results together
2936  return _mm_or_si128(_mm_or_si128(interpolation_channel0, interpolation_channel1), interpolation_channel2);
2937 }
2939 template <>
2940 OCEAN_FORCE_INLINE __m128i FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<4u>(const __m128i& sourcesTopLeft, const __m128i& sourcesTopRight, const __m128i& sourcesBottomLeft, const __m128i& sourcesBottomRight, const __m128i& factorsTopLeft, const __m128i& factorsTopRight, const __m128i& factorsBottomLeft, const __m128i& factorsBottomRight)
2941 {
2942  // sourcesTopLeft stores the four color values of 4 (independent) pixels (the upper left pixels):
2943  // FEDC BA98 7654 3210
2945  // sourcesTopRight, sourcesBottomLeft, sourcesBottomRight have the same pattern
2947  // factorsTopLeft stores the 32 bit interpolation values for 4 pixels:
2948  // FEDC BA98 7654 3210
2949  // 3 2 1 0 (32 bit interpolation values, fitting into 16 bit)
2952  // we will simply extract each channel from the source pixels,
2953  // each extracted channel will be multiplied by the corresponding interpolation factor
2954  // and all interpolation results will be accumulated afterwards
2956  // FEDC BA98 7654 3210
2957  const __m128i mask32_Channel0 = SSE::set128i(0xA0A0A00CA0A0A008ull, 0xA0A0A004A0A0A000ull); // ---C ---8 ---4 ---0
2958  const __m128i mask32_Channel1 = SSE::set128i(0xA0A0A00DA0A0A009ull, 0xA0A0A005A0A0A001ull); // ---D ---9 ---5 ---1
2959  const __m128i mask32_Channel2 = SSE::set128i(0xA0A0A00EA0A0A00Aull, 0xA0A0A006A0A0A002ull); // ---E ---A ---6 ---2
2960  const __m128i mask32_Channel3 = SSE::set128i(0xA0A0A00FA0A0A00Bull, 0xA0A0A007A0A0A003ull); // ---F ---B ---7 ---3
2963  // we extract the first channel from the top left pixel values and multiply the channel with the interpolation factors
2964  // FEDC BA98 7654 3210
2965  // ---C ---8 ---4 ---0
2966  // *
2967  // FTL3 FTL2 FTL1 FTL0
2968  __m128i multiplication_channel0 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel0));
2970  // we the same multiplication for the second channel
2971  __m128i multiplication_channel1 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel1));
2973  // and third channel
2974  __m128i multiplication_channel2 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel2));
2976  // and last channel
2977  __m128i multiplication_channel3 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel3));
2980  // now we repeat the process for the top right pixel values
2981  multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel0)));
2982  multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel1)));
2983  multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel2)));
2984  multiplication_channel3 = _mm_add_epi32(multiplication_channel3, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel3)));
2987  // and for the bottom left pixel values
2988  multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel0)));
2989  multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel1)));
2990  multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel2)));
2991  multiplication_channel3 = _mm_add_epi32(multiplication_channel3, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel3)));
2994  // and for the bottom right pixel values
2995  multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel0)));
2996  multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel1)));
2997  multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel2)));
2998  multiplication_channel3 = _mm_add_epi32(multiplication_channel3, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel3)));
3001  const __m128i m128_i_8192 = _mm_set1_epi32(8192);
3003  // we add 8192 for rounding and shift the result by 14 bits (division by 128*128)
3005  // in addition to rounding and shifting, we need to move the interpolation results to the correct channel:
3006  // ---C ---8 ---4 ---0
3007  // ---C ---9 ---4 ---0
3008  __m128i interpolation_channel0 = _mm_srli_epi32(_mm_add_epi32(multiplication_channel0, m128_i_8192), 14);
3010  // in addition to rounding and shifting, we need to move the interpolation results to the correct channel:
3011  // ---D ---9 ---5 ---1
3012  // --D- --9- --5- --1-
3013  __m128i interpolation_channel1 = _mm_slli_epi32(_mm_srli_epi32(_mm_add_epi32(multiplication_channel1, m128_i_8192), 14), 8);
3015  // ---E ---A ---6 ---2
3016  // -E-- -A-- -6-- -2--
3017  __m128i interpolation_channel2 = _mm_slli_epi32(_mm_srli_epi32(_mm_add_epi32(multiplication_channel2, m128_i_8192), 14), 16);
3019  // ---F ---B ---7 ---3
3020  // F--- B--- 7--- 3---
3021  __m128i interpolation_channel3 = _mm_slli_epi32(_mm_srli_epi32(_mm_add_epi32(multiplication_channel3, m128_i_8192), 14), 24);
3024  // finally, we simply blend all interpolation results together
3026  return _mm_or_si128(_mm_or_si128(interpolation_channel0, interpolation_channel1), _mm_or_si128(interpolation_channel2, interpolation_channel3));
3027 }
3031 // we see a significant performance decrease with non-VS compilers/platforms,
3032 // so we do not use the 3channel version with non-Windows compilers
3034 template <>
3035 OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<1u>(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const DataType<uint8_t, 1u>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, 1u>::Type* targetPositionPixels)
3036 {
3037  ocean_assert(source != nullptr);
3038  ocean_assert(targetPositionPixels != nullptr);
3040  typedef typename DataType<uint8_t, 1u>::Type PixelType;
3042  // as we do not initialize the following intermediate data,
3043  // we hopefully will not allocate memory on the stack each time this function is called
3044  OCEAN_ALIGN_DATA(16) PixelType pixels[16];
3046  // we gather the individual source pixel values from the source image,
3047  // based on the calculated pixel locations
3048  for (unsigned int i = 0u; i < 4u; ++i)
3049  {
3050  if (validPixels[i])
3051  {
3052  pixels[i * 4u + 0u] = *((PixelType*)(source + offsetsTopLeft[i]));
3053  pixels[i * 4u + 1u] = *((PixelType*)(source + offsetsTopRight[i]));
3054  pixels[i * 4u + 2u] = *((PixelType*)(source + offsetsBottomLeft[i]));
3055  pixels[i * 4u + 3u] = *((PixelType*)(source + offsetsBottomRight[i]));
3056  }
3057  else
3058  {
3059  pixels[i * 4u + 0u] = borderColor;
3060  pixels[i * 4u + 1u] = borderColor;
3061  pixels[i * 4u + 2u] = borderColor;
3062  pixels[i * 4u + 3u] = borderColor;
3063  }
3064  }
3066  static_assert(sizeof(__m128i) == sizeof(pixels), "Invalid data type!");
3068  const __m128i m128_pixels = _mm_load_si128((const __m128i*)pixels);
3071  // factorLeft = 128 - factorRight
3072  // factorTop = 128 - factorBottom
3074  const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3075  const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3077  // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3078  // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3080  const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3081  const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3082  const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3083  const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3085  // pixels stores the four interpolation grascale pixel values (top left, top right, bottom left, bottom right) for 4 (independent) pixels:
3086  // F E D C B A 9 8 7 6 5 4 3 2 1 0
3089  // factorsTopLeft stores the 32 bit interpolation values for 4 pixels:
3090  // FEDC BA98 7654 3210
3091  // 3 2 1 0 (32 bit interpolation values, fitting into 16 bit)
3094  // we will simply extract each channel from the source pixels,
3095  // each extracted channel will be multiplied by the corresponding interpolation factor
3096  // and all interpolation results will be accumulated afterwards
3098  // FEDC BA98 7654 3210
3099  const __m128i mask32_topLeft = SSE::set128i(0xFFFFFF0CFFFFFF08ull, 0xFFFFFF04FFFFFF00ull); // ---C ---8 ---4 ---0
3100  const __m128i mask32_topRight = SSE::set128i(0xFFFFFF0DFFFFFF09ull, 0xFFFFFF05FFFFFF01ull); // ---D ---9 ---5 ---1
3101  const __m128i mask32_bottomLeft = SSE::set128i(0xFFFFFF0EFFFFFF0Aull, 0xFFFFFF06FFFFFF02ull); // ---E ---A ---6 ---2
3102  const __m128i mask32_bottomRight = SSE::set128i(0xFFFFFF0FFFFFFF0Bull, 0xFFFFFF07FFFFFF03ull); // ---F ---B ---7 ---3
3105  // we extract the top left values and multiply them with the interpolation factors
3106  // FEDC BA98 7654 3210
3107  // ---C ---8 ---4 ---0
3108  // *
3109  // FTL3 FTL2 FTL1 FTL0
3110  __m128i multiplicationA = _mm_mullo_epi32(m128_factorsTopLeft, _mm_shuffle_epi8(m128_pixels, mask32_topLeft));
3111  __m128i multiplicationB = _mm_mullo_epi32(m128_factorsTopRight, _mm_shuffle_epi8(m128_pixels, mask32_topRight));
3113  multiplicationA = _mm_add_epi32(multiplicationA, _mm_mullo_epi32(m128_factorsBottomLeft, _mm_shuffle_epi8(m128_pixels, mask32_bottomLeft)));
3114  multiplicationB = _mm_add_epi32(multiplicationB, _mm_mullo_epi32(m128_factorsBottomRight, _mm_shuffle_epi8(m128_pixels, mask32_bottomRight)));
3116  __m128i multiplication = _mm_add_epi32(multiplicationA, multiplicationB);
3118  const __m128i m128_i_8192 = _mm_set1_epi32(8192);
3120  // we add 8192 for rounding and shift the result by 14 bits (division by 128*128) // TODO if using 256 we should be able to avoid the shifting by 14 bits (simply by using shuffle operations)
3121  // additionally, we shuffle the individual results together
3123  const __m128i result = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication, m128_i_8192), 14), SSE::set128i(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFF0C080400ull));
3125  *((unsigned int*)targetPositionPixels) = _mm_extract_epi32(result, 0);
3126 }
3128 template <>
3129 OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<3u>(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const DataType<uint8_t, 3u>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, 3u>::Type* targetPositionPixels)
3130 {
3131  ocean_assert(source != nullptr);
3132  ocean_assert(targetPositionPixels != nullptr);
3134  typedef typename DataType<uint8_t, 3u>::Type PixelType;
3136  // as we do not initialize the following intermediate data,
3137  // we hopefully will not allocate memory on the stack each time this function is called
3138  OCEAN_ALIGN_DATA(16) PixelType topLeftPixels[6];
3139  OCEAN_ALIGN_DATA(16) PixelType topRightPixels[6];
3140  OCEAN_ALIGN_DATA(16) PixelType bottomLeftPixels[6];
3141  OCEAN_ALIGN_DATA(16) PixelType bottomRightPixels[6];
3143  // we gather the individual source pixel values from the source image,
3144  // based on the calculated pixel locations
3145  for (unsigned int i = 0u; i < 4u; ++i)
3146  {
3147  if (validPixels[i])
3148  {
3149  topLeftPixels[i] = *((PixelType*)(source + offsetsTopLeft[i]));
3150  topRightPixels[i] = *((PixelType*)(source + offsetsTopRight[i]));
3151  bottomLeftPixels[i] = *((PixelType*)(source + offsetsBottomLeft[i]));
3152  bottomRightPixels[i] = *((PixelType*)(source + offsetsBottomRight[i]));
3153  }
3154  else
3155  {
3156  topLeftPixels[i] = borderColor;
3157  topRightPixels[i] = borderColor;
3158  bottomLeftPixels[i] = borderColor;
3159  bottomRightPixels[i] = borderColor;
3160  }
3161  }
3163  static_assert(sizeof(__m128i) <= sizeof(topLeftPixels), "Invalid data type!");
3165  const __m128i m128_topLeftPixels = _mm_load_si128((const __m128i*)topLeftPixels);
3166  const __m128i m128_topRightPixels = _mm_load_si128((const __m128i*)topRightPixels);
3167  const __m128i m128_bottomLeftPixels = _mm_load_si128((const __m128i*)bottomLeftPixels);
3168  const __m128i m128_bottomRightPixels = _mm_load_si128((const __m128i*)bottomRightPixels);
3171  // factorLeft = 128 - factorRight
3172  // factorTop = 128 - factorBottom
3174  const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3175  const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3177  // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3178  // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3180  const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3181  const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3182  const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3183  const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3186  const __m128i m128_interpolationResult = interpolate4Pixels8BitPerChannelSSE<3u>(m128_topLeftPixels, m128_topRightPixels, m128_bottomLeftPixels, m128_bottomRightPixels, m128_factorsTopLeft, m128_factorsTopRight, m128_factorsBottomLeft, m128_factorsBottomRight);
3188  // we copy the first 12 bytes
3189  memcpy(targetPositionPixels, &m128_interpolationResult, 12u);
3190 }
3192 #endif // OCEAN_COMPILER_MSC
3194 template <>
3195 OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<4u>(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const DataType<uint8_t, 4u>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, 4u>::Type* targetPositionPixels)
3196 {
3197  ocean_assert(source != nullptr);
3198  ocean_assert(targetPositionPixels != nullptr);
3200  typedef typename DataType<uint8_t, 4u>::Type PixelType;
3202  // as we do not initialize the following intermediate data,
3203  // we hopefully will not allocate memory on the stack each time this function is called
3204  OCEAN_ALIGN_DATA(16) PixelType topLeftPixels[4];
3205  OCEAN_ALIGN_DATA(16) PixelType topRightPixels[4];
3206  OCEAN_ALIGN_DATA(16) PixelType bottomLeftPixels[4];
3207  OCEAN_ALIGN_DATA(16) PixelType bottomRightPixels[4];
3209  // we gather the individual source pixel values from the source image,
3210  // based on the calculated pixel locations
3212  for (unsigned int i = 0u; i < 4u; ++i)
3213  {
3214  if (validPixels[i])
3215  {
3216  topLeftPixels[i] = *((PixelType*)(source + offsetsTopLeft[i]));
3217  topRightPixels[i] = *((PixelType*)(source + offsetsTopRight[i]));
3218  bottomLeftPixels[i] = *((PixelType*)(source + offsetsBottomLeft[i]));
3219  bottomRightPixels[i] = *((PixelType*)(source + offsetsBottomRight[i]));
3220  }
3221  else
3222  {
3223  topLeftPixels[i] = borderColor;
3224  topRightPixels[i] = borderColor;
3225  bottomLeftPixels[i] = borderColor;
3226  bottomRightPixels[i] = borderColor;
3227  }
3228  }
3230  static_assert(sizeof(__m128i) == sizeof(topLeftPixels), "Invalid data type!");
3232  const __m128i m128_topLeftPixels = _mm_load_si128((const __m128i*)topLeftPixels);
3233  const __m128i m128_topRightPixels = _mm_load_si128((const __m128i*)topRightPixels);
3234  const __m128i m128_bottomLeftPixels = _mm_load_si128((const __m128i*)bottomLeftPixels);
3235  const __m128i m128_bottomRightPixels = _mm_load_si128((const __m128i*)bottomRightPixels);
3238  // factorLeft = 128 - factorRight
3239  // factorTop = 128 - factorBottom
3241  const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3242  const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3244  // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3245  // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3247  const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3248  const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3249  const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3250  const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3253  const __m128i m128_interpolationResult = interpolate4Pixels8BitPerChannelSSE<4u>(m128_topLeftPixels, m128_topRightPixels, m128_bottomLeftPixels, m128_bottomRightPixels, m128_factorsTopLeft, m128_factorsTopRight, m128_factorsBottomLeft, m128_factorsBottomRight);
3255  _mm_storeu_si128((__m128i*)targetPositionPixels, m128_interpolationResult);
3256 }
3258 template <unsigned int tChannels>
3259 OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const typename DataType<uint8_t, tChannels>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, tChannels>::Type* targetPositionPixels)
3260 {
3261  ocean_assert(source != nullptr);
3262  ocean_assert(targetPositionPixels != nullptr);
3264  // as we do not initialize the following intermediate data,
3265  // we hopefully will not allocate memory on the stack each time this function is called
3266  OCEAN_ALIGN_DATA(16) unsigned int factorsTopLeft[4];
3267  OCEAN_ALIGN_DATA(16) unsigned int factorsTopRight[4];
3268  OCEAN_ALIGN_DATA(16) unsigned int factorsBottomLeft[4];
3269  OCEAN_ALIGN_DATA(16) unsigned int factorsBottomRight[4];
3272  // factorLeft = 128 - factorRight
3273  // factorTop = 128 - factorBottom
3275  const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3276  const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3278  // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3279  // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3281  const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3282  const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3283  const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3284  const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3287  // we store the interpolation factors
3288  _mm_store_si128((__m128i*)factorsTopLeft, m128_factorsTopLeft);
3289  _mm_store_si128((__m128i*)factorsTopRight, m128_factorsTopRight);
3290  _mm_store_si128((__m128i*)factorsBottomLeft, m128_factorsBottomLeft);
3291  _mm_store_si128((__m128i*)factorsBottomRight, m128_factorsBottomRight);
3293  for (unsigned int i = 0u; i < 4u; ++i)
3294  {
3295  if (validPixels[i])
3296  {
3297  const uint8_t* topLeft = source + offsetsTopLeft[i];
3298  const uint8_t* topRight = source + offsetsTopRight[i];
3300  const uint8_t* bottomLeft = source + offsetsBottomLeft[i];
3301  const uint8_t* bottomRight = source + offsetsBottomRight[i];
3303  const unsigned int& factorTopLeft = factorsTopLeft[i];
3304  const unsigned int& factorTopRight = factorsTopRight[i];
3305  const unsigned int& factorBottomLeft = factorsBottomLeft[i];
3306  const unsigned int& factorBottomRight = factorsBottomRight[i];
3308  for (unsigned int n = 0u; n < tChannels; ++n)
3309  {
3310  ((uint8_t*)targetPositionPixels)[n] = (uint8_t)((topLeft[n] * factorTopLeft + topRight[n] * factorTopRight + bottomLeft[n] * factorBottomLeft + bottomRight[n] * factorBottomRight + 8192u) >> 14u);
3311  }
3312  }
3313  else
3314  {
3315  *targetPositionPixels = borderColor;
3316  }
3318  targetPositionPixels++;
3319  }
3320 }
/**
 * Applies an affine transformation to a subset of the rows of a frame with 8 bit per channel, using NEON instructions and bilinear interpolation.
 * For each target pixel (x, y) of the handled rows the source position is determined by source_A_target * (x, y, 1).
 * Target pixels with a source position inside [0, sourceWidth - 1]x[0, sourceHeight - 1] are interpolated from the four neighboring source pixels
 * (with interpolation factors in the range [0, 128]), all remaining target pixels receive the border color.
 * Four target pixels are handled per iteration; if the target width is not a multiple of four, the last iteration is shifted to the left and re-calculates some pixels.
 * @param source The source frame, must be valid
 * @param sourceWidth The width of the source frame in pixels, with range [1, infinity)
 * @param sourceHeight The height of the source frame in pixels, with range [1, infinity)
 * @param source_A_target The transformation converting target positions to source positions, must be valid; the asserts expect a non-null matrix with (almost) zero elements at the indices 2 and 5
 * @param borderColor The color for target pixels without a valid source position (one value per channel), nullptr to use zero for each channel
 * @param target The target frame, must be valid
 * @param targetWidth The width of the target frame in pixels, with range [4, infinity)
 * @param targetHeight The height of the target frame in pixels, with range [1, infinity)
 * @param firstTargetRow The first target row to be handled
 * @param numberTargetRows The number of target rows to be handled, with firstTargetRow + numberTargetRows <= targetHeight
 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements
 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements
 * @tparam tChannels The number of frame channels, with range [1, infinity)
 */
3326 template <unsigned int tChannels>
3327 void FrameInterpolatorBilinear::affine8BitPerChannelNEONSubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
3328 {
3329  static_assert(tChannels >= 1u, "Invalid channel number!");
3331  ocean_assert(source && target);
3332  ocean_assert(sourceWidth > 0u && sourceHeight > 0u);
3333  ocean_assert(targetWidth >= 4u && targetHeight > 0u);
3334  ocean_assert(source_A_target);
3335  ocean_assert(!source_A_target->isNull() && Numeric::isEqualEps((*source_A_target)[2]) && Numeric::isEqualEps((*source_A_target)[5]));
3337  ocean_assert(firstTargetRow + numberTargetRows <= targetHeight);
3339  const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
3340  const unsigned int targetStrideElements = targetWidth * tChannels + targetPaddingElements;
3342  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
 // fallback border color (zero in each channel), used if no explicit border color is provided
3344  uint8_t zeroColor[tChannels] = {uint8_t(0)};
3345  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
3347  unsigned int validPixels[4];
3349  unsigned int topLeftOffsetsElements[4];
3350  unsigned int topRightOffsetsElements[4];
3351  unsigned int bottomLeftOffsetsElements[4];
3352  unsigned int bottomRightOffsetsElements[4];
3354  const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(tChannels);
3356  // we store 4 floats: [X0, X0, X0, X0], and same with X1
3357  const float32x4_t m128_f_X0 = vdupq_n_f32(float((*source_A_target)(0, 0)));
3358  const float32x4_t m128_f_X1 = vdupq_n_f32(float((*source_A_target)(1, 0)));
3360  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
3361  {
3362  PixelType* targetRow = (PixelType*)(target + y * targetStrideElements);
3364  /*
3365  * We can slightly optimize the 3x3 matrix multiplication:
3366  *
3367  * | X0 Y0 Z0 | | x |
3368  * | X1 Y1 Z1 | * | y |
3369  * | 0 0 1 | | 1 |
3370  *
3371  * | xx | | X0 * x | | Y0 * y + Z0 |
3372  * | yy | = | X1 * x | + | Y1 * y + Z1 |
3373  *
3374  * As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
3375  *
3376  * C0 = Y0 * y + Z0
3377  * C1 = Y1 * y + Z1
3378  *
3379  * So the computation becomes:
3380  *
3381  * | x' | | X0 * x | | C0 |
3382  * | y' | = | X1 * x | + | C1 |
3383  */
3385  // we store 4 floats: [C0, C0, C0, C0], and same with C1
3386  const float32x4_t m128_f_C0 = vdupq_n_f32(float((*source_A_target)(0, 1) * Scalar(y) + (*source_A_target)(0, 2)));
3387  const float32x4_t m128_f_C1 = vdupq_n_f32(float((*source_A_target)(1, 1) * Scalar(y) + (*source_A_target)(1, 2)));
3389  // we store 4 floats: [0.0f, 0.0f, 0.0f, 0.0f]
3390  const float32x4_t m128_f_zero = vdupq_n_f32(0.0f);
3392  // we store 4 integers: [sourceStrideElements, sourceStrideElements, sourceStrideElements, sourceStrideElements]
3393  const uint32x4_t m128_u_sourceStrideElements = vdupq_n_u32(sourceStrideElements);
3395  // we store 4 integers: [sourceWidth - 1, sourceWidth - 1, sourceWidth - 1, sourceWidth - 1], and same with sourceHeight
3396  const uint32x4_t m128_u_sourceWidth_1 = vdupq_n_u32(sourceWidth - 1u);
3397  const uint32x4_t m128_u_sourceHeight_1 = vdupq_n_u32(sourceHeight - 1u);
3399  // we store 4 floats: [sourceWidth - 1, sourceWidth - 1, sourceWidth - 1, sourceWidth - 1], and same with sourceHeight
3400  const float32x4_t m128_f_sourceWidth_1 = vdupq_n_f32(float(sourceWidth - 1u));
3401  const float32x4_t m128_f_sourceHeight_1 = vdupq_n_f32(float(sourceHeight - 1u));
3403  for (unsigned int x = 0u; x < targetWidth; x += 4u)
3404  {
3405  if (x + 4u > targetWidth)
3406  {
3407  // the last iteration will not fit into the target frame,
3408  // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
3410  ocean_assert(x >= 4u && targetWidth > 4u);
3411  const unsigned int newX = targetWidth - 4u;
3413  ocean_assert(x > newX);
3414  targetRow -= x - newX;
3416  x = newX;
3418  // the for loop will stop after this iteration
3419  ocean_assert(!(x + 4u < targetWidth));
3420  }
3423  // we need four successive x coordinate floats:
3424  // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
3425  float x_0123[4] = {float(x + 0u), float(x + 1u), float(x + 2u), float(x + 3u)};
3426  const float32x4_t m128_f_x_0123 = vld1q_f32(x_0123);
3428  // we calculate xx and yy for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
3429  const float32x4_t m128_f_sourceX = vmlaq_f32(m128_f_C0, m128_f_X0, m128_f_x_0123);
3430  const float32x4_t m128_f_sourceY = vmlaq_f32(m128_f_C1, m128_f_X1, m128_f_x_0123);
3433  // now we check whether we are inside the source frame
3434  const uint32x4_t m128_u_validPixelX = vandq_u32(vcleq_f32(m128_f_sourceX, m128_f_sourceWidth_1), vcgeq_f32(m128_f_sourceX, m128_f_zero)); // sourcePosition.x() <= (sourceWidth - 1) && sourcePosition.x() >= 0 ? 0xFFFFFFFF : 0x00000000
3435  const uint32x4_t m128_u_validPixelY = vandq_u32(vcleq_f32(m128_f_sourceY, m128_f_sourceHeight_1), vcgeq_f32(m128_f_sourceY, m128_f_zero)); // sourcePosition.y() <= (sourceHeight - 1) && sourcePosition.y() >= 0 ? 0xFFFFFFFF : 0x00000000
3437  const uint32x4_t m128_u_validPixel = vandq_u32(m128_u_validPixelX, m128_u_validPixelY); // is_inside_source_frame(sourcePosition) ? 0xFFFFFFFF : 0x00000000
3440  // we can stop here if all pixels are invalid
3441  const uint32x2_t m64_u_validPixel = vorr_u32(vget_low_u32(m128_u_validPixel), vget_high_u32(m128_u_validPixel));
3442  if ((vget_lane_u32(m64_u_validPixel, 0) | vget_lane_u32(m64_u_validPixel, 1)) == 0x00000000u)
3443  {
3444 #ifdef OCEAN_DEBUG
3445  OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
3446  vst1q_u32(debugValidPixels, m128_u_validPixel);
3447  ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
3448 #endif
3450  targetRow[0] = *bColor;
3451  targetRow[1] = *bColor;
3452  targetRow[2] = *bColor;
3453  targetRow[3] = *bColor;
3455  targetRow += 4;
3457  continue;
3458  }
3461  // we store the result
3462  vst1q_u32(validPixels, m128_u_validPixel);
3463  ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
3466  // now we determine the left, top, right and bottom pixel used for the interpolation
3467  // left = floor(x); top = floor(y)
3468  const uint32x4_t m128_u_left = vcvtq_u32_f32(m128_f_sourceX);
3469  const uint32x4_t m128_u_top = vcvtq_u32_f32(m128_f_sourceY);
3471  // right = min(left + 1, width - 1); bottom = min(top + 1; height - 1)
3472  const uint32x4_t m128_u_right = vminq_u32(vaddq_u32(m128_u_left, vdupq_n_u32(1u)), m128_u_sourceWidth_1);
3473  const uint32x4_t m128_u_bottom = vminq_u32(vaddq_u32(m128_u_top, vdupq_n_u32(1u)), m128_u_sourceHeight_1);
3475  // offset = y * stride + x * channels
3476  const uint32x4_t m128_u_topLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_top, m128_u_sourceStrideElements); // topLeftOffset = top * strideElements + left * channels
3477  const uint32x4_t m128_u_topRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_top, m128_u_sourceStrideElements); // topRightOffset = top * strideElements + right * channels
3478  const uint32x4_t m128_u_bottomLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_bottom, m128_u_sourceStrideElements); // ...
3479  const uint32x4_t m128_u_bottomRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_bottom, m128_u_sourceStrideElements);
3481  // we store the offsets
3482  vst1q_u32(topLeftOffsetsElements, m128_u_topLeftOffsetElements);
3483  vst1q_u32(topRightOffsetsElements, m128_u_topRightOffsetElements);
3484  vst1q_u32(bottomLeftOffsetsElements, m128_u_bottomLeftOffsetElements);
3485  vst1q_u32(bottomRightOffsetsElements, m128_u_bottomRightOffsetElements);
3488  // now we need to determine the interpolation factors tx, tx_ and ty, ty_: (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
3490  // we determine the fractional portions of the x' and y':
3491  float32x4_t m128_f_tx = vsubq_f32(m128_f_sourceX, vcvtq_f32_u32(m128_u_left));
3492  float32x4_t m128_f_ty = vsubq_f32(m128_f_sourceY, vcvtq_f32_u32(m128_u_top));
3494  // we use integer interpolation [0.0, 1.0] -> [0, 128]
3495  m128_f_tx = vmulq_f32(m128_f_tx, vdupq_n_f32(128.0f));
3496  m128_f_ty = vmulq_f32(m128_f_ty, vdupq_n_f32(128.0f));
3498  const uint32x4_t m128_u_tx = vcvtq_u32_f32(vaddq_f32(m128_f_tx, vdupq_n_f32(0.5)));
3499  const uint32x4_t m128_u_ty = vcvtq_u32_f32(vaddq_f32(m128_f_ty, vdupq_n_f32(0.5)));
3501  if constexpr (tChannels > 4u)
3502  {
3503  // normally we would simply call instead of copying the code of the function to this location
3504  // however, if calling the function instead of applying the code here directly
3505  // clang ends with code approx. 20% slower
3506  // thus we make a copy of the code and keep the function for demonstration purposes
3508  //interpolate4Pixels8BitPerChannelNEON<tChannels>(source, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, targetPixelData);
3509  //targetPixelData += 4;
3511  const uint32x4_t m128_u_tx_ = vsubq_u32(vdupq_n_u32(128u), m128_u_tx);
3512  const uint32x4_t m128_u_ty_ = vsubq_u32(vdupq_n_u32(128u), m128_u_ty);
3514  // (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
3515  // == top_left * tx_ty_ + top_right * txty_ + bottom_left * tx_ty + bottom_right * txty
3516  const uint32x4_t m128_u_tx_ty_ = vmulq_u32(m128_u_tx_, m128_u_ty_);
3517  const uint32x4_t m128_u_txty_ = vmulq_u32(m128_u_tx, m128_u_ty_);
3518  const uint32x4_t m128_u_tx_ty = vmulq_u32(m128_u_tx_, m128_u_ty);
3519  const uint32x4_t m128_u_txty = vmulq_u32(m128_u_tx, m128_u_ty);
3521  unsigned int tx_ty_s[4];
3522  unsigned int txty_s[4];
3523  unsigned int tx_tys[4];
3524  unsigned int txtys[4];
3526  // we store the interpolation factors
3527  vst1q_u32(tx_ty_s, m128_u_tx_ty_);
3528  vst1q_u32(txty_s, m128_u_txty_);
3529  vst1q_u32(tx_tys, m128_u_tx_ty);
3530  vst1q_u32(txtys, m128_u_txty);
3532  for (unsigned int i = 0u; i < 4u; ++i)
3533  {
3534  if (validPixels[i])
3535  {
3536  ocean_assert(topLeftOffsetsElements[i] < sourceStrideElements * sourceHeight);
3537  ocean_assert(topRightOffsetsElements[i] < sourceStrideElements * sourceHeight);
3538  ocean_assert(bottomLeftOffsetsElements[i] < sourceStrideElements * sourceHeight);
3539  ocean_assert(bottomRightOffsetsElements[i] < sourceStrideElements * sourceHeight);
3541  const uint8_t* topLeft = source + topLeftOffsetsElements[i];
3542  const uint8_t* topRight = source + topRightOffsetsElements[i];
3544  const uint8_t* bottomLeft = source + bottomLeftOffsetsElements[i];
3545  const uint8_t* bottomRight = source + bottomRightOffsetsElements[i];
3547  const unsigned int tx_ty_ = tx_ty_s[i];
3548  const unsigned int txty_ = txty_s[i];
3549  const unsigned int tx_ty = tx_tys[i];
3550  const unsigned int txty = txtys[i];
3552  ocean_assert(tx_ty_ + txty_ + tx_ty + txty == 128u * 128u);
3554  for (unsigned int n = 0u; n < tChannels; ++n)
3555  {
3556  ((uint8_t*)targetRow)[n] = uint8_t((topLeft[n] * tx_ty_ + topRight[n] * txty_ + bottomLeft[n] * tx_ty + bottomRight[n] * txty + 8192u) >> 14u);
3557  }
3558  }
3559  else
3560  {
3561  *targetRow = *bColor;
3562  }
3564  targetRow++;
3565  }
3566  }
3567  else
3568  {
3569  interpolate4Pixels8BitPerChannelNEON<tChannels>(source, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, targetRow);
3570  targetRow += 4;
3571  }
3572  }
3573  }
3574 }
3576 template <unsigned int tChannels>
3577 void FrameInterpolatorBilinear::homography8BitPerChannelNEONSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
3578 {
3579  static_assert(tChannels >= 1u, "Invalid channel number!");
3581  ocean_assert(input != nullptr && output != nullptr);
3582  ocean_assert(inputWidth > 0u && inputHeight > 0u);
3583  ocean_assert(outputWidth >= 4u && outputHeight > 0u);
3584  ocean_assert(input_H_output != nullptr);
3586  ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
3588  const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
3589  const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
3591  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
3593  uint8_t zeroColor[tChannels] = {uint8_t(0)};
3594  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
3596  unsigned int validPixels[4];
3598  unsigned int topLeftOffsetsElements[4];
3599  unsigned int topRightOffsetsElements[4];
3600  unsigned int bottomLeftOffsetsElements[4];
3601  unsigned int bottomRightOffsetsElements[4];
3603  const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(tChannels);
3605  // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
3606  const float32x4_t m128_f_X0 = vdupq_n_f32(float((*input_H_output)(0, 0)));
3607  const float32x4_t m128_f_X1 = vdupq_n_f32(float((*input_H_output)(1, 0)));
3608  const float32x4_t m128_f_X2 = vdupq_n_f32(float((*input_H_output)(2, 0)));
3610  for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
3611  {
3612  PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
3614  /*
3615  * We can slightly optimize the 3x3 matrix multiplication:
3616  *
3617  * | X0 Y0 Z0 | | x |
3618  * | X1 Y1 Z1 | * | y |
3619  * | X2 Y2 Z2 | | 1 |
3620  *
3621  * | xx | | X0 * x | | Y0 * y + Z0 |
3622  * | yy | = | X1 * x | + | Y1 * y + Z1 |
3623  * | zz | | X2 * x | | Y2 * y + Z2 |
3624  *
3625  * | xx | | X0 * x | | C0 |
3626  * | yy | = | X1 * x | + | C1 |
3627  * | zz | | X2 * x | | C3 |
3628  *
3629  * As y is constant within the inner loop, we can pre-calculate the following terms:
3630  *
3631  * | x' | | (X0 * x + C0) / (X2 * x + C2) |
3632  * | y' | = | (X1 * x + C1) / (X2 * x + C2) |
3633  */
3635  // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
3636  const float32x4_t m128_f_C0 = vdupq_n_f32(float((*input_H_output)(0, 1) * Scalar(y) + (*input_H_output)(0, 2)));
3637  const float32x4_t m128_f_C1 = vdupq_n_f32(float((*input_H_output)(1, 1) * Scalar(y) + (*input_H_output)(1, 2)));
3638  const float32x4_t m128_f_C2 = vdupq_n_f32(float((*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2)));
3640  // we store 4 floats: [0.0f, 0.0f, 0.0f, 0.0f]
3641  const float32x4_t m128_f_zero = vdupq_n_f32(0.0f);
3643  // we store 4 integers: [inputStrideElements, inputStrideElements, inputStrideElements, inputStrideElements]
3644  const uint32x4_t m128_u_inputStrideElements = vdupq_n_u32(inputStrideElements);
3646  // we store 4 integers: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
3647  const uint32x4_t m128_u_inputWidth_1 = vdupq_n_u32(inputWidth - 1u);
3648  const uint32x4_t m128_u_inputHeight_1 = vdupq_n_u32(inputHeight - 1u);
3650  // we store 4 floats: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
3651  const float32x4_t m128_f_inputWidth_1 = vdupq_n_f32(float(inputWidth - 1u));
3652  const float32x4_t m128_f_inputHeight_1 = vdupq_n_f32(float(inputHeight - 1u));
3654  for (unsigned int x = 0u; x < outputWidth; x += 4u)
3655  {
3656  if (x + 4u > outputWidth)
3657  {
3658  // the last iteration will not fit into the output frame,
3659  // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
3661  ocean_assert(x >= 4u && outputWidth > 4u);
3662  const unsigned int newX = outputWidth - 4u;
3664  ocean_assert(x > newX);
3665  outputPixelData -= x - newX;
3667  x = newX;
3669  // the for loop will stop after this iteration
3670  ocean_assert(!(x + 4u < outputWidth));
3671  }
3674  // we need four successive x coordinate floats:
3675  // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
3676  float x_0123[4] = {float(x + 0u), float(x + 1u), float(x + 2u), float(x + 3u)};
3677  const float32x4_t m128_f_x_0123 = vld1q_f32(x_0123);
3679  // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
3680  const float32x4_t m128_f_xx = vmlaq_f32(m128_f_C0, m128_f_X0, m128_f_x_0123);
3681  const float32x4_t m128_f_yy = vmlaq_f32(m128_f_C1, m128_f_X1, m128_f_x_0123);
3682  const float32x4_t m128_f_zz = vmlaq_f32(m128_f_C2, m128_f_X2, m128_f_x_0123);
3686  // using the division available from ARM64 is more precise
3687  const float32x4_t m128_f_inputX = vdivq_f32(m128_f_xx, m128_f_zz);
3688  const float32x4_t m128_f_inputY = vdivq_f32(m128_f_yy, m128_f_zz);
3690 #else
3692  // we calculate the (approximated) inverse of zz
3693  // [1/zz3, 1/zz2, 1/zz1, 1/zz0]
3694  float32x4_t inv_zz_128 = vrecpeq_f32(m128_f_zz);
3695  inv_zz_128 = vmulq_f32(vrecpsq_f32(m128_f_zz, inv_zz_128), inv_zz_128); // improving the accuracy of the approx. inverse by Newton/Raphson
3697  // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
3698  const float32x4_t m128_f_inputX = vmulq_f32(m128_f_xx, inv_zz_128);
3699  const float32x4_t m128_f_inputY = vmulq_f32(m128_f_yy, inv_zz_128);
3704  // now we check whether we are inside the input frame
3705  const uint32x4_t m128_u_validPixelX = vandq_u32(vcleq_f32(m128_f_inputX, m128_f_inputWidth_1), vcgeq_f32(m128_f_inputX, m128_f_zero)); // inputPosition.x() >= 0 && inputPosition.x() <= (inputWidth - 1) ? 0xFFFFFF : 0x000000
3706  const uint32x4_t m128_u_validPixelY = vandq_u32(vcleq_f32(m128_f_inputY, m128_f_inputHeight_1), vcgeq_f32(m128_f_inputY, m128_f_zero)); // inputPosition.y() >= 0 && inputPosition.y() <= (inputHeight - 1) ? 0xFFFFFF : 0x000000
3708  const uint32x4_t m128_u_validPixel = vandq_u32(m128_u_validPixelX, m128_u_validPixelY); // is_inside_input_frame(inputPosition) ? 0xFFFFFF : 0x000000
3711  // we can stop here if all pixels are invalid
3712  const uint32x2_t m64_u_validPixel = vorr_u32(vget_low_u32(m128_u_validPixel), vget_high_u32(m128_u_validPixel));
3713  if ((vget_lane_u32(m64_u_validPixel, 0) | vget_lane_u32(m64_u_validPixel, 1)) == 0x00000000u)
3714  {
3715 #ifdef OCEAN_DEBUG
3716  OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
3717  vst1q_u32(debugValidPixels, m128_u_validPixel);
3718  ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
3719 #endif
3721  outputPixelData[0] = *bColor;
3722  outputPixelData[1] = *bColor;
3723  outputPixelData[2] = *bColor;
3724  outputPixelData[3] = *bColor;
3726  outputPixelData += 4;
3728  continue;
3729  }
3732  // we store the result
3733  vst1q_u32(validPixels, m128_u_validPixel);
3734  ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
3737  // now we determine the left, top, right and bottom pixel used for the interpolation
3738  // left = floor(x); top = floor(y)
3739  const uint32x4_t m128_u_left = vcvtq_u32_f32(m128_f_inputX);
3740  const uint32x4_t m128_u_top = vcvtq_u32_f32(m128_f_inputY);
3742  // right = min(left + 1, width - 1); bottom = min(top + 1; height - 1)
3743  const uint32x4_t m128_u_right = vminq_u32(vaddq_u32(m128_u_left, vdupq_n_u32(1u)), m128_u_inputWidth_1);
3744  const uint32x4_t m128_u_bottom = vminq_u32(vaddq_u32(m128_u_top, vdupq_n_u32(1u)), m128_u_inputHeight_1);
3746  // offset = y * stride + x * channels
3747  const uint32x4_t m128_u_topLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_top, m128_u_inputStrideElements); // topLeftOffset = top * strideElements + left * channels
3748  const uint32x4_t m128_u_topRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_top, m128_u_inputStrideElements); // topRightOffset = top * strideElements + right * channels
3749  const uint32x4_t m128_u_bottomLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_bottom, m128_u_inputStrideElements); // ...
3750  const uint32x4_t m128_u_bottomRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_bottom, m128_u_inputStrideElements);
3752  // we store the offsets
3753  vst1q_u32(topLeftOffsetsElements, m128_u_topLeftOffsetElements);
3754  vst1q_u32(topRightOffsetsElements, m128_u_topRightOffsetElements);
3755  vst1q_u32(bottomLeftOffsetsElements, m128_u_bottomLeftOffsetElements);
3756  vst1q_u32(bottomRightOffsetsElements, m128_u_bottomRightOffsetElements);
3759  // now we need to determine the interpolation factors tx, tx_ and ty, ty_: (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
3761  // we determine the fractional portions of the x' and y':
3762  float32x4_t m128_f_tx = vsubq_f32(m128_f_inputX, vcvtq_f32_u32(m128_u_left));
3763  float32x4_t m128_f_ty = vsubq_f32(m128_f_inputY, vcvtq_f32_u32(m128_u_top));
3765  // we use integer interpolation [0.0, 1.0] -> [0, 128]
3766  m128_f_tx = vmulq_f32(m128_f_tx, vdupq_n_f32(128.0f));
3767  m128_f_ty = vmulq_f32(m128_f_ty, vdupq_n_f32(128.0f));
3769  const uint32x4_t m128_u_tx = vcvtq_u32_f32(vaddq_f32(m128_f_tx, vdupq_n_f32(0.5)));
3770  const uint32x4_t m128_u_ty = vcvtq_u32_f32(vaddq_f32(m128_f_ty, vdupq_n_f32(0.5)));
3772  if constexpr (tChannels > 4u)
3773  {
3774  // normally we would simply call instead of copying the code of the function to this location
3775  // however, if calling the function instead of applying the code here directly
3776  // clang ends with code approx. 20% slower
3777  // thus we make a copy of the code and keep the function for demonstration purposes
3779  //interpolate4Pixels8BitPerChannelNEON<tChannels>(input, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, outputPixelData);
3780  //outputPixelData += 4;
3782  const uint32x4_t m128_u_tx_ = vsubq_u32(vdupq_n_u32(128u), m128_u_tx);
3783  const uint32x4_t m128_u_ty_ = vsubq_u32(vdupq_n_u32(128u), m128_u_ty);
3785  // (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
3786  // == top_left * tx_ty_ + top_right * txty_ + bottom_left * tx_ty + bottom_right * txty
3787  const uint32x4_t m128_u_tx_ty_ = vmulq_u32(m128_u_tx_, m128_u_ty_);
3788  const uint32x4_t m128_u_txty_ = vmulq_u32(m128_u_tx, m128_u_ty_);
3789  const uint32x4_t m128_u_tx_ty = vmulq_u32(m128_u_tx_, m128_u_ty);
3790  const uint32x4_t m128_u_txty = vmulq_u32(m128_u_tx, m128_u_ty);
3792  unsigned int tx_ty_s[4];
3793  unsigned int txty_s[4];
3794  unsigned int tx_tys[4];
3795  unsigned int txtys[4];
3797  // we store the interpolation factors
3798  vst1q_u32(tx_ty_s, m128_u_tx_ty_);
3799  vst1q_u32(txty_s, m128_u_txty_);
3800  vst1q_u32(tx_tys, m128_u_tx_ty);
3801  vst1q_u32(txtys, m128_u_txty);
3803  for (unsigned int i = 0u; i < 4u; ++i)
3804  {
3805  if (validPixels[i])
3806  {
3807  ocean_assert(topLeftOffsetsElements[i] < inputStrideElements * inputHeight);
3808  ocean_assert(topRightOffsetsElements[i] < inputStrideElements * inputHeight);
3809  ocean_assert(bottomLeftOffsetsElements[i] < inputStrideElements * inputHeight);
3810  ocean_assert(bottomRightOffsetsElements[i] < inputStrideElements * inputHeight);
3812  const uint8_t* topLeft = input + topLeftOffsetsElements[i];
3813  const uint8_t* topRight = input + topRightOffsetsElements[i];
3815  const uint8_t* bottomLeft = input + bottomLeftOffsetsElements[i];
3816  const uint8_t* bottomRight = input + bottomRightOffsetsElements[i];
3818  const unsigned int tx_ty_ = tx_ty_s[i];
3819  const unsigned int txty_ = txty_s[i];
3820  const unsigned int tx_ty = tx_tys[i];
3821  const unsigned int txty = txtys[i];
3823  ocean_assert(tx_ty_ + txty_ + tx_ty + txty == 128u * 128u);
3825  for (unsigned int n = 0u; n < tChannels; ++n)
3826  {
3827  ((uint8_t*)outputPixelData)[n] = uint8_t((topLeft[n] * tx_ty_ + topRight[n] * txty_ + bottomLeft[n] * tx_ty + bottomRight[n] * txty + 8192u) >> 14u);
3828  }
3829  }
3830  else
3831  {
3832  *outputPixelData = *bColor;
3833  }
3835  outputPixelData++;
3836  }
3837  }
3838  else
3839  {
3840  interpolate4Pixels8BitPerChannelNEON<tChannels>(input, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, outputPixelData);
3841  outputPixelData += 4;
3842  }
3843  }
3844  }
3845 }
3847 template <>
3848 OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<1u>(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const DataType<uint8_t, 1u>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, 1u>::Type* targetPositionPixels)
3849 {
3850  ocean_assert(source != nullptr);
3851  ocean_assert(targetPositionPixels != nullptr);
3853  // as we do not initialize the following intermediate data,
3854  // we hopefully will not allocate memory on the stack each time this function is called
3855  DataType<uint8_t, 1u>::Type pixels[16];
3857  // we will store the pixel information in the following pattern:
3858  // F E D C B A 9 8 7 6 5 4 3 2 1 0
3859  // BR3 BL3 TR3 TL3 BR2 BL2 TR2 TL2 BR1 BL1 TR1 TL1 BR0 BL0 TR0 TL0
3861  // we gather the individual source pixel values from the source image,
3862  // based on the calculated pixel locations
3863  for (unsigned int i = 0u; i < 4u; ++i)
3864  {
3865  if (validPixels[i])
3866  {
3867  pixels[i * 4u + 0u] = *((const DataType<uint8_t, 1u>::Type*)(source + offsetsTopLeftElements[i]));
3868  pixels[i * 4u + 1u] = *((const DataType<uint8_t, 1u>::Type*)(source + offsetsTopRightElements[i]));
3869  pixels[i * 4u + 2u] = *((const DataType<uint8_t, 1u>::Type*)(source + offsetsBottomLeftElements[i]));
3870  pixels[i * 4u + 3u] = *((const DataType<uint8_t, 1u>::Type*)(source + offsetsBottomRightElements[i]));
3871  }
3872  else
3873  {
3874  pixels[i * 4u + 0u] = borderColor;
3875  pixels[i * 4u + 1u] = borderColor;
3876  pixels[i * 4u + 2u] = borderColor;
3877  pixels[i * 4u + 3u] = borderColor;
3878  }
3879  }
3881  static_assert(sizeof(uint8x16_t) == sizeof(pixels), "Invalid data type!");
3883  const uint8x16_t m128_pixels = vld1q_u8((const uint8_t*)pixels);
3886  // factorLeft = 128 - factorRight
3887  // factorTop = 128 - factorBottom
3889  const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
3890  const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
3892  // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3893  // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3895  const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
3896  const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
3897  const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
3898  const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
3900  // pixels stores the four interpolation grascale pixel values (top left, top right, bottom left, bottom right) for 4 (independent) pixels:
3901  // F E D C B A 9 8 7 6 5 4 3 2 1 0
3904  // factorsTopLeft stores the 32 bit interpolation values for 4 pixels:
3905  // FEDC BA98 7654 3210
3906  // 3 2 1 0 (32 bit interpolation values, fitting into 16 bit)
3909  // we will simply extract each channel from the source pixels,
3910  // each extracted channel will be multiplied by the corresponding interpolation factor
3911  // and all interpolation results will be accumulated afterwards
3913  const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
3915  const uint32x4_t m128_muliplicationA = vmulq_u32(vandq_u32(vreinterpretq_u32_u8(m128_pixels), m128_maskFirstByte), m128_factorsTopLeft);
3916  const uint32x4_t m128_muliplicationB = vmulq_u32(vandq_u32(vshrq_n_u32(vreinterpretq_u32_u8(m128_pixels), 8), m128_maskFirstByte), m128_factorsTopRight);
3917  const uint32x4_t m128_muliplicationC = vmulq_u32(vandq_u32(vshrq_n_u32(vreinterpretq_u32_u8(m128_pixels), 16), m128_maskFirstByte), m128_factorsBottomLeft);
3918  const uint32x4_t m128_muliplicationD = vmulq_u32(vandq_u32(vshrq_n_u32(vreinterpretq_u32_u8(m128_pixels), 24), m128_maskFirstByte), m128_factorsBottomRight);
3920  const uint32x4_t m128_multiplication = vaddq_u32(vaddq_u32(m128_muliplicationA, m128_muliplicationB), vaddq_u32(m128_muliplicationC, m128_muliplicationD));
3922  // we add 8192 and shift by 14 bits
3924  const uint8x16_t m128_interpolation = vreinterpretq_u8_u32(vshrq_n_u32(vaddq_u32(m128_multiplication, vdupq_n_u32(8192u)), 14));
3926  // finally we have the following result:
3927  // ---C ---8 ---4 ---0
3928  // and we need to extract the four pixel values:
3929  //
3930  // NOTE: Because of a possible bug in Clang affecting ARMv7, vget_lane_u32()
3931  // seems to assume 32-bit memory alignment for output location, which cannot
3932  // be guaranteed. This results in bus errors and crashes the application.
3933  // ARM64 is not affected.
3934 #if defined(__aarch64__)
3936  const uint8x8_t m64_mask0 = {0, 4, 1, 1, 1, 1, 1, 1};
3937  const uint8x8_t m64_mask1 = {1, 1, 0, 4, 1, 1, 1, 1};
3939  const uint8x8_t m64_interpolation01 = vtbl1_u8(vget_low_u8(m128_interpolation), m64_mask0);
3940  const uint8x8_t m64_interpolation23 = vtbl1_u8(vget_high_u8(m128_interpolation), m64_mask1);
3942  const uint8x8_t m64_interpolation0123 = vorr_u8(m64_interpolation01, m64_interpolation23);
3944  const uint32_t result = vget_lane_u32(vreinterpret_u32_u8(m64_interpolation0123), 0);
3945  memcpy(targetPositionPixels, &result, sizeof(uint32_t));
3947 #else
3949  *((uint8_t*)targetPositionPixels + 0) = vgetq_lane_u8(m128_interpolation, 0);
3950  *((uint8_t*)targetPositionPixels + 1) = vgetq_lane_u8(m128_interpolation, 4);
3951  *((uint8_t*)targetPositionPixels + 2) = vgetq_lane_u8(m128_interpolation, 8);
3952  *((uint8_t*)targetPositionPixels + 3) = vgetq_lane_u8(m128_interpolation, 12);
3954 #endif
3955 }
3957 OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate8Pixels1Channel8BitNEON(const uint8x8_t& topLeft_u_8x8, const uint8x8_t& topRight_u_8x8, const uint8x8_t& bottomLeft_u_8x8, const uint8x8_t& bottomRight_u_8x8, const uint8x16_t& factorsRight_factorsBottom_128_u_8x16, uint8_t* targetPositionPixels)
3958 {
3959  const uint8x16_t factorsLeft_factorsTop_128_u_8x16 = vsubq_u8(vdupq_n_u8(128u), factorsRight_factorsBottom_128_u_8x16); // factorLeft = 128 - factorRight, factorTop = 128 - factorBottomv
3961  const uint8x8_t factorsRight_u_8x8 = vget_low_u8(factorsRight_factorsBottom_128_u_8x16);
3962  const uint16x8_t factorsBottom_u_16x8 = vmovl_u8(vget_high_u8(factorsRight_factorsBottom_128_u_8x16));
3964  const uint8x8_t factorsLeft_u_8x8 = vget_low_u8(factorsLeft_factorsTop_128_u_8x16);
3965  const uint16x8_t factorsTop_u_16x8 = vmovl_u8(vget_high_u8(factorsLeft_factorsTop_128_u_8x16));
3967  const uint16x8_t intermediateTop_u_16x8 = vmlal_u8(vmull_u8(topLeft_u_8x8, factorsLeft_u_8x8), topRight_u_8x8, factorsRight_u_8x8); // intermediateTop = topLeft * factorLeft + topRight * factorRight
3968  const uint16x8_t intermediateBottom_u_16x8 = vmlal_u8(vmull_u8(bottomLeft_u_8x8, factorsLeft_u_8x8), bottomRight_u_8x8, factorsRight_u_8x8); // intermediateBottom = bottomLeft * factorLeft + bottomRight * factorRight
3970  const uint32x4_t resultA_32x4 = vmlal_u16(vmull_u16(vget_low_u16(intermediateTop_u_16x8), vget_low_u16(factorsTop_u_16x8)), vget_low_u16(intermediateBottom_u_16x8), vget_low_u16(factorsBottom_u_16x8)); // result = intermediateTop * factorTop + intermediateBottom + factorBottom
3971  const uint32x4_t resultB_32x4 = vmlal_u16(vmull_u16(vget_high_u16(intermediateTop_u_16x8), vget_high_u16(factorsTop_u_16x8)), vget_high_u16(intermediateBottom_u_16x8), vget_high_u16(factorsBottom_u_16x8));
3973  const uint16x8_t result_16x8 = vcombine_u16(vrshrn_n_u32(resultA_32x4, 14), vrshrn_n_u32(resultB_32x4, 14)); // round(result / 16384.0)
3975  const uint8x8_t result_8x8 = vmovn_u16(result_16x8);
3977  vst1_u8(targetPositionPixels, result_8x8);
3978 }
3980 template <>
3981 OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<2u>(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const DataType<uint8_t, 2u>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, 2u>::Type* targetPositionPixels)
3982 {
3983  ocean_assert(source != nullptr);
3984  ocean_assert(targetPositionPixels != nullptr);
3986  typedef typename DataType<uint8_t, 2u>::Type PixelType;
3988  // as we do not initialize the following intermediate data,
3989  // we hopefully will not allocate memory on the stack each time this function is called
3990  PixelType topPixels[8];
3991  PixelType bottomPixels[8];
3993  // we will store the pixel information in the following pattern (here for YA):
3994  // FE DC BA 98 76 54 32 10
3995  // YA YA YA YA YA YA YA YA
3996  // TR TL TR TL TR TL TR TL
3998  // we gather the individual source pixel values from the source image,
3999  // based on the calculated pixel locations
4000  for (unsigned int i = 0u; i < 4u; ++i)
4001  {
4002  if (validPixels[i])
4003  {
4004  *(topPixels + i * 2u + 0u) = *((const PixelType*)(source + offsetsTopLeftElements[i]));
4005  *(topPixels + i * 2u + 1u) = *((const PixelType*)(source + offsetsTopRightElements[i]));
4006  *(bottomPixels + i * 2u + 0u) = *((const PixelType*)(source + offsetsBottomLeftElements[i]));
4007  *(bottomPixels + i * 2u + 1u) = *((const PixelType*)(source + offsetsBottomRightElements[i]));
4008  }
4009  else
4010  {
4011  *(topPixels + i * 2u + 0u) = borderColor;
4012  *(topPixels + i * 2u + 1u) = borderColor;
4013  *(bottomPixels + i * 2u + 0u) = borderColor;
4014  *(bottomPixels + i * 2u + 1u) = borderColor;
4015  }
4016  }
4018  static_assert(sizeof(uint32x4_t) == sizeof(topPixels), "Invalid data type!");
4020  const uint32x4_t m128_topPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)topPixels));
4021  const uint32x4_t m128_bottomPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)bottomPixels));
4024  // factorLeft = 128 - factorRight
4025  // factorTop = 128 - factorBottom
4027  const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4028  const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
4030  // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
4031  // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
4033  const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4034  const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4035  const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4036  const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
4039  const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
4041  uint32x4_t m128_muliplicationChannel0 = vmulq_u32(vandq_u32(m128_topPixels, m128_maskFirstByte), m128_factorsTopLeft);
4042  uint32x4_t m128_muliplicationChannel1 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topPixels, 8), m128_maskFirstByte), m128_factorsTopLeft);
4044  m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topPixels, 16), m128_maskFirstByte), m128_factorsTopRight));
4045  m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topPixels, 24), m128_maskFirstByte), m128_factorsTopRight));
4047  m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomPixels, m128_maskFirstByte), m128_factorsBottomLeft));
4048  m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomPixels, 8), m128_maskFirstByte), m128_factorsBottomLeft));
4050  m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomPixels, 16), m128_maskFirstByte), m128_factorsBottomRight));
4051  m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomPixels, 24), m128_maskFirstByte), m128_factorsBottomRight));
4054  // we add 8192 and shift by 14 bits
4056  const uint32x4_t m128_interpolation0 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel0, vdupq_n_u32(8192u)), 14);
4057  const uint32x4_t m128_interpolation1 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel1, vdupq_n_u32(8192u)), 14);
4059  // finaly we blend the interpolation results together to get the following pattern:
4060  // FE DC BA 98 76 54 32 10
4061  // 00 YA 00 YA 00 YA 00 YA
4063  const uint32x4_t m128_interpolation = vorrq_u32(m128_interpolation0, vshlq_n_u32(m128_interpolation1, 8));
4065  // we shuffle the 128 bit register to a 64 bit register:
4067  const uint8x8_t m64_mask0 = {0, 1, 4, 5, 2, 2, 2, 2};
4068  const uint8x8_t m64_mask1 = {2, 2, 2, 2, 0, 1, 4, 5};
4070  const uint8x8_t m64_interpolation_low = vtbl1_u8(vget_low_u8(vreinterpretq_u8_u32(m128_interpolation)), m64_mask0);
4071  const uint8x8_t m64_interpolation_high = vtbl1_u8(vget_high_u8(vreinterpretq_u8_u32(m128_interpolation)), m64_mask1);
4073  const uint8x8_t m64_interpolation = vorr_u8(m64_interpolation_low, m64_interpolation_high);
4075  // no we can store the following pattern as one block:
4077  // 76 54 32 10
4078  // YA YA YA YA
4080  vst1_u8((uint8_t*)targetPositionPixels, m64_interpolation);
4081 }
4083 template <>
4084 OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<3u>(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const DataType<uint8_t, 3u>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, 3u>::Type* targetPositionPixels)
4085 {
4086  ocean_assert(source != nullptr);
4087  ocean_assert(targetPositionPixels != nullptr);
4089  // as we do not initialize the following intermediate data,
4090  // we hopefully will not allocate memory on the stack each time this function is called
4091  uint32_t topLeftPixels[4];
4092  uint32_t topRightPixels[4];
4093  uint32_t bottomLeftPixels[4];
4094  uint32_t bottomRightPixels[4];
4096  // we will store the pixel information in the following pattern, note the padding byte after each pixel (here for RGB):
4097  // FEDCBA9876543210
4098  // BGR BGR BGR BGR
4100  // we gather the individual source pixel values from the source image,
4101  // based on the calculated pixel locations
4102  for (unsigned int i = 0u; i < 4u; ++i)
4103  {
4104  if (validPixels[i])
4105  {
4106  memcpy(topLeftPixels + i, source + offsetsTopLeftElements[i], sizeof(uint8_t) * 3);
4107  memcpy(topRightPixels + i, source + offsetsTopRightElements[i], sizeof(uint8_t) * 3);
4108  memcpy(bottomLeftPixels + i, source + offsetsBottomLeftElements[i], sizeof(uint8_t) * 3);
4109  memcpy(bottomRightPixels + i, source + offsetsBottomRightElements[i], sizeof(uint8_t) * 3);
4110  }
4111  else
4112  {
4113  memcpy(topLeftPixels + i, &borderColor, sizeof(uint8_t) * 3);
4114  memcpy(topRightPixels + i, &borderColor, sizeof(uint8_t) * 3);
4115  memcpy(bottomLeftPixels + i, &borderColor, sizeof(uint8_t) * 3);
4116  memcpy(bottomRightPixels + i, &borderColor, sizeof(uint8_t) * 3);
4117  }
4118  }
4120  static_assert(sizeof(uint32x4_t) == sizeof(topLeftPixels), "Invalid data type!");
4122  const uint32x4_t m128_topLeftPixels = vld1q_u32(topLeftPixels);
4123  const uint32x4_t m128_topRightPixels = vld1q_u32(topRightPixels);
4124  const uint32x4_t m128_bottomLeftPixels = vld1q_u32(bottomLeftPixels);
4125  const uint32x4_t m128_bottomRightPixels = vld1q_u32(bottomRightPixels);
4128  // factorLeft = 128 - factorRight
4129  // factorTop = 128 - factorBottom
4131  const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4132  const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
4134  // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
4135  // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
4137  const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4138  const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4139  const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4140  const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
4143  const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
4145  uint32x4_t m128_muliplicationChannel0 = vmulq_u32(vandq_u32(m128_topLeftPixels, m128_maskFirstByte), m128_factorsTopLeft);
4146  uint32x4_t m128_muliplicationChannel1 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 8), m128_maskFirstByte), m128_factorsTopLeft);
4147  uint32x4_t m128_muliplicationChannel2 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 16), m128_maskFirstByte), m128_factorsTopLeft);
4149  m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_topRightPixels, m128_maskFirstByte), m128_factorsTopRight));
4150  m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 8), m128_maskFirstByte), m128_factorsTopRight));
4151  m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 16), m128_maskFirstByte), m128_factorsTopRight));
4153  m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomLeftPixels, m128_maskFirstByte), m128_factorsBottomLeft));
4154  m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 8), m128_maskFirstByte), m128_factorsBottomLeft));
4155  m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 16), m128_maskFirstByte), m128_factorsBottomLeft));
4157  m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomRightPixels, m128_maskFirstByte), m128_factorsBottomRight));
4158  m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 8), m128_maskFirstByte), m128_factorsBottomRight));
4159  m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 16), m128_maskFirstByte), m128_factorsBottomRight));
4162  // we add 8192 and shift by 14 bits
4164  const uint32x4_t m128_interpolation0 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel0, vdupq_n_u32(8192u)), 14);
4165  const uint32x4_t m128_interpolation1 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel1, vdupq_n_u32(8192u)), 14);
4166  const uint32x4_t m128_interpolation2 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel2, vdupq_n_u32(8192u)), 14);
4168  // finaly we blend the interpolation results together
4170  const uint32x4_t m128_interpolation = vorrq_u32(vorrq_u32(m128_interpolation0, vshlq_n_u32(m128_interpolation1, 8)), vshlq_n_u32(m128_interpolation2, 16));
4172  // we have to extract the get rid of the padding byte:
4173  // FEDCBA9876543210
4174  // BGR BGR BGR BGR
4176  uint32_t intermediateBuffer[4];
4177  vst1q_u32(intermediateBuffer, m128_interpolation);
4179  for (unsigned int i = 0u; i < 4u; ++i)
4180  {
4181  memcpy(targetPositionPixels + i, intermediateBuffer + i, sizeof(uint8_t) * 3);
4182  }
4183 }
4185 template <>
4186 OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<4u>(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const DataType<uint8_t, 4u>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, 4u>::Type* targetPositionPixels)
4187 {
4188  ocean_assert(source != nullptr);
4189  ocean_assert(targetPositionPixels != nullptr);
4191  typedef typename DataType<uint8_t, 4u>::Type PixelType;
4193  // as we do not initialize the following intermediate data,
4194  // we hopefully will not allocate memory on the stack each time this function is called
4195  PixelType topLeftPixels[4];
4196  PixelType topRightPixels[4];
4197  PixelType bottomLeftPixels[4];
4198  PixelType bottomRightPixels[4];
4200  // we will store the pixel information in the following pattern (here for RGBA):
4201  // FEDC BA98 7654 3210
4204  // we gather the individual source pixel values from the source image,
4205  // based on the calculated pixel locations
4206  for (unsigned int i = 0u; i < 4u; ++i)
4207  {
4208  if (validPixels[i])
4209  {
4210  *(topLeftPixels + i) = *((const PixelType*)(source + offsetsTopLeftElements[i]));
4211  *(topRightPixels + i) = *((const PixelType*)(source + offsetsTopRightElements[i]));
4212  *(bottomLeftPixels + i) = *((const PixelType*)(source + offsetsBottomLeftElements[i]));
4213  *(bottomRightPixels + i) = *((const PixelType*)(source + offsetsBottomRightElements[i]));
4214  }
4215  else
4216  {
4217  *(topLeftPixels + i) = borderColor;
4218  *(topRightPixels + i) = borderColor;
4219  *(bottomLeftPixels + i) = borderColor;
4220  *(bottomRightPixels + i) = borderColor;
4221  }
4222  }
4224  static_assert(sizeof(uint32x4_t) == sizeof(topLeftPixels), "Invalid data type!");
4226  const uint32x4_t m128_topLeftPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)topLeftPixels));
4227  const uint32x4_t m128_topRightPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)topRightPixels));
4228  const uint32x4_t m128_bottomLeftPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)bottomLeftPixels));
4229  const uint32x4_t m128_bottomRightPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)bottomRightPixels));
4232  // factorLeft = 128 - factorRight
4233  // factorTop = 128 - factorBottom
4235  const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4236  const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
4238  // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
4239  // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
4241  const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4242  const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4243  const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4244  const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
4247  const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
4249  uint32x4_t m128_muliplicationChannel0 = vmulq_u32(vandq_u32(m128_topLeftPixels, m128_maskFirstByte), m128_factorsTopLeft);
4250  uint32x4_t m128_muliplicationChannel1 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 8), m128_maskFirstByte), m128_factorsTopLeft);
4251  uint32x4_t m128_muliplicationChannel2 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 16), m128_maskFirstByte), m128_factorsTopLeft);
4252  uint32x4_t m128_muliplicationChannel3 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 24), m128_maskFirstByte), m128_factorsTopLeft);
4254  m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_topRightPixels, m128_maskFirstByte), m128_factorsTopRight));
4255  m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 8), m128_maskFirstByte), m128_factorsTopRight));
4256  m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 16), m128_maskFirstByte), m128_factorsTopRight));
4257  m128_muliplicationChannel3 = vaddq_u32(m128_muliplicationChannel3, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 24), m128_maskFirstByte), m128_factorsTopRight));
4259  m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomLeftPixels, m128_maskFirstByte), m128_factorsBottomLeft));
4260  m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 8), m128_maskFirstByte), m128_factorsBottomLeft));
4261  m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 16), m128_maskFirstByte), m128_factorsBottomLeft));
4262  m128_muliplicationChannel3 = vaddq_u32(m128_muliplicationChannel3, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 24), m128_maskFirstByte), m128_factorsBottomLeft));
4264  m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomRightPixels, m128_maskFirstByte), m128_factorsBottomRight));
4265  m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 8), m128_maskFirstByte), m128_factorsBottomRight));
4266  m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 16), m128_maskFirstByte), m128_factorsBottomRight));
4267  m128_muliplicationChannel3 = vaddq_u32(m128_muliplicationChannel3, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 24), m128_maskFirstByte), m128_factorsBottomRight));
4270  // we add 8192 and shift by 14 bits
4272  const uint32x4_t m128_interpolation0 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel0, vdupq_n_u32(8192u)), 14);
4273  const uint32x4_t m128_interpolation1 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel1, vdupq_n_u32(8192u)), 14);
4274  const uint32x4_t m128_interpolation2 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel2, vdupq_n_u32(8192u)), 14);
4275  const uint32x4_t m128_interpolation3 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel3, vdupq_n_u32(8192u)), 14);
4277  // finaly we blend the interpolation results together
4279  const uint32x4_t m128_interpolation = vorrq_u32(vorrq_u32(m128_interpolation0, vshlq_n_u32(m128_interpolation1, 8)), vorrq_u32(vshlq_n_u32(m128_interpolation2, 16), vshlq_n_u32(m128_interpolation3, 24)));
4281  vst1q_u8((uint8_t*)targetPositionPixels, vreinterpretq_u8_u32(m128_interpolation));
4282 }
4284 template <unsigned int tChannels>
4285 OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const typename DataType<uint8_t, tChannels>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, tChannels>::Type* targetPositionPixels)
4286 {
4287  ocean_assert(source != nullptr);
4288  ocean_assert(targetPositionPixels != nullptr);
4290  // as we do not initialize the following intermediate data,
4291  // we hopefully will not allocate memory on the stack each time this function is called
4292  unsigned int factorsTopLeft[4];
4293  unsigned int factorsTopRight[4];
4294  unsigned int factorsBottomLeft[4];
4295  unsigned int factorsBottomRight[4];
4298  // factorLeft = 128 - factorRight
4299  // factorTop = 128 - factorBottom
4301  const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4302  const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
4304  // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
4305  // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
4307  const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4308  const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4309  const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4310  const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
4313  // we store the interpolation factors
4314  vst1q_u32(factorsTopLeft, m128_factorsTopLeft);
4315  vst1q_u32(factorsTopRight, m128_factorsTopRight);
4316  vst1q_u32(factorsBottomLeft, m128_factorsBottomLeft);
4317  vst1q_u32(factorsBottomRight, m128_factorsBottomRight);
4319  for (unsigned int i = 0u; i < 4u; ++i)
4320  {
4321  if (validPixels[i])
4322  {
4323  const uint8_t* topLeft = source + offsetsTopLeftElements[i];
4324  const uint8_t* topRight = source + offsetsTopRightElements[i];
4326  const uint8_t* bottomLeft = source + offsetsBottomLeftElements[i];
4327  const uint8_t* bottomRight = source + offsetsBottomRightElements[i];
4329  const unsigned int& factorTopLeft = factorsTopLeft[i];
4330  const unsigned int& factorTopRight = factorsTopRight[i];
4331  const unsigned int& factorBottomLeft = factorsBottomLeft[i];
4332  const unsigned int& factorBottomRight = factorsBottomRight[i];
4334  for (unsigned int n = 0u; n < tChannels; ++n)
4335  {
4336  ((uint8_t*)targetPositionPixels)[n] = (topLeft[n] * factorTopLeft + topRight[n] * factorTopRight + bottomLeft[n] * factorBottomLeft + bottomRight[n] * factorBottomRight + 8192u) >> 14u;
4337  }
4338  }
4339  else
4340  {
4341  *targetPositionPixels = borderColor;
4342  }
4344  targetPositionPixels++;
4345  }
4346 }
4350 template <unsigned int tChannels>
4351 inline void FrameInterpolatorBilinear::homographies8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* homographies, const uint8_t* borderColor, uint8_t* output, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
4352 {
4353  static_assert(tChannels >= 1u, "Invalid channel number!");
4355  ocean_assert(input && output);
4356  ocean_assert(inputWidth > 0u && inputHeight > 0u);
4357  ocean_assert(outputWidth > 0u && outputHeight > 0u);
4359  ocean_assert(outputQuadrantCenterX >= 0 && outputQuadrantCenterX < Scalar(outputWidth));
4360  ocean_assert(outputQuadrantCenterY >= 0 && outputQuadrantCenterY < Scalar(outputHeight));
4361  ocean_assert(homographies);
4363  const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
4365  const Scalar scalarInputWidth_1 = Scalar(inputWidth - 1u);
4366  const Scalar scalarInputHeight_1 = Scalar(inputHeight - 1u);
4368  constexpr uint8_t zeroColor[tChannels] = {uint8_t(0)};
4369  const uint8_t* const bColor = borderColor ? borderColor : zeroColor;
4371  uint8_t* outputData = output + firstOutputRow * outputStrideElements;
4373  const Scalar left = Scalar(outputQuadrantCenterX) * Scalar(0.5);
4374  const Scalar right = (Scalar(outputWidth) + Scalar(outputQuadrantCenterX)) * Scalar(0.5);
4376  const Scalar top = Scalar(outputQuadrantCenterY) * Scalar(0.5);
4377  const Scalar bottom = (Scalar(outputHeight) + Scalar(outputQuadrantCenterY)) * Scalar(0.5);
4379  ocean_assert(right - left > Numeric::eps());
4380  ocean_assert(bottom - top > Numeric::eps());
4382  const Scalar invWidth = Scalar(1) / Scalar(right - left);
4383  const Scalar invHeight = Scalar(1) / Scalar(bottom - top);
4385  for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
4386  {
4387  for (unsigned int x = 0; x < outputWidth; ++x)
4388  {
4389  Vector2 outputPosition = Vector2(Scalar(int(x)), Scalar(int(y)));
4391  const Scalar _tx = minmax<Scalar>(0, (outputPosition.x() - left) * invWidth, 1);
4392  const Scalar _ty = minmax<Scalar>(0, (outputPosition.y() - top) * invHeight, 1);
4394  outputPosition += Vector2(Scalar(outputOriginX), Scalar(outputOriginY));
4396  const Scalar tx = 1 - _tx;
4397  const Scalar ty = 1 - _ty;
4399  const Vector2 inputPositionTopLeft(homographies[0] * outputPosition);
4400  const Vector2 inputPositionTopRight(homographies[1] * outputPosition);
4401  const Vector2 inputPositionBottomLeft(homographies[2] * outputPosition);
4402  const Vector2 inputPositionBottomRight(homographies[3] * outputPosition);
4404  const Scalar tTopLeft = tx * ty;
4405  const Scalar tTopRight = _tx * ty;
4406  const Scalar tBottomLeft = tx * _ty;
4407  const Scalar tBottomRight = _tx * _ty;
4409  const Vector2 inputPosition = inputPositionTopLeft * tTopLeft + inputPositionTopRight * tTopRight
4410  + inputPositionBottomLeft * tBottomLeft + inputPositionBottomRight * tBottomRight;
4412  if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4413  {
4414  for (unsigned int c = 0u; c < tChannels; ++c)
4415  {
4416  outputData[c] = bColor[c];
4417  }
4418  }
4419  else
4420  {
4421  interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, outputData);
4422  }
4424  outputData += tChannels;
4425  }
4427  outputData += outputPaddingElements;
4428  }
4429 }
4431 template <unsigned int tChannels>
4432 void FrameInterpolatorBilinear::homographyMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, unsigned int firstOutputRow, const unsigned int numberOutputRows)
4433 {
4434  static_assert(tChannels >= 1u, "Invalid channel number!");
4436  ocean_assert(input != nullptr && output != nullptr);
4437  ocean_assert(inputWidth > 0u && inputHeight > 0u);
4438  ocean_assert(outputWidth > 0u && outputHeight > 0u);
4439  ocean_assert(input_H_output != nullptr);
4441  ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
4443  const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
4444  const unsigned int outputMaskStrideElements = outputWidth + outputMaskPaddingElements;
4446  const Scalar scalarInputWidth_1 = Scalar(inputWidth - 1u);
4447  const Scalar scalarInputHeight_1 = Scalar(inputHeight - 1u);
4449  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
4451  for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
4452  {
4453  PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
4454  uint8_t* outputMaskData = outputMask + y * outputMaskStrideElements;
4456  /*
4457  * We can slightly optimize the 3x3 matrix multiplication:
4458  *
4459  * | X0 Y0 Z0 | | x |
4460  * | X1 Y1 Z1 | * | y |
4461  * | X2 Y2 Z2 | | 1 |
4462  *
4463  * | x' | | X0 * x | | Y0 * y + Z0 |
4464  * | y' | = | X1 * x | + | Y1 * y + Z1 |
4465  * | z' | | X2 * x | | Y2 * y + Z2 |
4466  *
4467  * As y is constant within the inner loop, we can pre-calculate the following terms:
4468  *
4469  * | x' | | (X0 * x + constValue0) / (X2 * x + constValue2) |
4470  * | y' | = | (X1 * x + constValue1) / (X2 * x + constValue2) |
4471  *
4472  * | p | = | (X * x + c) / (X2 * x + constValue2) |
4473  */
4475  const Vector2 X(input_H_output->data() + 0);
4476  const Vector2 c(Vector2(input_H_output->data() + 3) * Scalar(y) + Vector2(input_H_output->data() + 6));
4478  const Scalar X2 = (*input_H_output)(2, 0);
4479  const Scalar constValue2 = (*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2);
4481  for (unsigned int x = 0; x < outputWidth; ++x)
4482  {
4483  const Vector2 inputPosition((X * Scalar(x) + c) / (X2 * Scalar(x) + constValue2));
4485 #ifdef OCEAN_DEBUG
4486  const Vector2 debugInputPosition(*input_H_output * Vector2(Scalar(x), Scalar(y)));
4487  ocean_assert(inputPosition.isEqual(debugInputPosition, Scalar(0.01)));
4488 #endif
4490  if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4491  {
4492  *outputMaskData = 0xFF - maskValue;
4493  }
4494  else
4495  {
4496  interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputData));
4497  *outputMaskData = maskValue;
4498  }
4500  outputData++;
4501  outputMaskData++;
4502  }
4503  }
4504 }
4506 template <unsigned int tChannels>
4507 inline void FrameInterpolatorBilinear::homographiesMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* homographies, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
4508 {
4509  static_assert(tChannels >= 1u, "Invalid channel number!");
4511  ocean_assert(input && output);
4512  ocean_assert(inputWidth > 0u && inputHeight > 0u);
4513  ocean_assert(outputWidth > 0u && outputHeight > 0u);
4515  ocean_assert(outputQuadrantCenterX >= 0 && outputQuadrantCenterX < Scalar(outputWidth));
4516  ocean_assert(outputQuadrantCenterY >= 0 && outputQuadrantCenterY < Scalar(outputHeight));
4517  ocean_assert(homographies);
4519  const unsigned int outputStrideElements = tChannels * outputWidth + outputPaddingElements;
4520  const unsigned int outputMaskStrideElements = outputWidth + outputMaskPaddingElements;
4522  const Scalar scalarInputWidth_1 = Scalar(inputWidth - 1u);
4523  const Scalar scalarInputHeight_1 = Scalar(inputHeight - 1u);
4525  uint8_t* outputData = output + firstOutputRow * outputStrideElements;
4526  outputMask += firstOutputRow * outputMaskStrideElements;
4528  const Scalar left = Scalar(outputQuadrantCenterX) * Scalar(0.5);
4529  const Scalar right = (Scalar(outputWidth) + Scalar(outputQuadrantCenterX)) * Scalar(0.5);
4531  const Scalar top = Scalar(outputQuadrantCenterY) * Scalar(0.5);
4532  const Scalar bottom = (Scalar(outputHeight) + Scalar(outputQuadrantCenterY)) * Scalar(0.5);
4534  ocean_assert(right - left > Numeric::eps());
4535  ocean_assert(bottom - top > Numeric::eps());
4537  const Scalar invWidth = Scalar(1) / Scalar(right - left);
4538  const Scalar invHeight = Scalar(1) / Scalar(bottom - top);
4540  for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
4541  {
4542  for (unsigned int x = 0u; x < outputWidth; ++x)
4543  {
4544  Vector2 outputPosition = Vector2(Scalar(int(x)), Scalar(int(y)));
4546  const Scalar _tx = minmax<Scalar>(0, (outputPosition.x() - left) * invWidth, 1);
4547  const Scalar _ty = minmax<Scalar>(0, (outputPosition.y() - top) * invHeight, 1);
4549  outputPosition += Vector2(Scalar(outputOriginX), Scalar(outputOriginY));
4551  const Scalar tx = 1 - _tx;
4552  const Scalar ty = 1 - _ty;
4554  const Vector2 inputPositionTopLeft(homographies[0] * outputPosition);
4555  const Vector2 inputPositionTopRight(homographies[1] * outputPosition);
4556  const Vector2 inputPositionBottomLeft(homographies[2] * outputPosition);
4557  const Vector2 inputPositionBottomRight(homographies[3] * outputPosition);
4559  const Scalar tTopLeft = tx * ty;
4560  const Scalar tTopRight = _tx * ty;
4561  const Scalar tBottomLeft = tx * _ty;
4562  const Scalar tBottomRight = _tx * _ty;
4564  const Vector2 inputPosition = inputPositionTopLeft * tTopLeft + inputPositionTopRight * tTopRight
4565  + inputPositionBottomLeft * tBottomLeft + inputPositionBottomRight * tBottomRight;
4567  if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4568  {
4569  *outputMask = 0xFFu - maskValue;
4570  }
4571  else
4572  {
4573  interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, outputData);
4574  *outputMask = maskValue;
4575  }
4577  outputData += tChannels;
4578  outputMask++;
4579  }
4581  outputData += outputPaddingElements;
4582  outputMask += outputMaskPaddingElements;
4583  }
4584 }
4586 template <unsigned int tChannels>
4587 void FrameInterpolatorBilinear::homographyWithCamera8BitPerChannelSubset(const PinholeCamera* inputCamera, const PinholeCamera* outputCamera, const PinholeCamera::DistortionLookup* outputCameraDistortionLookup, const uint8_t* input, const SquareMatrix3* normalizedHomography, const bool useDistortionParameters, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4588 {
4589  static_assert(tChannels >= 1u, "Invalid channel number!");
4591  ocean_assert(inputCamera && outputCamera && normalizedHomography);
4592  ocean_assert(input && output);
4594  ocean_assert(firstRow + numberRows <= outputCamera->height());
4596  const unsigned int outputStrideElements = tChannels * outputCamera->width() + outputPaddingElements;
4598  const Scalar scalarInputWidth_1 = Scalar(inputCamera->width() - 1u);
4599  const Scalar scalarInputHeight_1 = Scalar(inputCamera->height() - 1u);
4601  const SquareMatrix3 combinedMatrix(*normalizedHomography * outputCamera->invertedIntrinsic());
4603  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
4605  const uint8_t zeroColor[tChannels] = {uint8_t(0)};
4606  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
4608  uint8_t* outputData = output + firstRow * outputStrideElements;
4610  for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4611  {
4612  for (unsigned int x = 0; x < outputCamera->width(); ++x)
4613  {
4614  const Vector2 inputPosition(inputCamera->normalizedImagePoint2imagePoint<true>(combinedMatrix * outputCameraDistortionLookup->undistortedImagePoint(Vector2(Scalar(x), Scalar(y))), useDistortionParameters));
4616  if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4617  {
4618  *((PixelType*)outputData) = *bColor;
4619  }
4620  else
4621  {
4622  interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputCamera->width(), inputCamera->height(), inputPaddingElements, inputPosition, outputData);
4623  }
4625  outputData += tChannels;
4626  }
4628  outputData += outputPaddingElements;
4629  }
4630 }
4632 template <unsigned int tChannels>
4633 void FrameInterpolatorBilinear::homographyWithCameraMask8BitPerChannelSubset(const PinholeCamera* inputCamera, const PinholeCamera* outputCamera, const PinholeCamera::DistortionLookup* outputCameraDistortionLookup, const uint8_t* input, const unsigned int inputPaddingElements, const SquareMatrix3* normalizedHomography, uint8_t* output, uint8_t* outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows)
4634 {
4635  static_assert(tChannels >= 1u, "Invalid channel number!");
4637  ocean_assert(inputCamera != nullptr && outputCamera != nullptr && normalizedHomography != nullptr);
4638  ocean_assert(input != nullptr && output != nullptr);
4640  ocean_assert(firstRow + numberRows <= outputCamera->height());
4642  const unsigned int outputStrideElements = outputCamera->width() * tChannels + outputPaddingElements;
4643  const unsigned int outputMaskStrideElements = outputCamera->width() + outputMaskPaddingElements;
4645  const Scalar scalarInputWidth_1 = Scalar(inputCamera->width() - 1u);
4646  const Scalar scalarInputHeight_1 = Scalar(inputCamera->height() - 1u);
4648  const SquareMatrix3 combinedMatrix(*normalizedHomography * outputCamera->invertedIntrinsic());
4650  uint8_t* outputData = output + firstRow * outputStrideElements;
4651  outputMask += firstRow * outputMaskStrideElements;
4653  constexpr bool useDistortionParameters = true;
4655  for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4656  {
4657  for (unsigned int x = 0; x < outputCamera->width(); ++x)
4658  {
4659  const Vector2 inputPosition(inputCamera->normalizedImagePoint2imagePoint<true>(combinedMatrix * outputCameraDistortionLookup->undistortedImagePoint(Vector2(Scalar(x), Scalar(y))), useDistortionParameters));
4661  if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4662  {
4663  *outputMask = 0xFF - maskValue;
4664  }
4665  else
4666  {
4667  interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputCamera->width(), inputCamera->height(), inputPaddingElements, inputPosition, outputData);
4668  *outputMask = maskValue;
4669  }
4671  outputData += tChannels;
4672  ++outputMask;
4673  }
4675  outputData += outputPaddingElements;
4676  outputMask += outputMaskPaddingElements;
4677  }
4678 }
4680 template <unsigned int tChannels>
4681 void FrameInterpolatorBilinear::lookup8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4682 {
4683  static_assert(tChannels >= 1u, "Invalid channel number!");
4685  ocean_assert(input_LT_output != nullptr);
4686  ocean_assert(input != nullptr && output != nullptr);
4688  ocean_assert(inputWidth != 0u && inputHeight != 0u);
4689  ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
4691  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
4693  const uint8_t zeroColor[tChannels] = {uint8_t(0)};
4694  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
4696  const unsigned int columns = (unsigned int)(input_LT_output->sizeX());
4698  const unsigned int outputStrideElements = tChannels * columns + outputPaddingElements;
4700  static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
4702  const Scalar inputWidth1 = Scalar(inputWidth - 1u);
4703  const Scalar inputHeight1 = Scalar(inputHeight - 1u);
4705  Memory rowLookupMemory = Memory::create<Vector2>(columns);
4706  Vector2* const rowLookupData = rowLookupMemory.data<Vector2>();
4708  for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4709  {
4710  input_LT_output->bilinearValues(y, rowLookupData);
4712  PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
4714  for (unsigned int x = 0u; x < columns; ++x)
4715  {
4716  const Vector2& lookupValue = rowLookupData[x];
4718  const Vector2 inputPosition = offset ? Vector2(Scalar(x) + lookupValue.x(), Scalar(y) + lookupValue.y()) : lookupValue;
4720  if (inputPosition.x() >= Scalar(0) && inputPosition.y() >= Scalar(0) && inputPosition.x() <= inputWidth1 && inputPosition.y() <= inputHeight1)
4721  {
4722  interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputData));
4723  }
4724  else
4725  {
4726  *outputData = *bColor;
4727  }
4729  outputData++;
4730  }
4731  }
4732 }
4734 template <typename T, unsigned int tChannels>
4735 void FrameInterpolatorBilinear::lookupSubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const T* borderColor, T* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4736 {
4737  static_assert(tChannels >= 1u, "Invalid channel number!");
4739  ocean_assert((!std::is_same<uint8_t, T>::value));
4741  ocean_assert(input_LT_output != nullptr);
4742  ocean_assert(input != nullptr && output != nullptr);
4744  ocean_assert(inputWidth != 0u && inputHeight != 0u);
4745  ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
4747  typedef typename DataType<T, tChannels>::Type PixelType;
4749  const T zeroColor[tChannels] = {T(0)};
4750  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
4752  const unsigned int columns = (unsigned int)(input_LT_output->sizeX());
4754  const unsigned int outputStrideElements = tChannels * columns + outputPaddingElements;
4756  static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
4758  const Scalar inputWidth1 = Scalar(inputWidth - 1u);
4759  const Scalar inputHeight1 = Scalar(inputHeight - 1u);
4761  Memory rowLookupMemory = Memory::create<Vector2>(columns);
4762  Vector2* const rowLookupData = rowLookupMemory.data<Vector2>();
4764  for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4765  {
4766  input_LT_output->bilinearValues(y, rowLookupData);
4768  PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
4770  for (unsigned int x = 0u; x < columns; ++x)
4771  {
4772  const Vector2& lookupValue = rowLookupData[x];
4774  const Vector2 inputPosition = offset ? Vector2(Scalar(x) + lookupValue.x(), Scalar(y) + lookupValue.y()) : lookupValue;
4776  if (inputPosition.x() >= Scalar(0) && inputPosition.y() >= Scalar(0) && inputPosition.x() <= inputWidth1 && inputPosition.y() <= inputHeight1)
4777  {
4778  interpolatePixel<T, T, tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (T*)(outputData));
4779  }
4780  else
4781  {
4782  *outputData = *bColor;
4783  }
4785  outputData++;
4786  }
4787  }
4788 }
/**
 * NEON specialization of the lookup-table based frame transformation for frames with one 8 bit channel.
 * Eight output pixels are handled per iteration; the output must therefore be at least 8 pixels wide (see the assert below).
 * For each output pixel the lookup value (either absolute, or relative to the output pixel if 'offset' is true) defines the sub-pixel location in the input frame.
 * Valid locations are interpolated with 7 bit precision (interpolation factors in the range [0, 128]), all other output pixels receive the border color.
 * NOTE(review): the range test is strict (x < inputWidth - 1 and y < inputHeight - 1), whereas the multi-channel NEON variant lookup8BitPerChannelSubsetNEON<tChannels>() uses '<='.
 * For x the strict test is needed because the left and right pixel are copied at once; for y the bottom row is clamped anyway - confirm that the stricter vertical test is intended.
 * @param input The input frame, must be valid
 * @param inputWidth The width of the input frame in pixel, with range [1, infinity)
 * @param inputHeight The height of the input frame in pixel, with range [1, infinity)
 * @param input_LT_output The lookup table defining the input location for each output pixel, must be valid; its size defines the output width
 * @param offset True, if the lookup values are offsets which are added to the output pixel coordinate; False, if the lookup values are absolute input locations
 * @param borderColor The color for output pixels without valid input location (one value), nullptr to use 0
 * @param output The output frame, must be valid
 * @param inputPaddingElements The number of padding elements at the end of each input row
 * @param outputPaddingElements The number of padding elements at the end of each output row
 * @param firstRow The first output row to be handled
 * @param numberRows The number of output rows to be handled
 */
4792 template <>
4793 inline void FrameInterpolatorBilinear::lookup8BitPerChannelSubsetNEON<1u>(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4794 {
4795  ocean_assert(input_LT_output != nullptr);
4796  ocean_assert(input != nullptr && output != nullptr);
4798  ocean_assert(inputWidth != 0u && inputHeight != 0u);
4799  ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
4801  typedef uint8_t PixelType;
  // the border color (or 0 if no border color is provided) replicated to all 16 lanes
4803  const uint8x16_t constantBorderColor_u_8x16 = vdupq_n_u8(borderColor ? *borderColor : 0u);
4805  const unsigned int outputWidth = (unsigned int)(input_LT_output->sizeX());
  // eight pixels are handled at once, narrower outputs are not supported by this function
4806  ocean_assert(outputWidth >= 8u);
4808  static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
4810  const unsigned int inputStrideElements = inputWidth + inputPaddingElements;
4811  const unsigned int outputStrideElements = outputWidth + outputPaddingElements;
  // intermediate buffer holding the (32 bit float) lookup values of one entire output row
4813  Memory rowLookupMemory = Memory::create<VectorF2>(outputWidth);
4814  VectorF2* const rowLookupData = rowLookupMemory.data<VectorF2>();
4816  const float32x4_t constantZero_f_32x4 = vdupq_n_f32(0.0f); // [0.0f, 0.0f, 0.0f, 0.0f]
4817  const float32x4_t constantEight_f_32x4 = vdupq_n_f32(8.0f); // [8.0f, 8.0f, 8.0f, 8.0f]
4819  // [0.0f, 1.0f, 2.0f, 3.0f, ...]
4820  const float f_01234567[8] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
4821  const float32x4_t conststant0123_f_32x4 = vld1q_f32(f_01234567 + 0);
4822  const float32x4_t conststant4567_f_32x4 = vld1q_f32(f_01234567 + 4);
4824  const float32x4_t constant128_f_32x4 = vdupq_n_f32(128.0f);
4826  const uint32x4_t constantOne_u_32x4 = vdupq_n_u32(1u);
  // one channel only, so that the multiplication 'left * channels' below is a multiplication by 1
4828  const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(1u);
4830  const float32x4_t constantInputWidth1_f_32x4 = vdupq_n_f32(float(inputWidth - 1u));
4831  const float32x4_t constantInputHeight1_f_32x4 = vdupq_n_f32(float(inputHeight - 1u));
4833  const uint32x4_t constantInputStrideElements_u_32x4 = vdupq_n_u32(inputStrideElements);
4834  const uint32x4_t constantInputHeight1_u_32x4 = vdupq_n_u32(inputHeight - 1u);
  // scratch buffers used to transfer the per-lane results from NEON registers to the scalar gather loop below
4836  unsigned int validPixels[8];
4838  unsigned int topLeftOffsetsElements[8];
4839  unsigned int bottomLeftOffsetsElements[8];
  // bytes [0, 15]: eight (left, right) pixel pairs of the top row, bytes [16, 31]: eight (left, right) pixel pairs of the bottom row
4841  uint8_t pixels[32];
4843  for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4844  {
4845  PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
4847  input_LT_output->bilinearValues<VectorF2>(y, rowLookupData);
  // in offset mode, the output pixel coordinates (x, y) are added to the lookup values; the x-coordinates start with [0, 1, ..., 7]
4849  float32x4_t additionalInputOffsetX0123_f_32x4 = conststant0123_f_32x4;
4850  float32x4_t additionalInputOffsetX4567_f_32x4 = conststant4567_f_32x4;
4852  const float32x4_t additionalInputOffsetY_f_32x4 = vdupq_n_f32(float(y));
4854  for (unsigned int x = 0u; x < outputWidth; x += 8u)
4855  {
4856  if (x + 8u > outputWidth)
4857  {
4858  // the last iteration will not fit into the output frame,
4859  // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
4861  ocean_assert(x >= 8u && outputWidth > 8u);
4862  const unsigned int newX = outputWidth - 8u;
4864  ocean_assert(x > newX);
4865  const unsigned int xOffset = x - newX;
4867  outputPixelData -= xOffset;
4869  if (offset)
4870  {
  // the x-coordinates which are added to the lookup values need to be shifted left as well
4871  additionalInputOffsetX0123_f_32x4 = vsubq_f32(additionalInputOffsetX0123_f_32x4, vdupq_n_f32(float(xOffset)));
4872  additionalInputOffsetX4567_f_32x4 = vsubq_f32(additionalInputOffsetX4567_f_32x4, vdupq_n_f32(float(xOffset)));
4873  }
4875  x = newX;
4877  // the for loop will stop after this iteration
4878  ocean_assert(!(x + 8u < outputWidth));
4879  }
  // de-interleaving load of the lookup values: val[0] holds the x-values, val[1] holds the y-values
4881  const float32x4x2_t inputPositions0123_f_32x4x2 = vld2q_f32((const float*)(rowLookupData + x + 0u));
4882  const float32x4x2_t inputPositions4567_f_32x4x2 = vld2q_f32((const float*)(rowLookupData + x + 4u));
4884  float32x4_t inputPositionsX0123_f_32x4 = inputPositions0123_f_32x4x2.val[0];
4885  float32x4_t inputPositionsY0123_f_32x4 = inputPositions0123_f_32x4x2.val[1];
4887  float32x4_t inputPositionsX4567_f_32x4 = inputPositions4567_f_32x4x2.val[0];
4888  float32x4_t inputPositionsY4567_f_32x4 = inputPositions4567_f_32x4x2.val[1];
4890  if (offset)
4891  {
4892  inputPositionsX0123_f_32x4 = vaddq_f32(inputPositionsX0123_f_32x4, additionalInputOffsetX0123_f_32x4);
4893  inputPositionsY0123_f_32x4 = vaddq_f32(inputPositionsY0123_f_32x4, additionalInputOffsetY_f_32x4);
4895  inputPositionsX4567_f_32x4 = vaddq_f32(inputPositionsX4567_f_32x4, additionalInputOffsetX4567_f_32x4);
4896  inputPositionsY4567_f_32x4 = vaddq_f32(inputPositionsY4567_f_32x4, additionalInputOffsetY_f_32x4);
  // the x-coordinates for the next block of eight pixels
4898  additionalInputOffsetX0123_f_32x4 = vaddq_f32(additionalInputOffsetX0123_f_32x4, constantEight_f_32x4);
4899  additionalInputOffsetX4567_f_32x4 = vaddq_f32(additionalInputOffsetX4567_f_32x4, constantEight_f_32x4);
4900  }
4902  // now we check whether we are inside the input frame
4903  const uint32x4_t validPixelsX0123_u_32x4 = vandq_u32(vcltq_f32(inputPositionsX0123_f_32x4, constantInputWidth1_f_32x4), vcgeq_f32(inputPositionsX0123_f_32x4, constantZero_f_32x4)); // inputPosition.x() >= 0 && inputPosition.x() < (inputWidth - 1) ? 0xFFFFFFFF : 0x00000000
4904  const uint32x4_t validPixelsX4567_u_32x4 = vandq_u32(vcltq_f32(inputPositionsX4567_f_32x4, constantInputWidth1_f_32x4), vcgeq_f32(inputPositionsX4567_f_32x4, constantZero_f_32x4));
4906  const uint32x4_t validPixelsY0123_u_32x4 = vandq_u32(vcltq_f32(inputPositionsY0123_f_32x4, constantInputHeight1_f_32x4), vcgeq_f32(inputPositionsY0123_f_32x4, constantZero_f_32x4)); // inputPosition.y() >= 0 && inputPosition.y() < (inputHeight - 1) ? 0xFFFFFFFF : 0x00000000
4907  const uint32x4_t validPixelsY4567_u_32x4 = vandq_u32(vcltq_f32(inputPositionsY4567_f_32x4, constantInputHeight1_f_32x4), vcgeq_f32(inputPositionsY4567_f_32x4, constantZero_f_32x4));
4909  const uint32x4_t validPixels0123_u_32x4 = vandq_u32(validPixelsX0123_u_32x4, validPixelsY0123_u_32x4); // is_inside_input_frame(inputPosition) ? 0xFFFFFFFF : 0x00000000
4910  const uint32x4_t validPixels4567_u_32x4 = vandq_u32(validPixelsX4567_u_32x4, validPixelsY4567_u_32x4);
4912  vst1q_u32(validPixels + 0, validPixels0123_u_32x4);
4913  vst1q_u32(validPixels + 4, validPixels4567_u_32x4);
  // the conversion float -> unsigned int truncates the fractional part, providing the integer coordinates of the left and top pixels
4916  const uint32x4_t inputPositionsLeft0123_u_32x4 = vcvtq_u32_f32(inputPositionsX0123_f_32x4);
4917  const uint32x4_t inputPositionsLeft4567_u_32x4 = vcvtq_u32_f32(inputPositionsX4567_f_32x4);
4919  const uint32x4_t inputPositionsTop0123_u_32x4 = vcvtq_u32_f32(inputPositionsY0123_f_32x4);
4920  const uint32x4_t inputPositionsTop4567_u_32x4 = vcvtq_u32_f32(inputPositionsY4567_f_32x4);
  // bottom = min(top + 1, inputHeight - 1), the bottom row never leaves the input frame
4922  const uint32x4_t inputPositionsBottom0123_u_32x4 = vminq_u32(vaddq_u32(inputPositionsTop0123_u_32x4, constantOne_u_32x4), constantInputHeight1_u_32x4);
4923  const uint32x4_t inputPositionsBottom4567_u_32x4 = vminq_u32(vaddq_u32(inputPositionsTop4567_u_32x4, constantOne_u_32x4), constantInputHeight1_u_32x4);
4926  const uint32x4_t topLeftOffsetsElements0123_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft0123_u_32x4, constantChannels_u_32x4), inputPositionsTop0123_u_32x4, constantInputStrideElements_u_32x4); // topLeftOffset = top * strideElements + left * channels
4927  vst1q_u32(topLeftOffsetsElements + 0, topLeftOffsetsElements0123_u_32x4);
4928  const uint32x4_t topLeftOffsetsElements4567_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft4567_u_32x4, constantChannels_u_32x4), inputPositionsTop4567_u_32x4, constantInputStrideElements_u_32x4);
4929  vst1q_u32(topLeftOffsetsElements + 4, topLeftOffsetsElements4567_u_32x4);
  // bottomLeftOffset = bottom * strideElements + left * channels
4931  const uint32x4_t bottomLeftOffsetsElements0123_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft0123_u_32x4, constantChannels_u_32x4), inputPositionsBottom0123_u_32x4, constantInputStrideElements_u_32x4);
4932  vst1q_u32(bottomLeftOffsetsElements + 0, bottomLeftOffsetsElements0123_u_32x4);
4933  const uint32x4_t bottomLeftOffsetsElements4567_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft4567_u_32x4, constantChannels_u_32x4), inputPositionsBottom4567_u_32x4, constantInputStrideElements_u_32x4);
4934  vst1q_u32(bottomLeftOffsetsElements + 4, bottomLeftOffsetsElements4567_u_32x4);
4937  // we determine the fractional portions of the x' and y' and [0.0, 1.0] -> [0, 128]
4938  float32x4_t tx0123_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsX0123_f_32x4, vcvtq_f32_u32(inputPositionsLeft0123_u_32x4)), constant128_f_32x4);
4939  float32x4_t tx4567_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsX4567_f_32x4, vcvtq_f32_u32(inputPositionsLeft4567_u_32x4)), constant128_f_32x4);
4941  float32x4_t ty0123_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsY0123_f_32x4, vcvtq_f32_u32(inputPositionsTop0123_u_32x4)), constant128_f_32x4);
4942  float32x4_t ty4567_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsY4567_f_32x4, vcvtq_f32_u32(inputPositionsTop4567_u_32x4)), constant128_f_32x4);
  // adding 0.5 before the truncating conversion rounds the factors to the nearest integer
4944  const uint32x4_t tx0123_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(tx0123_f_32x4, vdupq_n_f32(0.5)));
4945  const uint32x4_t tx4567_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(tx4567_f_32x4, vdupq_n_f32(0.5)));
4947  const uint32x4_t ty0123_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(ty0123_f_32x4, vdupq_n_f32(0.5)));
4948  const uint32x4_t ty4567_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(ty4567_f_32x4, vdupq_n_f32(0.5)));
  // narrowing 32 bit -> 16 bit -> 8 bit; the resulting register holds the eight tx factors in the lower 8 bytes and the eight ty factors in the upper 8 bytes
4950  const uint16x8_t tx01234567_128_u_16x8 = vcombine_u16(vmovn_u32(tx0123_128_u_32x4), vmovn_u32(tx4567_128_u_32x4));
4951  const uint16x8_t ty01234567_128_u_16x8 = vcombine_u16(vmovn_u32(ty0123_128_u_32x4), vmovn_u32(ty4567_128_u_32x4));
4953  const uint8x16_t tx_ty_128_u_8x16 = vcombine_u8(vmovn_u16(tx01234567_128_u_16x8), vmovn_u16(ty01234567_128_u_16x8));
4956  vst1q_u8(pixels + 0, constantBorderColor_u_8x16); // initialize with border color
4957  vst1q_u8(pixels + 16, constantBorderColor_u_8x16);
  // helper type allowing to copy two horizontally neighboring 8 bit pixels with one assignment
4959  struct LeftRightPixel
4960  {
4961  uint8_t left;
4962  uint8_t right;
4963  };
4965  static_assert(sizeof(LeftRightPixel) == 2, "Invalid data type!");
4967  // we gather the individual source pixel values from the source image,
4968  // based on the calculated pixel locations
  // pixels without valid location keep the border color in all four neighbors, so that the interpolation result is the border color as well
4969  for (unsigned int i = 0u; i < 8u; ++i)
4970  {
4971  if (validPixels[i])
4972  {
4973  ocean_assert((topLeftOffsetsElements[i] % inputStrideElements) < inputWidth - 1u); // we need to have one additional pixel to the right (as we copy two pixels at once)
4974  ocean_assert((bottomLeftOffsetsElements[i] % inputStrideElements) < inputWidth - 1u);
4976  ((LeftRightPixel*)pixels)[0u + i] = *(LeftRightPixel*)(input + topLeftOffsetsElements[i]);
4977  ((LeftRightPixel*)pixels)[8u + i] = *(LeftRightPixel*)(input + bottomLeftOffsetsElements[i]);
4978  }
4979  }
  // de-interleaving load: val[0] holds the eight left pixels, val[1] holds the eight right pixels
4981  const uint8x8x2_t topLeft_topRight_u_8x8x2 = vld2_u8(pixels);
4982  const uint8x8x2_t bottomLeft_bottomRight_u_8x8x2 = vld2_u8(pixels + 16);
4984  interpolate8Pixels1Channel8BitNEON(topLeft_topRight_u_8x8x2.val[0], topLeft_topRight_u_8x8x2.val[1], bottomLeft_bottomRight_u_8x8x2.val[0], bottomLeft_bottomRight_u_8x8x2.val[1], tx_ty_128_u_8x16, outputPixelData);
4986  outputPixelData += 8;
4987  }
4988  }
4989 }
/**
 * NEON implementation of the lookup-table based frame transformation for 8 bit frames with 'tChannels' channels.
 * Four output pixels are handled per iteration; the output must therefore be at least 4 pixels wide (see the assert below).
 * For each output pixel the lookup value (either absolute, or relative to the output pixel if 'offset' is true) defines the sub-pixel location in the input frame.
 * The element offsets of the four neighboring input pixels, a per-pixel validity mask and the interpolation factors (range [0, 128]) are determined with NEON;
 * the actual gathering and interpolation is delegated to interpolate4Pixels8BitPerChannelNEON<tChannels>(), which also receives the border color.
 * @param input The input frame, must be valid
 * @param inputWidth The width of the input frame in pixel, with range [1, infinity)
 * @param inputHeight The height of the input frame in pixel, with range [1, infinity)
 * @param input_LT_output The lookup table defining the input location for each output pixel, must be valid; its size defines the output width
 * @param offset True, if the lookup values are offsets which are added to the output pixel coordinate; False, if the lookup values are absolute input locations
 * @param borderColor The color for output pixels without valid input location, one value per channel, nullptr to use zero for each channel
 * @param output The output frame, must be valid
 * @param inputPaddingElements The number of padding elements at the end of each input row
 * @param outputPaddingElements The number of padding elements at the end of each output row
 * @param firstRow The first output row to be handled
 * @param numberRows The number of output rows to be handled
 * @tparam tChannels The number of frame channels
 */
4991 template <unsigned int tChannels>
4992 void FrameInterpolatorBilinear::lookup8BitPerChannelSubsetNEON(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4993 {
4994  ocean_assert(input_LT_output != nullptr);
4995  ocean_assert(input != nullptr && output != nullptr);
4997  ocean_assert(inputWidth != 0u && inputHeight != 0u);
4998  ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
5000  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
  // the fallback color in case no explicit border color is provided
5002  const uint8_t zeroColor[tChannels] = {uint8_t(0)};
5003  const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
5005  const unsigned int outputWidth = (unsigned int)(input_LT_output->sizeX());
  // four pixels are handled at once, narrower outputs are not supported by this function
5006  ocean_assert(outputWidth >= 4u);
5008  static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
5010  const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
5011  const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
  // intermediate buffer holding the (32 bit float) lookup values of one entire output row
5013  Memory rowLookupMemory = Memory::create<VectorF2>(outputWidth);
5014  VectorF2* const rowLookupData = rowLookupMemory.data<VectorF2>();
5016  const float32x4_t constantZero_f_32x4 = vdupq_n_f32(0.0f); // [0.0f, 0.0f, 0.0f, 0.0f]
5017  const float32x4_t constantFour_f_32x4 = vdupq_n_f32(4.0f); // [4.0f, 4.0f, 4.0f, 4.0f]
5019  // [0.0f, 1.0f, 2.0f, 3.0f]
5020  const float f_0123[4] = {0.0f, 1.0f, 2.0f, 3.0f};
5021  float32x4_t conststant0123_f_32x4 = vld1q_f32(f_0123);
5023  const uint32x4_t constantOne_u_32x4 = vdupq_n_u32(1u);
5025  const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(tChannels);
5027  const float32x4_t constantInputWidth1_f_32x4 = vdupq_n_f32(float(inputWidth - 1u));
5028  const float32x4_t constantInputHeight1_f_32x4 = vdupq_n_f32(float(inputHeight - 1u));
5030  const uint32x4_t constantInputStrideElements_u_32x4 = vdupq_n_u32(inputStrideElements);
5031  const uint32x4_t constantInputWidth1_u_32x4 = vdupq_n_u32(inputWidth - 1u);
5032  const uint32x4_t constantInputHeight1_u_32x4 = vdupq_n_u32(inputHeight - 1u);
  // scratch buffers used to transfer the per-lane results from NEON registers to interpolate4Pixels8BitPerChannelNEON()
5034  unsigned int validPixels[4];
5036  unsigned int topLeftOffsetsElements[4];
5037  unsigned int topRightOffsetsElements[4];
5038  unsigned int bottomLeftOffsetsElements[4];
5039  unsigned int bottomRightOffsetsElements[4];
5041  for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
5042  {
5043  PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
5045  input_LT_output->bilinearValues<VectorF2>(y, rowLookupData);
  // in offset mode, the output pixel coordinates (x, y) are added to the lookup values; the x-coordinates start with [0, 1, 2, 3]
5047  float32x4_t additionalInputOffsetX_f_32x4 = conststant0123_f_32x4;
5048  const float32x4_t additionalInputOffsetY_f_32x4 = vdupq_n_f32(float(y));
5050  for (unsigned int x = 0u; x < outputWidth; x += 4u)
5051  {
5052  if (x + 4u > outputWidth)
5053  {
5054  // the last iteration will not fit into the output frame,
5055  // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
5057  ocean_assert(x >= 4u && outputWidth > 4u);
5058  const unsigned int newX = outputWidth - 4u;
5060  ocean_assert(x > newX);
5061  const unsigned int xOffset = x - newX;
  // 'outputPixelData' points to entire pixels (PixelType), so the shift is applied in pixels
5063  outputPixelData -= xOffset;
5065  if (offset)
5066  {
  // the x-coordinates which are added to the lookup values need to be shifted left as well
5067  additionalInputOffsetX_f_32x4 = vsubq_f32(additionalInputOffsetX_f_32x4, vdupq_n_f32(float(xOffset)));
5068  }
5070  x = newX;
5072  // the for loop will stop after this iteration
5073  ocean_assert(!(x + 4u < outputWidth));
5074  }
  // de-interleaving load of the lookup values: val[0] holds the x-values, val[1] holds the y-values
5076  const float32x4x2_t inputPositions_f_32x4x2 = vld2q_f32((const float*)(rowLookupData + x));
5078  float32x4_t inputPositionsX_f_32x4 = inputPositions_f_32x4x2.val[0];
5079  float32x4_t inputPositionsY_f_32x4 = inputPositions_f_32x4x2.val[1];
5081  if (offset)
5082  {
5083  inputPositionsX_f_32x4 = vaddq_f32(inputPositionsX_f_32x4, additionalInputOffsetX_f_32x4);
5084  inputPositionsY_f_32x4 = vaddq_f32(inputPositionsY_f_32x4, additionalInputOffsetY_f_32x4);
  // the x-coordinates for the next block of four pixels
5086  additionalInputOffsetX_f_32x4 = vaddq_f32(additionalInputOffsetX_f_32x4, constantFour_f_32x4);
5087  }
5089  // now we check whether we are inside the input frame
5090  const uint32x4_t validPixelsX_u_32x4 = vandq_u32(vcleq_f32(inputPositionsX_f_32x4, constantInputWidth1_f_32x4), vcgeq_f32(inputPositionsX_f_32x4, constantZero_f_32x4)); // inputPosition.x() >= 0 && inputPosition.x() <= (inputWidth - 1) ? 0xFFFFFFFF : 0x00000000
5091  const uint32x4_t validPixelsY_u_32x4 = vandq_u32(vcleq_f32(inputPositionsY_f_32x4, constantInputHeight1_f_32x4), vcgeq_f32(inputPositionsY_f_32x4, constantZero_f_32x4)); // inputPosition.y() >= 0 && inputPosition.y() <= (inputHeight - 1) ? 0xFFFFFFFF : 0x00000000
5093  const uint32x4_t validPixels_u_32x4 = vandq_u32(validPixelsX_u_32x4, validPixelsY_u_32x4); // is_inside_input_frame(inputPosition) ? 0xFFFFFFFF : 0x00000000
5095  vst1q_u32(validPixels, validPixels_u_32x4);
  // the conversion float -> unsigned int truncates the fractional part, providing the integer coordinates of the left and top pixels
5097  const uint32x4_t inputPositionsLeft_u_32x4 = vcvtq_u32_f32(inputPositionsX_f_32x4);
5098  const uint32x4_t inputPositionsTop_u_32x4 = vcvtq_u32_f32(inputPositionsY_f_32x4);
  // right = min(left + 1, inputWidth - 1), bottom = min(top + 1, inputHeight - 1); the clamping allows locations exactly on the last column/row
5100  const uint32x4_t inputPositionsRight_u_32x4 = vminq_u32(vaddq_u32(inputPositionsLeft_u_32x4, constantOne_u_32x4), constantInputWidth1_u_32x4);
5101  const uint32x4_t inputPositionsBottom_u_32x4 = vminq_u32(vaddq_u32(inputPositionsTop_u_32x4, constantOne_u_32x4), constantInputHeight1_u_32x4);
5103  const uint32x4_t topLeftOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft_u_32x4, constantChannels_u_32x4), inputPositionsTop_u_32x4, constantInputStrideElements_u_32x4); // topLeftOffset = top * strideElements + left * channels
5104  const uint32x4_t topRightOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsRight_u_32x4, constantChannels_u_32x4), inputPositionsTop_u_32x4, constantInputStrideElements_u_32x4);
5105  const uint32x4_t bottomLeftOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft_u_32x4, constantChannels_u_32x4), inputPositionsBottom_u_32x4, constantInputStrideElements_u_32x4);
5106  const uint32x4_t bottomRightOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsRight_u_32x4, constantChannels_u_32x4), inputPositionsBottom_u_32x4, constantInputStrideElements_u_32x4);
5108  vst1q_u32(topLeftOffsetsElements, topLeftOffsetsElements_u_32x4);
5109  vst1q_u32(topRightOffsetsElements, topRightOffsetsElements_u_32x4);
5110  vst1q_u32(bottomLeftOffsetsElements, bottomLeftOffsetsElements_u_32x4);
5111  vst1q_u32(bottomRightOffsetsElements, bottomRightOffsetsElements_u_32x4);
5113  // we determine the fractional portions of the x' and y':
5114  float32x4_t tx_f_32x4 = vsubq_f32(inputPositionsX_f_32x4, vcvtq_f32_u32(inputPositionsLeft_u_32x4));
5115  float32x4_t ty_f_32x4 = vsubq_f32(inputPositionsY_f_32x4, vcvtq_f32_u32(inputPositionsTop_u_32x4));
5117  // we use integer interpolation [0.0, 1.0] -> [0, 128]
5118  tx_f_32x4 = vmulq_f32(tx_f_32x4, vdupq_n_f32(128.0f));
5119  ty_f_32x4 = vmulq_f32(ty_f_32x4, vdupq_n_f32(128.0f));
  // adding 0.5 before the truncating conversion rounds the factors to the nearest integer
5121  const uint32x4_t tx_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(tx_f_32x4, vdupq_n_f32(0.5)));
5122  const uint32x4_t ty_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(ty_f_32x4, vdupq_n_f32(0.5)));
  // NOTE(review): presumably the helper writes the border color for all pixels with validPixels[i] == 0 - confirm against its implementation
5124  interpolate4Pixels8BitPerChannelNEON<tChannels>(input, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, tx_128_u_32x4, ty_128_u_32x4, outputPixelData);
5126  outputPixelData += 4;
5127  }
5128  }
5129 }
5133 template <unsigned int tChannels>
5134 void FrameInterpolatorBilinear::lookupMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5135 {
5136  ocean_assert(input_LT_output != nullptr);
5137  ocean_assert(input != nullptr && output != nullptr);
5139  ocean_assert(inputWidth != 0u && inputHeight != 0u);
5140  ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
5142  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
5144  const unsigned int columns = (unsigned int)(input_LT_output->sizeX());
5146  const unsigned int outputStrideElements = tChannels * columns + outputPaddingElements;
5147  const unsigned int outputMaskStrideElements = columns + outputMaskPaddingElements;
5149  static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
5151  const Scalar inputWidth1 = Scalar(inputWidth - 1u);
5152  const Scalar inputHeight1 = Scalar(inputHeight - 1u);
5154  Memory rowLookupMemory = Memory::create<Vector2>(columns);
5155  Vector2* const rowLookupData = rowLookupMemory.data<Vector2>();
5157  for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
5158  {
5159  input_LT_output->bilinearValues(y, rowLookupData);
5161  PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
5162  uint8_t* outputMaskData = outputMask + y * outputMaskStrideElements;
5164  for (unsigned int x = 0u; x < columns; ++x)
5165  {
5166  const Vector2& lookupValue = rowLookupData[x];
5168  const Vector2 inputPosition = offset ? Vector2(Scalar(x) + lookupValue.x(), Scalar(y) + lookupValue.y()) : lookupValue;
5170  if (inputPosition.x() >= 0 && inputPosition.y() >= 0 && inputPosition.x() <= inputWidth1 && inputPosition.y() <= inputHeight1)
5171  {
5172  interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputData));
5173  *outputMaskData = maskValue;
5174  }
5175  else
5176  {
5177  *outputMaskData = 0xFFu - maskValue;
5178  }
5180  outputData++;
5181  outputMaskData++;
5182  }
5183  }
5184 }
5186 template <unsigned int tChannels>
5187 void FrameInterpolatorBilinear::scale8BitPerChannel(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
5188 {
5189  ocean_assert(source != nullptr && target != nullptr);
5190  ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
5191  ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
5192  ocean_assert(sourceX_s_targetX > 0.0);
5193  ocean_assert(sourceY_s_targetY > 0.0);
5195  if (sourceWidth == targetWidth && sourceHeight == targetHeight)
5196  {
5197  FrameConverter::subFrame<uint8_t>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, 0u, 0u, 0u, 0u, sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements);
5198  return;
5199  }
5201  if (worker && sourceWidth * tChannels >= 16u && targetWidth >= 8u)
5202  {
5204  if (sourceWidth <= 65535u && sourceHeight <= 65535u && targetWidth <= 65535u && targetHeight <= 65535u)
5205  {
5206  worker->executeFunction(Worker::Function::createStatic(&scale8BitPerChannelSubset7BitPrecisionNEON, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
5207  return;
5208  }
5209 #else
5210  worker->executeFunction(Worker::Function::createStatic(&scale8BitPerChannelSubset<tChannels>, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
5211 #endif
5212  }
5213  else
5214  {
5215  if (sourceWidth * tChannels >= 16u && targetWidth >= 8u)
5216  {
5218  if (sourceWidth <= 65535u && sourceHeight <= 65535u && targetWidth <= 65535u && targetHeight <= 65535u)
5219  {
5220  scale8BitPerChannelSubset7BitPrecisionNEON(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
5221  return;
5222  }
5223 #endif
5224  }
5226  scale8BitPerChannelSubset<tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
5227  }
5228 }
5230 template <unsigned int tChannels>
5231 void FrameInterpolatorBilinear::scale8BitPerChannelSubset(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
5232 {
5233  ocean_assert(source != nullptr && target != nullptr);
5234  ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
5235  ocean_assert_and_suppress_unused(targetWidth >= 1u && targetHeight >= 1u, targetHeight);
5236  ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5238  const Scalar sourceX_T_targetX = Scalar(sourceX_s_targetX);
5239  const Scalar sourceY_T_targetY = Scalar(sourceY_s_targetY);
5241  /*
5242  * We determine the sub-pixel accurate source location for each target pixel as follows:
5243  *
5244  * Example with a downsampling by factor 4:
5245  * sourceRow with 12 pixels: | 0 1 2 3 4 5 6 7 8 9 A B |
5246  * targetRow with 3 pixels: | 0 1 2 |
5247  *
5248  * Thus, the source row can be separated into three blocks;
5249  * and we want to extract the color information from the center of the blocks:
5250  * sourceRow with 12 pixels: | 0 1 2 3 | 4 5 6 7 | 8 9 A B |
5251  * targetRow with 3 pixels: | 0 | 1 | 2 | (sourceX_s_targetX = 4)
5252  *
5253  * Thus, we add 0.5 to each target coordinate before converting it to a source location;
5254  * and subtract 0.5 again afterwards:
5255  * sourceX = (targetX + 0.5) * sourceX_s_targetX - 0.5
5256  *
5257  * e.g., (0 + 0.5) * 4 - 0.5 = 1.5
5258  * (1 + 0.5) * 4 - 0.5 = 5.5
5259  *
5260  *
5261  * Example with a downsampling by factor 3:
5262  * sourceRow with 9 pixels: | 0 1 2 3 4 5 6 7 8 |
5263  * targetRow with 3 pixels: | 0 1 2 |
5264  *
5265  * sourceRow with 9 pixels: | 0 1 2 | 3 4 5 | 6 7 8 |
5266  * targetRow with 3 pixels: | 0 | 1 | 2 | (sourceX_s_targetX = 3)
5267  *
5268  * e.g., (0 + 0.5) * 3 - 0.5 = 1
5269  * (1 + 0.5) * 3 - 0.5 = 4
5270  *
5271  *
5272  * Example with a downsampling by factor 2:
5273  * sourceRow with 6 pixels: | 0 1 2 3 4 5 |
5274  * targetRow with 3 pixels: | 0 1 2 |
5275  *
5276  * sourceRow with 6 pixels: | 0 1 | 2 3 | 4 5 |
5277  * targetRow with 3 pixels: | 0 | 1 | 2 | (sourceX_s_targetX = 2)
5278  *
5279  * e.g., (0 + 0.5) * 2 - 0.5 = 0.5
5280  * (1 + 0.5) * 2 - 0.5 = 2.5
5281  *
5282  *
5283  * we can simplify the calculation (as we have a constant term):
5284  * sourceX = (sourceX_s_targetX * targetX) + (sourceX_s_targetX * 0.5 - 0.5)
5285  */
5287  const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
5289  const Scalar sourceX_T_targetXOffset = sourceX_T_targetX * Scalar(0.5) - Scalar(0.5);
5290  const Scalar sourceY_T_targetYOffset = sourceY_T_targetY * Scalar(0.5) - Scalar(0.5);
5292  const Scalar sourceWidth_1 = Scalar(sourceWidth - 1u);
5293  const Scalar sourceHeight_1 = Scalar(sourceHeight - 1u);
5295  target += (targetWidth * tChannels + targetPaddingElements) * firstTargetRow;
5297  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5298  {
5299  const Scalar sy = minmax(Scalar(0), sourceY_T_targetYOffset + sourceY_T_targetY * Scalar(y), sourceHeight_1);
5300  ocean_assert(sy >= Scalar(0) && sy < Scalar(sourceHeight));
5302  const unsigned int sTop = (unsigned int)sy;
5303  ocean_assert(sy >= Scalar(sTop));
5305  const Scalar ty = sy - Scalar(sTop);
5306  ocean_assert(ty >= 0 && ty <= 1);
5308  const unsigned int factorBottom = (unsigned int)(ty * Scalar(128) + Scalar(0.5));
5309  const unsigned int factorTop = 128u - factorBottom;
5311  const uint8_t* const sourceTop = source + sourceStrideElements * sTop;
5312  const uint8_t* const sourceBottom = (sTop + 1u < sourceHeight) ? sourceTop + sourceStrideElements : sourceTop;
5314  for (unsigned int x = 0; x < targetWidth; ++x)
5315  {
5316  const Scalar sx = minmax(Scalar(0), sourceX_T_targetXOffset + sourceX_T_targetX * Scalar(x), sourceWidth_1);
5317  ocean_assert(sx >= Scalar(0) && sx < Scalar(sourceWidth));
5319  const unsigned int sLeft = (unsigned int)sx;
5320  ocean_assert(sx >= Scalar(sLeft));
5322  const Scalar tx = sx - Scalar(sLeft);
5323  ocean_assert(tx >= 0 && tx <= 1);
5325  const unsigned int factorRight = (unsigned int)(tx * Scalar(128) + Scalar(0.5));
5326  const unsigned int factorLeft = 128u - factorRight;
5328  const unsigned int sourceRightOffset = sLeft + 1u < sourceWidth ? tChannels : 0u;
5330  const uint8_t* const sourceTopLeft = sourceTop + sLeft * tChannels;
5331  const uint8_t* const sourceBottomLeft = sourceBottom + sLeft * tChannels;
5333  const unsigned int factorTopLeft = factorTop * factorLeft;
5334  const unsigned int factorTopRight = factorTop * factorRight;
5335  const unsigned int factorBottomLeft = factorBottom * factorLeft;
5336  const unsigned int factorBottomRight = factorBottom * factorRight;
5338  for (unsigned int n = 0u; n < tChannels; ++n)
5339  {
5340  target[n] = (uint8_t)((sourceTopLeft[n] * factorTopLeft + sourceTopLeft[sourceRightOffset + n] * factorTopRight
5341  + sourceBottomLeft[n] * factorBottomLeft + sourceBottomLeft[sourceRightOffset + n] * factorBottomRight + 8192u) >> 14u);
5342  }
5344  target += tChannels;
5345  }
5347  target += targetPaddingElements;
5348  }
5349 }
5351 template <typename T>
5352 void FrameInterpolatorBilinear::interpolateRowVertical(const T* sourceRowTop, const T* sourceRowBottom, T* targetRow, const unsigned int elements, const float factorBottom)
5353 {
5354  ocean_assert(sourceRowTop != nullptr);
5355  ocean_assert(sourceRowBottom != nullptr);
5356  ocean_assert(targetRow != nullptr);
5357  ocean_assert(elements >= 1u);
5358  ocean_assert(factorBottom >= 0.0f && factorBottom <= 1.0f);
5360  typedef typename FloatTyper<T>::Type FloatType;
5362  const FloatType internalFactorBottom = FloatType(factorBottom);
5363  const FloatType internalFactorTop = FloatType(1.0f - factorBottom);
5365  for (unsigned int n = 0u; n < elements; ++n)
5366  {
5367  targetRow[n] = T(FloatType(sourceRowTop[n]) * internalFactorTop + FloatType(sourceRowBottom[n]) * internalFactorBottom);
5368  }
5369 }
5371 template <typename T, unsigned int tChannels>
5372 void FrameInterpolatorBilinear::interpolateRowHorizontal(const T* extendedSourceRow, T* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const float* interpolationFactorsRight)
5373 {
5374  static_assert(tChannels != 0u, "Invalid channel number!");
5376  ocean_assert(extendedSourceRow != nullptr);
5377  ocean_assert(targetRow != nullptr);
5378  ocean_assert(targetWidth >= 1u);
5379  ocean_assert(interpolationLocations != nullptr);
5380  ocean_assert(interpolationFactorsRight != nullptr);
5381  ocean_assert(channels == tChannels);
5383  typedef typename FloatTyper<T>::Type FloatType;
5385  for (unsigned int x = 0u; x < targetWidth; ++x)
5386  {
5387  const FloatType internalFactorRight = FloatType(interpolationFactorsRight[x]);
5388  ocean_assert(internalFactorRight >= FloatType(0) && internalFactorRight <= FloatType(1));
5390  const FloatType internalFactorLeft = FloatType(1.0f - interpolationFactorsRight[x]);
5392  const unsigned int& leftLocation = interpolationLocations[x];
5393  const unsigned int rightLocation = leftLocation + tChannels; // location is defined in relation to elements, not to pixels
5395  for (unsigned int n = 0u; n < tChannels; ++n)
5396  {
5397  targetRow[x * tChannels + n] = T(FloatType(extendedSourceRow[leftLocation + n]) * internalFactorLeft + FloatType(extendedSourceRow[rightLocation + n]) * internalFactorRight);
5398  }
5399  }
5400 }
5406 template <>
5407 inline void FrameInterpolatorBilinear::scale8BitPerChannelSubset7BitPrecisionNEON<2u, 8u>(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
5408 {
5409  ocean_assert(source != nullptr && target != nullptr);
5410  ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
5411  ocean_assert(sourceHeight >= 1u && sourceHeight <= 65535u);
5412  ocean_assert(targetWidth >= 8u && targetWidth <= 65535u);
5413  ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
5414  ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5416  ocean_assert(sourcePaddingElements == 0u); // not supported
5417  ocean_assert(targetPaddingElements == 0u);
5419  typedef typename DataType<uint8_t, 2u>::Type PixelType;
5421  PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
5422  const PixelType* const sourcePixelData = (const PixelType*)source;
5424  // our offset values for the eight left pixels in relation to the first pixel of the row
5425  unsigned int leftOffsets[8];
5427  // this function uses fixed point numbers with 16 bit for the calculation of the interpolation positions and factors:
5428  // fixedPointLocation = floatLocation * 2^16
5429  //
5430  // [FEDCBA98, 76543210]
5431  // [pixel , subpixel]
5432  //
5433  // fixedPointLocation = pixel + subpixel / 2^16
5434  //
5435  // Thus, the upper 16 bit represent the location of e.g., the left pixel (for the linear interpolation)
5436  // while the lower 16 bit represent one of both interpolation factors (and 2^16 - subpixel represents the second interpolation factor)
5438  const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
5439  const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
5441  const int targetOffsetX_fixed16 = (int)(double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
5442  const int targetOffsetY_fixed16 = (int)(double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
5444  // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
5445  const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
5447  // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
5448  const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
5450  // we store 4 integers: [sourceWidth - 2, sourceWidth - 2, sourceWidth - 2, sourceWidth - 2]
5451  const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
5453  // we store 4 integers: [0, 0, 0, 0]
5454  const int32x4_t m128_s_zero = vdupq_n_s32(0);
5456  const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
5457  const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
5459  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5460  {
5461  const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 + int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
5463  const unsigned int sourceRowTop = sourceY_fixed16 >> 16u; // we must not round here
5464  const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
5465  const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
5467  const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
5468  // factorTop = 128 - factorBottom
5469  const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
5471  const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
5473  const PixelType* const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
5474  const PixelType* const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
5476  for (unsigned int x = 0; x < targetWidth; x += 8u)
5477  {
5478  if (x + 8u > targetWidth)
5479  {
5480  // the last iteration will not fit into the output frame,
5481  // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
5483  ocean_assert(x >= 8u && targetWidth > 8u);
5484  const unsigned int newX = targetWidth - 8u;
5486  ocean_assert(x > newX);
5487  targetPixelData -= x - newX;
5489  x = newX;
5491  // the for loop will stop after this iteration
5492  ocean_assert(!(x + 8u < targetWidth));
5493  }
5496  // we need four successive x coordinate floats:
5497  // [x + 3, x + 2, x + 1; x + 0]
5498  const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
5499  const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
5501  // we calculate the four source locations for our four target locations
5502  const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
5503  const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
5505  const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
5506  const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
5508  // now we determine the pixel/integer accurate source locations
5509  // m128_u_left = min(floor(m128_f_sourceX), sourceWidth - 2)
5510  const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2); // not vrshrq_n_u32 as we must not round here
5511  const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
5513  // we store the offsets we have calculated
5514  vst1q_u32(leftOffsets + 0, m128_u_left_0123);
5515  vst1q_u32(leftOffsets + 4, m128_u_left_4567);
5519  // we load the individal pixels to our four (de-interleaved) 8x8 bit registers (we do this for the top-left and top-right pixels)
5520  // note: loading of each pixel individually is significantly slower than loading two neighboring pixels within one iteration
5522  uint8x8x2_t topLeftPixels;
5523  uint8x8x2_t topRightPixels;
5525  topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 0), topLeftPixels, 0);
5526  topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 1), topRightPixels, 0);
5528  topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 0), topLeftPixels, 1);
5529  topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 1), topRightPixels, 1);
5531  topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 0), topLeftPixels, 2);
5532  topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 1), topRightPixels, 2);
5534  topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 0), topLeftPixels, 3);
5535  topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 1), topRightPixels, 3);
5537  topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 0), topLeftPixels, 4);
5538  topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 1), topRightPixels, 4);
5540  topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 0), topLeftPixels, 5);
5541  topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 1), topRightPixels, 5);
5543  topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 0), topLeftPixels, 6);
5544  topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 1), topRightPixels, 6);
5546  topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 0), topLeftPixels, 7);
5547  topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 1), topRightPixels, 7);
5550  // we load the individual pixels to our four (de-interleaved) 8x8 bit registers (we do this for the bottom-left and bottom-right pixels)
5552  uint8x8x2_t bottomLeftPixels;
5553  uint8x8x2_t bottomRightPixels;
5555  bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 0), bottomLeftPixels, 0);
5556  bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 1), bottomRightPixels, 0);
5558  bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 0), bottomLeftPixels, 1);
5559  bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 1), bottomRightPixels, 1);
5561  bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 0), bottomLeftPixels, 2);
5562  bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 1), bottomRightPixels, 2);
5564  bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 0), bottomLeftPixels, 3);
5565  bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 1), bottomRightPixels, 3);
5567  bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 0), bottomLeftPixels, 4);
5568  bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 1), bottomRightPixels, 4);
5570  bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 0), bottomLeftPixels, 5);
5571  bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 1), bottomRightPixels, 5);
5573  bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 0), bottomLeftPixels, 6);
5574  bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 1), bottomRightPixels, 6);
5576  bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 0), bottomLeftPixels, 7);
5577  bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 1), bottomRightPixels, 7);
5581  // we determine the multiplication factors for the right pixels - which are already stored in the lower 16 bits
5582  // we need an accuracy of 7 bits (values between 0 and 128):
5583  // 76 54 32 10
5584  // [F3 F2 F1 F0]
5585  const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5586  const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5588  // as we have the pixel information de-interleaved, we can store all 8 interpolation factors together into one 8x8 bit register:
5589  const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
5590  const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
5591  const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
5595  // we determine the intermediate interpolation results for the top row (and we narrow down the 16 bit results 8 bit results)
5596  uint16x8_t m128_muliplicationChannel_0 = vmull_u8(topLeftPixels.val[0], m64_u_factorsLeft);
5597  uint16x8_t m128_muliplicationChannel_1 = vmull_u8(topLeftPixels.val[1], m64_u_factorsLeft);
5599  m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, topRightPixels.val[0], m64_u_factorsRight);
5600  m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, topRightPixels.val[1], m64_u_factorsRight);
5602  uint8x8_t m64_topRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
5603  uint8x8_t m64_topRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
5607  // we determine the intermediate interpolation results for the bottom row (and we narrow down the 16 bit results 8 bit results)
5608  m128_muliplicationChannel_0 = vmull_u8(bottomLeftPixels.val[0], m64_u_factorsLeft);
5609  m128_muliplicationChannel_1 = vmull_u8(bottomLeftPixels.val[1], m64_u_factorsLeft);
5611  m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, bottomRightPixels.val[0], m64_u_factorsRight);
5612  m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, bottomRightPixels.val[1], m64_u_factorsRight);
5614  uint8x8_t m64_bottomRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
5615  uint8x8_t m64_bottomRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
5619  // finnally we determine the interpolation result between top and bottom row
5620  m128_muliplicationChannel_0 = vmull_u8(m64_topRowChannel_0, m64_u_factorsTop);
5621  m128_muliplicationChannel_1 = vmull_u8(m64_topRowChannel_1, m64_u_factorsTop);
5623  m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, m64_bottomRowChannel_0, m64_u_factorsBottom);
5624  m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, m64_bottomRowChannel_1, m64_u_factorsBottom);
5627  // we narrow down the interpolation results and we store them
5628  uint8x8x2_t result;
5629  result.val[0] = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
5630  result.val[1] = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
5632  // we write back the results and interleave them automatically
5633  vst2_u8((uint8_t*)targetPixelData, result);
5635  targetPixelData += 8;
5636  }
5638  // we need to process the last pixel again, as this pixel may have received wrong interpolation factors as we always load two successive pixels into our NEON registers
5639  // **TODO** this is just a temporary solution, check how we can avoid this additional step
5641  const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
5643  for (unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
5644  {
5645  const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 + int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
5647  const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
5648  ocean_assert(lastSourcePixelLeft < sourceWidth);
5649  const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
5651  const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
5653  const unsigned int factorRight = factorRight_fixed16 >> 9u;
5654  const unsigned int factorLeft = 128u - factorRight;
5656  for (unsigned int c = 0u; c < 2u; ++c)
5657  {
5658  ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * (128u - factorBottom)
5659  + (((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
5660  }
5661  }
5662  }
5663 }
5669 template <>
5670 inline void FrameInterpolatorBilinear::scale8BitPerChannelSubset7BitPrecisionNEON<2u, 8u>(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
5671 {
5672  ocean_assert(source != nullptr && target != nullptr);
5673  ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
5674  ocean_assert(sourceHeight >= 0u && sourceHeight <= 65535u);
5675  ocean_assert(targetWidth >= 8u && targetWidth <= 65535u)
5676  ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
5677  ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5679  ocean_assert(sourcePaddingElements == 0u); // not supported
5680  ocean_assert(targetPaddingElements == 0u);
5682  typedef typename DataType<uint8_t, 2u>::Type PixelType;
5684  PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
5685  const PixelType* const sourcePixelData = (const PixelType*)source;
5687  // our offset values for the four left pixels in relation to the first pixel of the row
5688  unsigned int leftOffsets[8];
5690  // our color values of the eight top and bottom pixels (32 bit = 16 bit left and 16 bit right)
5691  unsigned int topPixels[8];
5692  unsigned int bottomPixels[8];
5694  // this function uses fixed point numbers with 16 bit for the calculation of the interpolation positions and factors:
5695  // fixedPointLocation = floatLocation * 2^16
5696  //
5697  // [FEDCBA98, 76543210]
5698  // [pixel , subpixel]
5699  //
5700  // fixedPointLocation = pixel + subpixel / 2^16
5701  //
5702  // Thus, the upper 16 bit represent the location of e.g., the left pixel (for the linear interpolation)
5703  // while the lower 16 bit represent one of both interpolation factors (and 2^16 - subpixel represents the second interpolation factor)
5705  const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
5706  const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
5708  const int targetOffsetX_fixed16 = (int)(double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
5709  const int targetOffsetY_fixed16 = (int)(double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
5711  // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
5712  const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
5714  // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
5715  const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
5717  // we store 4 integers: [sourceWidth - 2, sourceWidth - 2, sourceWidth - 2, sourceWidth - 2]
5718  const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
5720  // we store 4 integers: [0, 0, 0, 0]
5721  const int32x4_t m128_s_zero = vdupq_n_s32(0);
5723  const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
5724  const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
5726  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5727  {
5728  const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 + int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
5730  const unsigned int sourceRowTop = sourceY_fixed16 >> 16u; // we must not round here
5731  const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
5732  const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
5734  const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
5735  // factorTop = 128 - factorBottom
5736  const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
5738  const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
5740  const PixelType* const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
5741  const PixelType* const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
5743  for (unsigned int x = 0; x < targetWidth; x += 8u)
5744  {
5745  if (x + 8u > targetWidth)
5746  {
5747  // the last iteration will not fit into the output frame,
5748  // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
5750  ocean_assert(x >= 8u && targetWidth > 8u);
5751  const unsigned int newX = targetWidth - 8u;
5753  ocean_assert(x > newX);
5754  targetPixelData -= x - newX;
5756  x = newX;
5758  // the for loop will stop after this iteration
5759  ocean_assert(!(x + 8u < targetWidth));
5760  }
5763  // we need four successive x coordinate floats:
5764  // [x + 3, x + 2, x + 1; x + 0]
5765  const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
5766  const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
5768  // we calculate the four source locations for our four target locations
5769  const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
5770  const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
5772  const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
5773  const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
5775  // now we determine the pixel/integer accurate source locations
5776  // m128_u_left = min(floor(m128_f_sourceX), sourceWidth - 2)
5777  const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2); // not vrshrq_n_u32 as we must not round here
5778  const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
5780  // we store the offsets we have calculated
5781  vst1q_u32(leftOffsets + 0, m128_u_left_0123);
5782  vst1q_u32(leftOffsets + 4, m128_u_left_4567);
5786  // we load the left and the right pixels into an intermediate buffer
5787  // with following pattern (with top-left TL, and top-right TR):
5788  // F E D C B A 9 8 7 6 5 4 3 2 1 0
5789  // [TR3 TR3 TL3 TL3 TR2 TR2 TL2 TL2 TR1 TR1 TL1 TL1 TR0 TR0 TL0 TL0]
5790  // [TR7 TR7 TL7 TL7 TR6 TR6 TL6 TL6 TR5 TR5 TL5 TL5 TR4 TR4 TL4 TL4]
5792  for (unsigned int n = 0u; n < 8u; ++n)
5793  {
5794  topPixels[n] = *(unsigned int*)(sourceTopRowPixelData + leftOffsets[n]);
5795  }
5797  const uint16x8_t m128_topPixels_0123 = vreinterpretq_u16_u32(vld1q_u32(topPixels + 0));
5798  const uint16x8_t m128_topPixels_4567 = vreinterpretq_u16_u32(vld1q_u32(topPixels + 4));
5800  for (unsigned int n = 0u; n < 8u; ++n)
5801  {
5802  bottomPixels[n] = *(unsigned int*)(sourceBottomRowPixelData + leftOffsets[n]);
5803  }
5805  const uint16x8_t m128_bottomPixels_0123 = vreinterpretq_u16_u32(vld1q_u32(bottomPixels + 0));
5806  const uint16x8_t m128_bottomPixels_4567 = vreinterpretq_u16_u32(vld1q_u32(bottomPixels + 4));
5809  // we determine the multiplication factors for the right pixels - which are already stored in the lower 16 bits
5810  // we need an accuracy of 7 bits (values between 0 and 128):
5811  // 76 54 32 10
5812  // [F3 F2 F1 F0]
5813  const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5814  const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5816  // as we will have the pixel information de-interleaved, we can store all 8 interpolation factors together into one 8x8 bit register:
5817  const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
5818  const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
5820  // nw we have the interpolation factors for 8 left and 8 right pixels:
5821  // 7 6 5 4 3 2 1 0
5822  // [F7 F6 F5 F4 F3 F2 F1 F0]
5823  const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
5826  // we de-interleave the top pixels to left and right pixels:
5827  // F E D C B A 9 8 7 6 5 4 3 2 1 0
5828  // [TL7 TL7 TL6 TL6 TL5 TL5 TL4 TL4 TL3 TL3 TL2 TL2 TL1 TL1 TL0 TL0]
5829  // [TR7 TR7 TR6 TR6 TR5 TR5 TR4 TR4 TR3 TR3 TR2 TR2 TR1 TR1 TR0 TR0]
5830  const uint16x8x2_t m2_128_topPixelsLeftRight = vuzpq_u16(m128_topPixels_0123, m128_topPixels_4567);
5832  // we de-interleave the pixels again to separate channel 0 and channel 1:
5833  // 7 6 5 4 3 2 1 0
5834  // channel 0: [TL7 TL6 TL5 TL4 TL3 TL2 TL1 TL0]
5835  // channel 1: [TL7 TL6 TL5 TL4 TL3 TL2 TL1 TL0]
5836  const uint8x8x2_t m2_64_topPixelsLeft_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[0])), vget_high_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[0])));
5837  const uint8x8x2_t m2_64_topPixelsRight_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[1])), vget_high_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[1])));
5839  const uint8x8_t& m64_topPixelsLeft_channel_0 = m2_64_topPixelsLeft_channels_01.val[0];
5840  const uint8x8_t& m64_topPixelsLeft_channel_1 = m2_64_topPixelsLeft_channels_01.val[1];
5842  const uint8x8_t& m64_topPixelsRight_channel_0 = m2_64_topPixelsRight_channels_01.val[0];
5843  const uint8x8_t& m64_topPixelsRight_channel_1 = m2_64_topPixelsRight_channels_01.val[1];
5846  // we determine the intermediate interpolation results for the top row (and we narrow down the 16 bit results 8 bit results)
5847  uint16x8_t m128_muliplication_channel_0 = vmull_u8(m64_topPixelsLeft_channel_0, m64_u_factorsLeft);
5848  uint16x8_t m128_muliplication_channel_1 = vmull_u8(m64_topPixelsLeft_channel_1, m64_u_factorsLeft);
5850  m128_muliplication_channel_0 = vmlal_u8(m128_muliplication_channel_0, m64_topPixelsRight_channel_0, m64_u_factorsRight);
5851  m128_muliplication_channel_1 = vmlal_u8(m128_muliplication_channel_1, m64_topPixelsRight_channel_1, m64_u_factorsRight);
5853  const uint8x8_t m64_topRow_channel_0 = vrshrn_n_u16(m128_muliplication_channel_0, 7);
5854  const uint8x8_t m64_topRow_channel_1 = vrshrn_n_u16(m128_muliplication_channel_1, 7);
5857  // we proceed with the bottom pixels (as we did with the top pixels)
5858  const uint16x8x2_t m2_128_bottomPixelsLeftRight = vuzpq_u16(m128_bottomPixels_0123, m128_bottomPixels_4567);
5860  const uint8x8x2_t m2_64_bottomPixelsLeft_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[0])), vget_high_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[0])));
5861  const uint8x8x2_t m2_64_bottomPixelsRight_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[1])), vget_high_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[1])));
5863  const uint8x8_t& m64_bottomPixelsLeft_channel_0 = m2_64_bottomPixelsLeft_channels_01.val[0];
5864  const uint8x8_t& m64_bottomPixelsLeft_channel_1 = m2_64_bottomPixelsLeft_channels_01.val[1];
5866  const uint8x8_t& m64_bottomPixelsRight_channel_0 = m2_64_bottomPixelsRight_channels_01.val[0];
5867  const uint8x8_t& m64_bottomPixelsRight_channel_1 = m2_64_bottomPixelsRight_channels_01.val[1];
5870  // we determine the intermediate interpolation results for the bottom row (and we narrow down the 16 bit results 8 bit results)
5871  m128_muliplication_channel_0 = vmull_u8(m64_bottomPixelsLeft_channel_0, m64_u_factorsLeft);
5872  m128_muliplication_channel_1 = vmull_u8(m64_bottomPixelsLeft_channel_1, m64_u_factorsLeft);
5874  m128_muliplication_channel_0 = vmlal_u8(m128_muliplication_channel_0, m64_bottomPixelsRight_channel_0, m64_u_factorsRight);
5875  m128_muliplication_channel_1 = vmlal_u8(m128_muliplication_channel_1, m64_bottomPixelsRight_channel_1, m64_u_factorsRight);
5877  const uint8x8_t m64_bottomRow_channel_0 = vrshrn_n_u16(m128_muliplication_channel_0, 7);
5878  const uint8x8_t m64_bottomRow_channel_1 = vrshrn_n_u16(m128_muliplication_channel_1, 7);
5881  // finnally we determine the interpolation result between top and bottom row
5882  m128_muliplication_channel_0 = vmull_u8(m64_topRow_channel_0, m64_u_factorsTop);
5883  m128_muliplication_channel_1 = vmull_u8(m64_topRow_channel_1, m64_u_factorsTop);
5885  m128_muliplication_channel_0 = vmlal_u8(m128_muliplication_channel_0, m64_bottomRow_channel_0, m64_u_factorsBottom);
5886  m128_muliplication_channel_1 = vmlal_u8(m128_muliplication_channel_1, m64_bottomRow_channel_1, m64_u_factorsBottom);
5889  // we narrow down the interpolation results and we store them
5890  uint8x8x2_t m2_64_result;
5891  m2_64_result.val[0] = vrshrn_n_u16(m128_muliplication_channel_0, 7);
5892  m2_64_result.val[1] = vrshrn_n_u16(m128_muliplication_channel_1, 7);
5894  // we write back the results and interleave them automatically
5895  vst2_u8((uint8_t*)targetPixelData, m2_64_result);
5897  targetPixelData += 8;
5898  }
5900  // we need to process the last pixel again, as this pixel may have received wrong interpolation factors as we always load two successive pixels into our NEON registers
5901  // **TODO** this is just a temporary solution, check how we can avoid this additional step
5903  const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
5905  for (unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
5906  {
5907  const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 + int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
5909  const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
5910  ocean_assert(lastSourcePixelLeft < sourceWidth);
5911  const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
5913  const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
5915  const unsigned int factorRight = factorRight_fixed16 >> 9u;
5916  const unsigned int factorLeft = 128u - factorRight;
5918  for (unsigned int c = 0u; c < 2u; ++c)
5919  {
5920  ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * (128u - factorBottom)
5921  + (((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
5922  }
5923  }
5924  }
5925 }
5931 template <>
5932 inline void FrameInterpolatorBilinear::scale8BitPerChannelSubset7BitPrecisionNEON<3u, 8u>(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
5933 {
5934  ocean_assert(source != nullptr && target != nullptr);
5935  ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
5936  ocean_assert(sourceHeight >= 1u && sourceHeight <= 65535u);
5937  ocean_assert(targetWidth >= 8u && targetWidth <= 65535u);
5938  ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
5939  ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5941  ocean_assert(sourcePaddingElements == 0u); // not supported
5942  ocean_assert(targetPaddingElements == 0u);
5944  typedef typename DataType<uint8_t, 3u>::Type PixelType;
5946  PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
5947  const PixelType* const sourcePixelData = (const PixelType*)source;
5949  // our offset values for the eight left pixels in relation to the first pixel of the row
5950  unsigned int leftOffsets[8];
5952  // this function uses fixed point numbers with 16 bit for the calculation of the interpolation positions and factors:
5953  // fixedPointLocation = floatLocation * 2^16
5954  //
5955  // [FEDCBA98, 76543210]
5956  // [pixel , subpixel]
5957  //
5958  // fixedPointLocation = pixel + subpixel / 2^16
5959  //
5960  // Thus, the upper 16 bit represent the location of e.g., the left pixel (for the linear interpolation)
5961  // while the lower 16 bit represent one of both interpolation factors (and 2^16 - subpixel represents the second interpolation factor)
5963  const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
5964  const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
5966  const int targetOffsetX_fixed16 = (int)(double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
5967  const int targetOffsetY_fixed16 = (int)(double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
5969  // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
5970  const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
5972  // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
5973  const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
5975  // we store 4 integers: [sourceWidth - 2, sourceWidth - 2, sourceWidth - 2, sourceWidth - 2]
5976  const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
5978  // we store 4 integers: [0, 0, 0, 0]
5979  const int32x4_t m128_s_zero = vdupq_n_s32(0);
5981  const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
5982  const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
5984  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5985  {
5986  const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 + int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
5988  const unsigned int sourceRowTop = sourceY_fixed16 >> 16u; // we must not round here
5989  const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
5990  const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
5992  const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
5993  // factorTop = 128 - factorBottom
5994  const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
5996  const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
5998  const PixelType* const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
5999  const PixelType* const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
6001  for (unsigned int x = 0; x < targetWidth; x += 8u)
6002  {
6003  if (x + 8u > targetWidth)
6004  {
6005  // the last iteration will not fit into the output frame,
6006  // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
6008  ocean_assert(x >= 8u && targetWidth > 8u);
6009  const unsigned int newX = targetWidth - 8u;
6011  ocean_assert(x > newX);
6012  targetPixelData -= x - newX;
6014  x = newX;
6016  // the for loop will stop after this iteration
6017  ocean_assert(!(x + 8u < targetWidth));
6018  }
6021  // we need four successive x coordinate floats:
6022  // [x + 3, x + 2, x + 1; x + 0]
6023  const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
6024  const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
6026  // we calculate the four source locations for our four target locations
6027  const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
6028  const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
6030  const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
6031  const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
6033  // now we determine the pixel/integer accurate source locations
6034  // m128_u_left = min(floor(m128_f_sourceX), sourceWidth - 2)
6035  const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2); // not vrshrq_n_u32 as we must not round here
6036  const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
6038  // we store the offsets we have calculated
6039  vst1q_u32(leftOffsets + 0, m128_u_left_0123);
6040  vst1q_u32(leftOffsets + 4, m128_u_left_4567);
6044  // we load the individal pixels to our four (de-interleaved) 8x8 bit registers (we do this for the top-left and top-right pixels)
6045  // note: loading of each pixel individually is significantly slower than loading two neighboring pixels within one iteration
6047  uint8x8x3_t topLeftPixels;
6048  uint8x8x3_t topRightPixels;
6050  topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 0), topLeftPixels, 0);
6051  topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 1), topRightPixels, 0);
6053  topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 0), topLeftPixels, 1);
6054  topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 1), topRightPixels, 1);
6056  topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 0), topLeftPixels, 2);
6057  topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 1), topRightPixels, 2);
6059  topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 0), topLeftPixels, 3);
6060  topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 1), topRightPixels, 3);
6062  topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 0), topLeftPixels, 4);
6063  topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 1), topRightPixels, 4);
6065  topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 0), topLeftPixels, 5);
6066  topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 1), topRightPixels, 5);
6068  topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 0), topLeftPixels, 6);
6069  topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 1), topRightPixels, 6);
6071  topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 0), topLeftPixels, 7);
6072  topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 1), topRightPixels, 7);
6075  // we load the individal pixels to our four (de-interleaved) 8x8 bit registers (we do this for the bottom-left and bottom-right pixels)
6077  uint8x8x3_t bottomLeftPixels;
6078  uint8x8x3_t bottomRightPixels;
6080  bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 0), bottomLeftPixels, 0);
6081  bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 1), bottomRightPixels, 0);
6083  bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 0), bottomLeftPixels, 1);
6084  bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 1), bottomRightPixels, 1);
6086  bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 0), bottomLeftPixels, 2);
6087  bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 1), bottomRightPixels, 2);
6089  bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 0), bottomLeftPixels, 3);
6090  bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 1), bottomRightPixels, 3);
6092  bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 0), bottomLeftPixels, 4);
6093  bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 1), bottomRightPixels, 4);
6095  bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 0), bottomLeftPixels, 5);
6096  bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 1), bottomRightPixels, 5);
6098  bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 0), bottomLeftPixels, 6);
6099  bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 1), bottomRightPixels, 6);
6101  bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 0), bottomLeftPixels, 7);
6102  bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 1), bottomRightPixels, 7);
6106  // we determine the multiplication factors for the right pixels - which are already stored in the lower 16 bits
6107  // we need an accuracy of 7 bits (values between 0 and 128):
6108  // 76 54 32 10
6109  // [F3 F2 F1 F0]
6110  const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6111  const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6113  // as we have the pixel information de-interleaved, we can store all 8 interpolation factors together into one 8x8 bit register:
6114  const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
6115  const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
6116  const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
6120  // we determine the intermediate interpolation results for the top row (and we narrow down the 16 bit results 8 bit results)
6121  uint16x8_t m128_muliplicationChannel_0 = vmull_u8(topLeftPixels.val[0], m64_u_factorsLeft);
6122  uint16x8_t m128_muliplicationChannel_1 = vmull_u8(topLeftPixels.val[1], m64_u_factorsLeft);
6123  uint16x8_t m128_muliplicationChannel_2 = vmull_u8(topLeftPixels.val[2], m64_u_factorsLeft);
6125  m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, topRightPixels.val[0], m64_u_factorsRight);
6126  m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, topRightPixels.val[1], m64_u_factorsRight);
6127  m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, topRightPixels.val[2], m64_u_factorsRight);
6129  uint8x8_t m64_topRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6130  uint8x8_t m64_topRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6131  uint8x8_t m64_topRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6135  // we determine the intermediate interpolation results for the bottom row (and we narrow down the 16 bit results 8 bit results)
6136  m128_muliplicationChannel_0 = vmull_u8(bottomLeftPixels.val[0], m64_u_factorsLeft);
6137  m128_muliplicationChannel_1 = vmull_u8(bottomLeftPixels.val[1], m64_u_factorsLeft);
6138  m128_muliplicationChannel_2 = vmull_u8(bottomLeftPixels.val[2], m64_u_factorsLeft);
6140  m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, bottomRightPixels.val[0], m64_u_factorsRight);
6141  m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, bottomRightPixels.val[1], m64_u_factorsRight);
6142  m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, bottomRightPixels.val[2], m64_u_factorsRight);
6144  uint8x8_t m64_bottomRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6145  uint8x8_t m64_bottomRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6146  uint8x8_t m64_bottomRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6150  // finnally we determine the interpolation result between top and bottom row
6151  m128_muliplicationChannel_0 = vmull_u8(m64_topRowChannel_0, m64_u_factorsTop);
6152  m128_muliplicationChannel_1 = vmull_u8(m64_topRowChannel_1, m64_u_factorsTop);
6153  m128_muliplicationChannel_2 = vmull_u8(m64_topRowChannel_2, m64_u_factorsTop);
6155  m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, m64_bottomRowChannel_0, m64_u_factorsBottom);
6156  m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, m64_bottomRowChannel_1, m64_u_factorsBottom);
6157  m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, m64_bottomRowChannel_2, m64_u_factorsBottom);
6160  // we narrow down the interpolation results and we store them
6161  uint8x8x3_t result;
6162  result.val[0] = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6163  result.val[1] = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6164  result.val[2] = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6166  // we write back the results and interleave them automatically
6167  vst3_u8((uint8_t*)targetPixelData, result);
6169  targetPixelData += 8;
6170  }
6172  // we need to process the last pixel again, as this pixel may have received wrong interpolation factors as we always load two successive pixels into our NEON registers
6173  // **TODO** this is just a temporary solution, check how we can avoid this additional step
6175  const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
6177  for (unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
6178  {
6179  const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 + int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
6181  const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
6182  ocean_assert(lastSourcePixelLeft < sourceWidth);
6183  const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
6185  const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
6187  const unsigned int factorRight = factorRight_fixed16 >> 9u;
6188  const unsigned int factorLeft = 128u - factorRight;
6190  for (unsigned int c = 0u; c < 3u; ++c)
6191  {
6192  ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * (128u - factorBottom)
6193  + (((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
6194  }
6195  }
6196  }
6197 }
6205 template <>
6206 inline void FrameInterpolatorBilinear::resize8BitPerChannelSubset7BitPrecisionNEON<4u, 8u>(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
6207 {
6208  ocean_assert(source != nullptr && target != nullptr);
6209  ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
6210  ocean_assert(sourceHeight >= 1u && sourceHeight <= 65535u);
6211  ocean_assert(targetWidth >= 8u && targetWidth <= 65535u);
6212  ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
6213  ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
6215  ocean_assert(sourcePaddingElements == 0u); // not supported
6216  ocean_assert(targetPaddingElements == 0u);
6218  typedef typename DataType<uint8_t, 4u>::Type PixelType;
6220  PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
6221  const PixelType* const sourcePixelData = (const PixelType*)source;
6223  // our offset values for the eight left pixels in relation to the first pixel of the row
6224  unsigned int leftOffsets[8];
6226  // this function uses fixed point numbers with 16 bit for the calculation of const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
6227  const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
6229  // this function uses fixed point numbers with 16 bit for the calculation of the interpolation positions and factors:
6230  // fixedPointLocation = floatLocation * 2^16
6231  //
6232  // [FEDCBA98, 76543210]
6233  // [pixel , subpixel]
6234  //
6235  // fixedPointLocation = pixel + subpixel / 2^16
6236  //
6237  // Thus, the upper 16 bit represent the location of e.g., the left pixel (for the linear interpolation)
6238  // while the lower 16 bit represent one of both interpolation factors (and 2^16 - subpixel represents the second interpolation factor)
6240  const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
6241  const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
6243  const int targetOffsetX_fixed16 = (int)(double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
6244  const int targetOffsetY_fixed16 = (int)(double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
6246  // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
6247  const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
6249  // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
6250  const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
6252  // we store 4 integers: [sourceWidth - 2, sourceWidth - 2, sourceWidth - 2, sourceWidth - 2]
6253  const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
6255  // we store 4 integers: [0, 0, 0, 0]
6256  const int32x4_t m128_s_zero = vdupq_n_s32(0);
6258  const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
6259  const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
6261  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6262  {
6263  const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 + int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
6265  const unsigned int sourceRowTop = sourceY_fixed16 >> 16u; // we must not round here
6266  const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
6267  const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
6269  const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
6270  // factorTop = 128 - factorBottom
6271  const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
6273  const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
6275  const PixelType* const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
6276  const PixelType* const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
6278  for (unsigned int x = 0; x < targetWidth; x += 8u)
6279  {
6280  if (x + 8u > targetWidth)
6281  {
6282  // the last iteration will not fit into the output frame,
6283  // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
6285  ocean_assert(x >= 8u && targetWidth > 8u);
6286  const unsigned int newX = targetWidth - 8u;
6288  ocean_assert(x > newX);
6289  targetPixelData -= x - newX;
6291  x = newX;
6293  // the for loop will stop after this iteration
6294  ocean_assert(!(x + 8u < targetWidth));
6295  }
6298  // we need four successive x coordinate floats:
6299  // [x + 3, x + 2, x + 1; x + 0]
6300  const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
6301  const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
6303  // we calculate the four source locations for our four target locations
6304  const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
6305  const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
6307  const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
6308  const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
6310  // now we determine the pixel/integer accurate source locations
6311  // m128_u_left = min(floor(m128_f_sourceX), sourceWidth - 2)
6312  const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2); // not vrshrq_n_u32 as we must not round here
6313  const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
6315  // we store the offsets we have calculated
6316  vst1q_u32(leftOffsets + 0, m128_u_left_0123);
6317  vst1q_u32(leftOffsets + 4, m128_u_left_4567);
6321  // we load the individal pixels to our four (de-interleaved) 8x8 bit registers (we do this for the top-left and top-right pixels)
6322  // note: loading of each pixel individually is significantly slower than loading two neighboring pixels within one iteration
6324  uint8x8x4_t topLeftPixels;
6325  uint8x8x4_t topRightPixels;
6327  topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 0), topLeftPixels, 0);
6328  topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 1), topRightPixels, 0);
6330  topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 0), topLeftPixels, 1);
6331  topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 1), topRightPixels, 1);
6333  topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 0), topLeftPixels, 2);
6334  topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 1), topRightPixels, 2);
6336  topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 0), topLeftPixels, 3);
6337  topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 1), topRightPixels, 3);
6339  topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 0), topLeftPixels, 4);
6340  topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 1), topRightPixels, 4);
6342  topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 0), topLeftPixels, 5);
6343  topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 1), topRightPixels, 5);
6345  topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 0), topLeftPixels, 6);
6346  topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 1), topRightPixels, 6);
6348  topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 0), topLeftPixels, 7);
6349  topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 1), topRightPixels, 7);
6352  // we load the individal pixels to our four (de-interleaved) 8x8 bit registers (we do this for the bottom-left and bottom-right pixels)
6354  uint8x8x4_t bottomLeftPixels;
6355  uint8x8x4_t bottomRightPixels;
6357  bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 0), bottomLeftPixels, 0);
6358  bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 1), bottomRightPixels, 0);
6360  bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 0), bottomLeftPixels, 1);
6361  bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 1), bottomRightPixels, 1);
6363  bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 0), bottomLeftPixels, 2);
6364  bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 1), bottomRightPixels, 2);
6366  bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 0), bottomLeftPixels, 3);
6367  bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 1), bottomRightPixels, 3);
6369  bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 0), bottomLeftPixels, 4);
6370  bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 1), bottomRightPixels, 4);
6372  bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 0), bottomLeftPixels, 5);
6373  bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 1), bottomRightPixels, 5);
6375  bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 0), bottomLeftPixels, 6);
6376  bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 1), bottomRightPixels, 6);
6378  bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 0), bottomLeftPixels, 7);
6379  bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 1), bottomRightPixels, 7);
6383  // we determine the multiplication factors for the right pixels - which are already stored in the lower 16 bits
6384  // we need an accuracy of 7 bits (values between 0 and 128):
6385  // 76 54 32 10
6386  // [F3 F2 F1 F0]
6387  const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6388  const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6390  // as we have the pixel information de-interleaved, we can store all 8 interpolation factors together into one 8x8 bit register:
6391  const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
6392  const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
6393  const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
6397  // we determine the intermediate interpolation results for the top row (and we narrow down the 16 bit results 8 bit results)
6398  uint16x8_t m128_muliplicationChannel_0 = vmull_u8(topLeftPixels.val[0], m64_u_factorsLeft);
6399  uint16x8_t m128_muliplicationChannel_1 = vmull_u8(topLeftPixels.val[1], m64_u_factorsLeft);
6400  uint16x8_t m128_muliplicationChannel_2 = vmull_u8(topLeftPixels.val[2], m64_u_factorsLeft);
6401  uint16x8_t m128_muliplicationChannel_3 = vmull_u8(topLeftPixels.val[3], m64_u_factorsLeft);
6403  m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, topRightPixels.val[0], m64_u_factorsRight);
6404  m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, topRightPixels.val[1], m64_u_factorsRight);
6405  m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, topRightPixels.val[2], m64_u_factorsRight);
6406  m128_muliplicationChannel_3 = vmlal_u8(m128_muliplicationChannel_3, topRightPixels.val[3], m64_u_factorsRight);
6408  uint8x8_t m64_topRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6409  uint8x8_t m64_topRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6410  uint8x8_t m64_topRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6411  uint8x8_t m64_topRowChannel_3 = vrshrn_n_u16(m128_muliplicationChannel_3, 7);
6415  // we determine the intermediate interpolation results for the bottom row (and we narrow down the 16 bit results 8 bit results)
6416  m128_muliplicationChannel_0 = vmull_u8(bottomLeftPixels.val[0], m64_u_factorsLeft);
6417  m128_muliplicationChannel_1 = vmull_u8(bottomLeftPixels.val[1], m64_u_factorsLeft);
6418  m128_muliplicationChannel_2 = vmull_u8(bottomLeftPixels.val[2], m64_u_factorsLeft);
6419  m128_muliplicationChannel_3 = vmull_u8(bottomLeftPixels.val[3], m64_u_factorsLeft);
6421  m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, bottomRightPixels.val[0], m64_u_factorsRight);
6422  m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, bottomRightPixels.val[1], m64_u_factorsRight);
6423  m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, bottomRightPixels.val[2], m64_u_factorsRight);
6424  m128_muliplicationChannel_3 = vmlal_u8(m128_muliplicationChannel_3, bottomRightPixels.val[3], m64_u_factorsRight);
6426  uint8x8_t m64_bottomRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6427  uint8x8_t m64_bottomRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6428  uint8x8_t m64_bottomRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6429  uint8x8_t m64_bottomRowChannel_3 = vrshrn_n_u16(m128_muliplicationChannel_3, 7);
6433  // finnally we determine the interpolation result between top and bottom row
6434  m128_muliplicationChannel_0 = vmull_u8(m64_topRowChannel_0, m64_u_factorsTop);
6435  m128_muliplicationChannel_1 = vmull_u8(m64_topRowChannel_1, m64_u_factorsTop);
6436  m128_muliplicationChannel_2 = vmull_u8(m64_topRowChannel_2, m64_u_factorsTop);
6437  m128_muliplicationChannel_3 = vmull_u8(m64_topRowChannel_3, m64_u_factorsTop);
6439  m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, m64_bottomRowChannel_0, m64_u_factorsBottom);
6440  m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, m64_bottomRowChannel_1, m64_u_factorsBottom);
6441  m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, m64_bottomRowChannel_2, m64_u_factorsBottom);
6442  m128_muliplicationChannel_3 = vmlal_u8(m128_muliplicationChannel_3, m64_bottomRowChannel_3, m64_u_factorsBottom);
6445  // we narrow down the interpolation results and we store them
6446  uint8x8x4_t result;
6447  result.val[0] = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6448  result.val[1] = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6449  result.val[2] = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6450  result.val[3] = vrshrn_n_u16(m128_muliplicationChannel_3, 7);
6452  // we write back the results and interleave them automatically
6453  vst4_u8((uint8_t*)targetPixelData, result);
6455  targetPixelData += 8;
6456  }
6458  // we need to process the last pixel again, as this pixel may have received wrong interpolation factors as we always load two successive pixels into our NEON registers
6459  // **TODO** this is just a temporary solution, check how we can avoid this additional step
6461  const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
6463  for (unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
6464  {
6465  const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 + int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
6467  const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
6468  ocean_assert(lastSourcePixelLeft < sourceWidth);
6469  const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
6471  const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
6473  const unsigned int factorRight = factorRight_fixed16 >> 9u;
6474  const unsigned int factorLeft = 128u - factorRight;
6476  for (unsigned int c = 0u; c < 4u; ++c)
6477  {
6478  ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorTop
6479  + (((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
6480  }
6481  }
6482  }
6483 }
6485 /// \endcond
6489 template <>
6490 inline void FrameInterpolatorBilinear::interpolateRowVerticalNEON<float>(const float* sourceRowTop, const float* sourceRowBottom, float* targetRow, const unsigned int elements, const float factorBottom)
6491 {
6492  ocean_assert(sourceRowTop != nullptr);
6493  ocean_assert(sourceRowBottom != nullptr);
6494  ocean_assert(targetRow != nullptr);
6495  ocean_assert(elements >= 16u);
6496  ocean_assert(factorBottom >= 0.0f && factorBottom <= 1.0f);
6498  // [1.0f, 1.0f, 1.0f, 1.0f]
6499  const float32x4_t constant_1_f_32x4 = vdupq_n_f32(1.0f);
6501  const float32x4_t factorsBottom_f_32x4 = vdupq_n_f32(factorBottom);
6502  const float32x4_t factorsTop_f_32x4 = vsubq_f32(constant_1_f_32x4, factorsBottom_f_32x4); // factorTop = 1 - factorBottom
6504  for (unsigned int n = 0u; n < elements; n += 16u)
6505  {
6506  if (n + 16u > elements)
6507  {
6508  // the last iteration will not fit into the output frame,
6509  // so we simply shift x left by some elements (at most 15) and we will calculate some elements again
6511  ocean_assert(n >= 16u && elements > 16u);
6512  const unsigned int offset = n - (elements - 16u);
6513  ocean_assert(offset < 16u);
6515  sourceRowTop -= offset;
6516  sourceRowBottom -= offset;
6517  targetRow -= offset;
6519  // the for loop will stop after this iteration
6520  ocean_assert(!(n + 16u < elements));
6521  }
6523  // loading the next four 32 bit values from the top and bottom row
6524  const float32x4_t top_03_32x4 = vld1q_f32(sourceRowTop + 0);
6525  const float32x4_t top_47_32x4 = vld1q_f32(sourceRowTop + 4);
6526  const float32x4_t top_8B_32x4 = vld1q_f32(sourceRowTop + 8);
6527  const float32x4_t top_CF_32x4 = vld1q_f32(sourceRowTop + 12);
6529  const float32x4_t bottom_03_32x4 = vld1q_f32(sourceRowBottom + 0);
6530  const float32x4_t bottom_47_32x4 = vld1q_f32(sourceRowBottom + 4);
6531  const float32x4_t bottom_8B_32x4 = vld1q_f32(sourceRowBottom + 8);
6532  const float32x4_t bottom_CF_32x4 = vld1q_f32(sourceRowBottom + 12);
6534  // interpolatedRow_32x4 = top_32x4 * factorsTop + bottom_32x4 * factorsBottom
6535  float32x4_t interpolatedRow_03_32x4 = vmulq_f32(top_03_32x4, factorsTop_f_32x4);
6536  float32x4_t interpolatedRow_47_32x4 = vmulq_f32(top_47_32x4, factorsTop_f_32x4);
6537  float32x4_t interpolatedRow_8B_32x4 = vmulq_f32(top_8B_32x4, factorsTop_f_32x4);
6538  float32x4_t interpolatedRow_CF_32x4 = vmulq_f32(top_CF_32x4, factorsTop_f_32x4);
6540  interpolatedRow_03_32x4 = vmlaq_f32(interpolatedRow_03_32x4, bottom_03_32x4, factorsBottom_f_32x4);
6541  interpolatedRow_47_32x4 = vmlaq_f32(interpolatedRow_47_32x4, bottom_47_32x4, factorsBottom_f_32x4);
6542  interpolatedRow_8B_32x4 = vmlaq_f32(interpolatedRow_8B_32x4, bottom_8B_32x4, factorsBottom_f_32x4);
6543  interpolatedRow_CF_32x4 = vmlaq_f32(interpolatedRow_CF_32x4, bottom_CF_32x4, factorsBottom_f_32x4);
6545  // writing back the four interpolated 32 bit results
6546  vst1q_f32(targetRow + 0, interpolatedRow_03_32x4);
6547  vst1q_f32(targetRow + 4, interpolatedRow_47_32x4);
6548  vst1q_f32(targetRow + 8, interpolatedRow_8B_32x4);
6549  vst1q_f32(targetRow + 12, interpolatedRow_CF_32x4);
6551  sourceRowTop += 16;
6552  sourceRowBottom += 16;
6553  targetRow += 16;
6554  }
6555 }
6557 template <>
6558 inline void FrameInterpolatorBilinear::interpolateRowHorizontalNEON<float, 1u>(const float* extendedSourceRow, float* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const float* interpolationFactorsRight)
6559 {
6560  ocean_assert(extendedSourceRow != nullptr);
6561  ocean_assert(targetRow != nullptr);
6562  ocean_assert(targetWidth >= 8u);
6563  ocean_assert(interpolationLocations != nullptr);
6564  ocean_assert(interpolationFactorsRight != nullptr);
6566  ocean_assert(channels == 1u);
6568  // [1.0f, 1.0f, 1.0f, 1.0f]
6569  const float32x4_t constant_1_f_32x4 = vdupq_n_f32(1.0f);
6571  for (unsigned int x = 0; x < targetWidth; x += 8u)
6572  {
6573  if (x + 8u > targetWidth)
6574  {
6575  // the last iteration will not fit into the output frame,
6576  // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
6578  ocean_assert(x >= 8u && targetWidth > 8u);
6579  const unsigned int newX = targetWidth - 8u;
6581  ocean_assert(x > newX);
6582  const unsigned int offset = x - newX;
6584  targetRow -= offset;
6585  interpolationLocations -= offset;
6586  interpolationFactorsRight -= offset;
6588  x = newX;
6590  // the for loop will stop after this iteration
6591  ocean_assert(!(x + 8u < targetWidth));
6592  }
6594  // we load the left and the right pixels (for four resulting target pixels)
6596  const float32x2_t pixel_0_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[0]);
6597  const float32x2_t pixel_1_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[1]);
6598  const float32x4_t pixel_01_f_32x4 = vcombine_f32(pixel_0_f_32x2, pixel_1_f_32x2);
6600  const float32x2_t pixel_2_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[2]);
6601  const float32x2_t pixel_3_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[3]);
6602  const float32x4_t pixel_23_f_32x4 = vcombine_f32(pixel_2_f_32x2, pixel_3_f_32x2);
6604  const float32x2_t pixel_4_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[4]);
6605  const float32x2_t pixel_5_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[5]);
6606  const float32x4_t pixel_45_f_32x4 = vcombine_f32(pixel_4_f_32x2, pixel_5_f_32x2);
6608  const float32x2_t pixel_6_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[6]);
6609  const float32x2_t pixel_7_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[7]);
6610  const float32x4_t pixel_67_f_32x4 = vcombine_f32(pixel_6_f_32x2, pixel_7_f_32x2);
6612  const float32x4_t factorsRight_0123_f_32x4 = vld1q_f32(interpolationFactorsRight + 0);
6613  const float32x4_t factorsLeft_0123_f_32x4 = vsubq_f32(constant_1_f_32x4, factorsRight_0123_f_32x4);
6614  const float32x4x2_t factorsLeftRight_0123_f_32x4_2 = vzipq_f32(factorsLeft_0123_f_32x4, factorsRight_0123_f_32x4);
6616  const float32x4_t factorsRight_4567_f_32x4 = vld1q_f32(interpolationFactorsRight + 4);
6617  const float32x4_t factorsLeft_4567_f_32x4 = vsubq_f32(constant_1_f_32x4, factorsRight_4567_f_32x4);
6618  const float32x4x2_t factorsLeftRight_4567_f_32x4_2 = vzipq_f32(factorsLeft_4567_f_32x4, factorsRight_4567_f_32x4);
6620  const float32x4_t multiplied_01_f_32x4 = vmulq_f32(pixel_01_f_32x4, factorsLeftRight_0123_f_32x4_2.val[0]);
6621  const float32x4_t multiplied_23_f_32x4 = vmulq_f32(pixel_23_f_32x4, factorsLeftRight_0123_f_32x4_2.val[1]);
6623  const float32x4_t multiplied_45_f_32x4 = vmulq_f32(pixel_45_f_32x4, factorsLeftRight_4567_f_32x4_2.val[0]);
6624  const float32x4_t multiplied_67_f_32x4 = vmulq_f32(pixel_67_f_32x4, factorsLeftRight_4567_f_32x4_2.val[1]);
6626  const float32x2_t result_01_f_32x2 = vpadd_f32(vget_low_f32(multiplied_01_f_32x4), vget_high_f32(multiplied_01_f_32x4));
6627  const float32x2_t result_23_f_32x2 = vpadd_f32(vget_low_f32(multiplied_23_f_32x4), vget_high_f32(multiplied_23_f_32x4));
6629  const float32x2_t result_45_f_32x2 = vpadd_f32(vget_low_f32(multiplied_45_f_32x4), vget_high_f32(multiplied_45_f_32x4));
6630  const float32x2_t result_67_f_32x2 = vpadd_f32(vget_low_f32(multiplied_67_f_32x4), vget_high_f32(multiplied_67_f_32x4));
6632  const float32x4_t result_0123_f_32x4 = vcombine_f32(result_01_f_32x2, result_23_f_32x2);
6633  const float32x4_t result_4567_f_32x4 = vcombine_f32(result_45_f_32x2, result_67_f_32x2);
6635  vst1q_f32(targetRow + 0, result_0123_f_32x4);
6636  vst1q_f32(targetRow + 4, result_4567_f_32x4);
6638  targetRow += 8;
6639  interpolationLocations += 8;
6640  interpolationFactorsRight += 8;
6641  }
6642 }
6644 template <>
6645 inline void FrameInterpolatorBilinear::scaleSubset<float, float, 1u>(const float* source, float* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
6646 {
6647  ocean_assert(source != nullptr && target != nullptr);
6648  ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
6649  ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
6650  ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
6652  ocean_assert(sourceWidth != targetWidth || sourceHeight != targetHeight);
6654  const unsigned int sourceStrideElements = sourceWidth * 1u + sourcePaddingElements;
6655  const unsigned int targetStrideElements = targetWidth * 1u + targetPaddingElements;
6657  typedef void (*InterpolateRowVerticalFunction)(const float*, const float*, float*, const unsigned int, const float);
6658  typedef void (*InterpolateRowHorizontalFunction)(const float*, float*, const unsigned int, const unsigned int, const unsigned int*, const float*);
6660  InterpolateRowVerticalFunction interpolateRowVerticalFunction = interpolateRowVertical<float>;
6661  InterpolateRowHorizontalFunction interpolateRowHorizontalFunction = interpolateRowHorizontal<float, 1u>;
6663  if (sourceWidth * 1u >= 16u)
6664  {
6665  interpolateRowVerticalFunction = interpolateRowVerticalNEON<float>;
6666  }
6668  if (targetWidth >= 8u)
6669  {
6670  interpolateRowHorizontalFunction = interpolateRowHorizontalNEON<float, 1u>;
6671  }
6673  target += targetStrideElements * firstTargetRow;
6675  const float sourceX_T_targetX = float(sourceX_s_targetX);
6676  const float sourceY_T_targetY = float(sourceY_s_targetY);
6678  // See the generic template function for a detailed documentation regarding interpolation factors.
6680  Memory memoryIntermediateExtendedRow;
6681  Memory memoryHorizontalInterpolationLocations;
6682  Memory memoryHorizontalInterpolationFactorsRight;
6684  if (sourceWidth != targetWidth)
6685  {
6686  // in case we are scaling the width of the frame, we use an intermediate buffer and pre-calculated interpolation locations and factors
6688  memoryIntermediateExtendedRow = Memory::create<float>(sourceWidth + 1u); // one additional pixel
6690  memoryHorizontalInterpolationLocations = Memory::create<unsigned int>(targetWidth); // one offset for each target pixel
6692  memoryHorizontalInterpolationFactorsRight = Memory::create<float>(targetWidth); // one factors (right) for each target pixel
6693  }
6695  if (memoryHorizontalInterpolationLocations)
6696  {
6697  ocean_assert(memoryHorizontalInterpolationFactorsRight);
6699  if (targetWidth >= 4u)
6700  {
6701  const float32x4_t sourceX_T_targetX_f_32x4 = vdupq_n_f32(sourceX_T_targetX);
6702  const float32x4_t targetOffsetX_f_32x4 = vdupq_n_f32(sourceX_T_targetX * 0.5f - 0.5f);
6704  // [0.0f, 0.0f, 0.0f, 0.0f]
6705  const float32x4_t constant_0_f_32x4 = vdupq_n_f32(0);
6707  // [4.0f, 4.0f, 4.0f, 4.0f]
6708  const float32x4_t constant_4_f_32x4 = vdupq_n_f32(4.0f);
6710  // we store 4 integers: [sourceWidth - 1, sourceWidth - 1, sourceWidth - 1, sourceWidth - 1]
6711  const uint32x4_t sourceWidth_1_u_32x4 = vdupq_n_u32(sourceWidth - 1u);
6713  // [0.0f, 1.0f, 2.0f, 3.0f]
6714  const float f_0123[4] = {0.0f, 1.0f, 2.0f, 3.0f};
6715  float32x4_t x_0123_f_32x4 = vld1q_f32(f_0123);
6717  // we pre-calculate the interpolation factors and pixel locations in horizontal direction
6719  for (unsigned int x = 0u; x < targetWidth; x += 4u)
6720  {
6721  if (x + 4u > targetWidth)
6722  {
6723  // the last iteration will not fit into the output frame,
6724  // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
6726  ocean_assert(x >= 4u && targetWidth > 4u);
6727  const unsigned int newX = targetWidth - 4u;
6729  ocean_assert(x > newX);
6730  const unsigned int offset = x - newX;
6732  x = newX;
6734  x_0123_f_32x4 = vsubq_f32(x_0123_f_32x4, vdupq_n_f32(float(offset)));
6736  // the for loop will stop after this iteration
6737  ocean_assert(!(x + 4u < targetWidth));
6738  }
6740  // we calculate the four source locations for our four target locations
6741  const float32x4_t sourceX_0123_f_32x4 = vmaxq_f32(constant_0_f_32x4, vaddq_f32(targetOffsetX_f_32x4, vmulq_f32(sourceX_T_targetX_f_32x4, x_0123_f_32x4)));
6743  // now we determine the pixel/integer accurate source locations
6744  // left = min(floor(sourceX), sourceWidth - 1)
6745  uint32x4_t left_0123_u_32x4 = vminq_u32(vcvtq_u32_f32(sourceX_0123_f_32x4), sourceWidth_1_u_32x4); // no rounding here
6747  // we store the offsets we have calculated
6748  vst1q_u32(memoryHorizontalInterpolationLocations.data<unsigned int>() + x, left_0123_u_32x4);
6750  // factorRight = sourcceX - float(left)
6751  const float32x4_t factorsRight_f_32x4 = vsubq_f32(sourceX_0123_f_32x4, vcvtq_f32_u32(left_0123_u_32x4));
6753  vst1q_f32(memoryHorizontalInterpolationFactorsRight.data<float>() + x, factorsRight_f_32x4);
6755  // [x + 0, x + 1, x + 2, x + 3] + [4, 4, 4, 4]
6756  x_0123_f_32x4 = vaddq_f32(x_0123_f_32x4, constant_4_f_32x4);
6757  }
6758  }
6759  else
6760  {
6761  const float targetOffsetX = sourceX_T_targetX * 0.5f - 0.5f;
6763  // we pre-calculate the interpolation factors and pixel locations in horizontal direction
6765  for (unsigned int x = 0u; x < targetWidth; ++x)
6766  {
6767  const float sourceX = max(0.0f, targetOffsetX + float(x) * sourceX_T_targetX);
6769  const unsigned int left = min((unsigned int)sourceX, sourceWidth - 1u); // no rounding here
6771  memoryHorizontalInterpolationLocations.data<unsigned int>()[x] = left;
6773  const float factorRight = sourceX - float(left);
6774  ocean_assert(factorRight >= 0.0f && factorRight <= 1.0f);
6776  memoryHorizontalInterpolationFactorsRight.data<float>()[x] = factorRight;
6777  }
6778  }
6779  }
6781  const float targetOffsetY = sourceY_T_targetY * 0.5f - 0.5f;
6783  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6784  {
6785  const float sourceY = minmax<float>(0.0f, targetOffsetY + sourceY_T_targetY * float(y), float(sourceHeight) - 1.0f);
6787  const unsigned int sourceRowTop = (unsigned int)sourceY; // we must not round here
6788  const float factorBottom = sourceY - float(sourceRowTop);
6789  ocean_assert(factorBottom >= 0.0f && factorBottom <= 1.0f);
6791  const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
6793  const float* const sourceTopRow = source + sourceStrideElements * sourceRowTop;
6794  const float* const sourceBottomRow = source + sourceStrideElements * sourceRowBottom;
6796  float* targetRow = nullptr;
6798  if (sourceHeight == targetHeight)
6799  {
6800  ocean_assert(sourceWidth != targetWidth);
6801  ocean_assert(memoryIntermediateExtendedRow);
6803  // we do not need to interpolate two lines, thus we simply need to copy the row (as we need an additional pixel at the end)
6804  memcpy(memoryIntermediateExtendedRow.data<float>(), sourceTopRow, sourceWidth * sizeof(float));
6805  }
6806  else
6807  {
6808  // in case we do not scale the width of the frame, we can write the result to the target frame directly
6809  targetRow = memoryIntermediateExtendedRow.isNull() ? target : memoryIntermediateExtendedRow.data<float>();
6811  ocean_assert(targetRow != nullptr);
6812  ocean_assert(interpolateRowVerticalFunction != nullptr);
6813  interpolateRowVerticalFunction(sourceTopRow, sourceBottomRow, targetRow, sourceWidth * 1u, factorBottom);
6814  }
6816  if (memoryIntermediateExtendedRow) // sourceWidth != targetWidth
6817  {
6818  // we use an extended row (with one additional pixel at the end - equal to the last pixel)
6819  // so we have to copy the last pixel
6820  memoryIntermediateExtendedRow.data<float>()[sourceWidth] = memoryIntermediateExtendedRow.data<float>()[sourceWidth - 1u];
6822  interpolateRowHorizontalFunction(memoryIntermediateExtendedRow.data<float>(), target, targetWidth, 1u, memoryHorizontalInterpolationLocations.data<unsigned int>(), memoryHorizontalInterpolationFactorsRight.data<float>());
6823  }
6825  target += targetStrideElements;
6826  }
6827 }
6829 #endif // OCEAN_HARDWARE_NEON_VERSION >= 10
6831 template <typename T, typename TScale, unsigned int tChannels>
6832 void FrameInterpolatorBilinear::scaleSubset(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
6833 {
6834  static_assert((std::is_same<float, TScale>::value || std::is_same<double, TScale>::value), "Invalid TScale type");
6836  ocean_assert(source != nullptr && target != nullptr);
6837  ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
6838  ocean_assert_and_suppress_unused(targetWidth >= 1u && targetHeight >= 1u, targetHeight);
6839  ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
6841  const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
6842  const unsigned int targetStrideElements = targetWidth * tChannels + targetPaddingElements;
6844  const TScale sourceX_T_targetX = TScale(sourceX_s_targetX);
6845  const TScale sourceY_T_targetY = TScale(sourceY_s_targetY);
6847  /*
6848  * We determine the sub-pixel accurate source location for each target pixel as follows:
6849  *
6850  * Example with a downsampling by factor 4:
6851  * sourceRow with 12 pixels: | 0 1 2 3 4 5 6 7 8 9 A B |
6852  * targetRow with 3 pixels: | 0 1 2 |
6853  *
6854  * Thus, the source row can be separated into three blocks;
6855  * and we want to extract the color information from the center of the blocks:
6856  * sourceRow with 12 pixels: | 0 1 2 3 | 4 5 6 7 | 8 9 A B |
6857  * targetRow with 3 pixels: | 0 | 1 | 2 | (targetTSourceX = 4)
6858  *
6859  * Thus, we add 0.5 to each target coordinate before converting it to a source location;
6860  * and subtract 0.5 again afterwards:
6861  * sourceX = (targetX + 0.5) * targetTSourceX - 0.5
6862  *
6863  * e.g., (0 + 0.5) * 4 - 0.5 = 1.5
6864  * (1 + 0.5) * 4 - 0.5 = 5.5
6865  *
6866  *
6867  * Example with a downsampling by factor 3:
6868  * sourceRow with 9 pixels: | 0 1 2 3 4 5 6 7 8 |
6869  * targetRow with 3 pixels: | 0 1 2 |
6870  *
6871  * sourceRow with 9 pixels: | 0 1 2 | 3 4 5 | 6 7 8 |
6872  * targetRow with 3 pixels: | 0 | 1 | 2 | (targetTSourceX = 3)
6873  *
6874  * e.g., (0 + 0.5) * 3 - 0.5 = 1
6875  * (1 + 0.5) * 3 - 0.5 = 4
6876  *
6877  *
6878  * Example with a downsampling by factor 2:
6879  * sourceRow with 6 pixels: | 0 1 2 3 4 5 |
6880  * targetRow with 3 pixels: | 0 1 2 |
6881  *
6882  * sourceRow with 6 pixels: | 0 1 | 2 3 | 4 5 |
6883  * targetRow with 3 pixels: | 0 | 1 | 2 | (targetTSourceX = 2)
6884  *
6885  * e.g., (0 + 0.5) * 2 - 0.5 = 0.5
6886  * (1 + 0.5) * 2 - 0.5 = 2.5
6887  *
6888  *
6889  * we can simplify the calculation (as we have a constant term):
6890  * sourceX = (targetX * targetTSourceX) + (0.5 * targetTSourceX - 0.5)
6891  */
6893  const TScale sourceX_T_targetXOffset = sourceX_T_targetX * TScale(0.5) - TScale(0.5);
6894  const TScale sourceY_T_targetYOffset = sourceY_T_targetY * TScale(0.5) - TScale(0.5);
6896  const TScale sourceWidth_1 = TScale(sourceWidth - 1u);
6897  const TScale sourceHeight_1 = TScale(sourceHeight - 1u);
6899  target += targetStrideElements * firstTargetRow;
6901  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6902  {
6903  const TScale sy = minmax(TScale(0), sourceY_T_targetYOffset + sourceY_T_targetY * TScale(y), sourceHeight_1);
6904  ocean_assert(sy >= TScale(0) && sy < TScale(sourceHeight));
6906  const unsigned int sTop = (unsigned int)sy;
6907  ocean_assert(sy >= TScale(sTop));
6909  const TScale factorBottom = sy - TScale(sTop);
6910  ocean_assert(factorBottom >= TScale(0) && factorBottom <= TScale(1));
6912  const TScale factorTop = TScale(1) - factorBottom;
6913  ocean_assert(factorTop >= TScale(0) && factorTop <= TScale(1));
6915  const T* const sourceTop = source + sTop * sourceStrideElements;
6916  const T* const sourceBottom = (sTop + 1u < sourceHeight) ? sourceTop + sourceStrideElements : sourceTop;
6918  for (unsigned int x = 0; x < targetWidth; ++x)
6919  {
6920  const TScale sx = minmax(TScale(0), sourceX_T_targetXOffset + sourceX_T_targetX * TScale(x), sourceWidth_1);
6921  ocean_assert(sx >= TScale(0) && sx < TScale(sourceWidth));
6923  const unsigned int sLeft = (unsigned int)sx;
6924  ocean_assert(sx >= TScale(sLeft));
6926  const TScale factorRight = sx - TScale(sLeft);
6927  ocean_assert(factorRight >= TScale(0) && factorRight <= TScale(1));
6929  const TScale factorLeft = TScale(1) - factorRight;
6930  ocean_assert(factorLeft >= TScale(0) && factorLeft <= TScale(1));
6932  const unsigned int sourceRightOffset = sLeft + 1u < sourceWidth ? tChannels : 0u;
6934  const T* const sourceTopLeft = sourceTop + sLeft * tChannels;
6935  const T* const sourceBottomLeft = sourceBottom + sLeft * tChannels;
6937  const TScale factorTopLeft = factorTop * factorLeft;
6938  const TScale factorTopRight = factorTop * factorRight;
6939  const TScale factorBottomLeft = factorBottom * factorLeft;
6940  const TScale factorBottomRight = factorBottom * factorRight;
6942  for (unsigned int n = 0u; n < tChannels; ++n)
6943  {
6944  target[n] = T(TScale(sourceTopLeft[n]) * factorTopLeft + TScale(sourceTopLeft[sourceRightOffset + n]) * factorTopRight
6945  + TScale(sourceBottomLeft[n]) * factorBottomLeft + TScale(sourceBottomLeft[sourceRightOffset + n]) * factorBottomRight);
6946  }
6948  target += tChannels;
6949  }
6951  target += targetPaddingElements;
6952  }
6953 }
6955 template <unsigned int tChannels>
6956 void FrameInterpolatorBilinear::rotate8BitPerChannelSubset(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const uint8_t* borderColor, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
6957 {
6958  static_assert(tChannels != 0u, "Invalid channel number!");
6960  ocean_assert(firstTargetRow + numberTargetRows <= height);
6962  typedef typename DataType<uint8_t, tChannels>::Type PixelType;
6964  const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
6966  uint8_t zeroColor[tChannels] = {uint8_t(0)};
6967  const PixelType bColor = borderColor ? *(const PixelType*)borderColor : *(const PixelType*)zeroColor;
6969  const SquareMatrix3 rotationMatrix3(Rotation(0, 0, 1, angle));
6970  const SquareMatrix2 rotationMatrix2(rotationMatrix3(0, 0), rotationMatrix3(1, 0), rotationMatrix3(0, 1), rotationMatrix3(1, 1));
6972  const Scalar width_1 = Scalar(width - 1u);
6973  const Scalar height_1 = Scalar(height - 1u);
6974  const Vector2 anchorPosition(horizontalAnchorPosition, verticalAnchorPosition);
6976  for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6977  {
6978  PixelType* targetPixel = (PixelType*)(target + y * targetStrideElements);
6980  const Scalar floatY = Scalar(y);
6982  for (unsigned int x = 0; x < width; ++x)
6983  {
6984  const Vector2 sourceLocation(anchorPosition + rotationMatrix2 * (Vector2(Scalar(x), floatY) - anchorPosition));
6986  if (sourceLocation.x() >= 0 && sourceLocation.y() >= 0 && sourceLocation.x() <= width_1 && sourceLocation.y() <= height_1)
6987  {
6988  interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(source, width, height, sourcePaddingElements, sourceLocation, (uint8_t*)(targetPixel));
6989  }
6990  else
6991  {
6992  *targetPixel = bColor;
6993  }
6995  ++targetPixel;
6996  }
6997  }
6998 }
7000 } // namespace CV
7002 } // namespace Ocean
This class implements the abstract base class for all AnyCamera objects.
Definition: AnyCamera.h:130
virtual unsigned int width() const =0
Returns the width of the camera image.
virtual VectorT2< T > projectToImageIF(const VectorT3< T > &objectPoint) const =0
Projects a 3D object point into the camera frame.
virtual unsigned int height() const =0
Returns the height of the camera image.
virtual bool isValid() const =0
Returns whether this camera is valid.
virtual VectorT3< T > vector(const VectorT2< T > &distortedImagePoint, const bool makeUnitVector=true) const =0
Returns a vector starting at the camera's center and intersecting a given 2D point in the image.
Helper class allowing to determine the offset that is necessary to access the alpha channel.
Definition: FrameBlender.h:60
static constexpr unsigned int data()
Returns the offset that is applied to access the first data channel.
Definition: FrameBlender.h:1160
The following comfort class provides comfortable functions simplifying prototyping applications but a...
Definition: FrameInterpolatorBilinear.h:60
static bool homographies(const Frame &input, Frame &output, const SquareMatrix3 homographies[4], const Vector2 &outputQuadrantCenter, const uint8_t *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
static bool zoom(const Frame &source, Frame &target, const Scalar zoomFactor, Worker *worker=nullptr)
Zooms into a given input frame (or zooms out) and stores the zoomed image content in an output frame.
static bool homographyMask(const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 &input_H_output, Worker *worker=nullptr, const uint8_t maskValue=0xFF, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
static bool lookupMask(const Frame &input, Frame &output, Frame &outputMask, const LookupTable &input_LT_output, const bool offset, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given input frame into an output frame by application of an interpolation lookup table a...
static bool homographyWithCameraMask(const AnyCamera &inputCamera, const AnyCamera &outputCamera, const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 &homography, Worker *worker=nullptr, const uint8_t maskValue=0xFFu)
Transforms a given input frame into an output frame by application of a homography.
static bool rotate(const Frame &source, Frame &target, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, Worker *worker=nullptr, const uint8_t *borderColor=nullptr)
Rotates a given frame by a bilinear interpolation.
static bool resize(const Frame &source, Frame &target, Worker *worker=nullptr)
Resizes/rescales a given frame by application of a bilinear interpolation.
static bool homographiesMask(const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 *homographies, const Vector2 &outputQuadrantCenter, Worker *worker=nullptr, const uint8_t maskValue=0xFF, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
static bool interpolatePixel(const TSource *frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2< TScalar > &position, TTarget *result, const TIntermediate &resultBias=TIntermediate(0))
Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data ty...
Definition: FrameInterpolatorBilinear.h:1521
static bool resampleCameraImage(const Frame &sourceFrame, const AnyCamera &sourceCamera, const SquareMatrix3 &source_R_target, const AnyCamera &targetCamera, Frame &targetFrame, LookupCorner2< Vector2 > *source_OLT_target=nullptr, Worker *worker=nullptr, const unsigned int binSizeInPixel=8u, const void *borderColor=nullptr)
Re-samples a camera image which has been captured with a camera profile as if the image would have be...
static bool homographyWithCamera(const PinholeCamera &inputCamera, const PinholeCamera &outputCamera, const Frame &input, Frame &output, const SquareMatrix3 &homography, const bool useDistortionParameters, const uint8_t *borderColor=nullptr, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of a homography.
static bool lookup(const Frame &input, Frame &output, const LookupTable &input_LT_output, const bool offset, const void *borderColor, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
static bool affine(const Frame &source, Frame &target, const SquareMatrix3 &source_A_target, const uint8_t *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &targetOrigin=PixelPositionI(0, 0))
Applies an affine transformation to an image.
static bool interpolatePixel8BitPerChannel(const uint8_t *frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2< TScalar > &position, uint8_t *result)
Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
Definition: FrameInterpolatorBilinear.h:1434
static bool homography(const Frame &input, Frame &output, const SquareMatrix3 &input_H_output, const void *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
This class implements highly optimized interpolation functions with fixed properties.
Definition: FrameInterpolatorBilinear.h:341
static void resize400x400To256x256_8BitPerChannel(const uint8_t *const source, uint8_t *const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 256x256 ...
static void resize400x400To224x224_8BitPerChannel(const uint8_t *const source, uint8_t *const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 224x224 ...
This class implements bilinear frame interpolator functions.
Definition: FrameInterpolatorBilinear.h:44
static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelNEON(const uint8_t *source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const typename DataType< uint8_t, tChannels >::Type &borderColor, const uint32x4_t &m128_factorsRight, const uint32x4_t &m128_factorsBottom, typename DataType< uint8_t, tChannels >::Type *targetPositionPixels)
Interpolates 4 independent pixels concurrently based on already known locations (top-left,...
Definition: FrameInterpolatorBilinear.h:4285
static void homography(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, const T *borderColor, T *output, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms an input frame with (almost) arbitrary pixel format into an output frame by application of ...
Definition: FrameInterpolatorBilinear.h:1733
static void resampleCameraImage(const T *sourceFrame, const AnyCamera &sourceCamera, const SquareMatrix3 &source_R_target, const AnyCamera &targetCamera, T *targetFrame, const unsigned int sourceFramePaddingElements, const unsigned int targetFramePaddingElements, LookupCorner2< Vector2 > *source_OLT_target=nullptr, Worker *worker=nullptr, const unsigned int binSizeInPixel=8u, const T *borderColor=nullptr)
Re-samples a camera image which has been captured with a camera profile as if the image would have be...
Definition: FrameInterpolatorBilinear.h:1893
static void interpolateRowVerticalNEON(const T *sourceRowTop, const T *sourceRowBottom, T *targetRow, const unsigned int elements, const float factorBottom)
Applies a (vertical) linear interpolation between two rows with arbitrary data types.
static void homographyWithCamera8BitPerChannel(const PinholeCamera &inputCamera, const PinholeCamera &outputCamera, const uint8_t *input, const SquareMatrix3 &homography, const bool useDistortionParameters, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition: FrameInterpolatorBilinear.h:1799
static void lookup(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable &input_LT_output, const bool offset, const T *borderColor, T *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
Definition: FrameInterpolatorBilinear.h:1833
static void interpolatePixel8BitPerChannel(const uint8_t *frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2< TScalar > &position, uint8_t *result)
Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
Definition: FrameInterpolatorBilinear.h:1960
static void affine8BitPerChannelSSESubset(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 *source_A_target, const uint8_t *borderColor, uint8_t *target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using SSE).
Definition: FrameInterpolatorBilinear.h:2464
static Scalar patchIntensitySum1Channel(const uint32_t *linedIntegralFrame, const unsigned int frameWidth, const unsigned int frameHeight, const unsigned int lineIntegralFramePaddingElements, const Vector2 &center, const CV::PixelCenter pixelCenter, const unsigned int patchWidth, const unsigned int patchHeight)
Interpolates the sum of intensity values of an image patch in a frame, while the frame is provided as ...
static void homographyWithCameraMask8BitPerChannel(const PinholeCamera &inputCamera, const PinholeCamera &outputCamera, const uint8_t *input, const unsigned int inputPaddingElements, const SquareMatrix3 &homography, uint8_t *output, uint8_t *outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition: FrameInterpolatorBilinear.h:1816
static void homographiesMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *homographies, uint8_t *output, uint8_t *outputMask, const uint8_t maskValue, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition: FrameInterpolatorBilinear.h:4507
static void homographiesMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], uint8_t *output, uint8_t *outputMask, const Vector2 &outputQuadrantCenter, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given 8 bit per channel input frame into an output frame by application of four homograp...
Definition: FrameInterpolatorBilinear.h:1786
static void interpolateRowHorizontal8BitPerChannel7BitPrecision(const uint8_t *extendedSourceRow, uint8_t *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const uint8_t *interpolationFactors)
Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
static void homography8BitPerChannelNEONSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition: FrameInterpolatorBilinear.h:3577
static void interpolateRowVertical8BitPerChannel7BitPrecisionNEON(const uint8_t *sourceRowTop, const uint8_t *sourceRowBottom, uint8_t *targetRow, const unsigned int elements, const unsigned int factorBottom)
Applies a (vertical) linear interpolation between two rows with 8 bit per channel.
static void homographySubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const T *borderColor, T *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms a frame with (almost) arbitrary pixel format using the given homography.
Definition: FrameInterpolatorBilinear.h:2380
static void homographyMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, uint8_t *output, uint8_t *outputMask, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition: FrameInterpolatorBilinear.h:1770
static OCEAN_FORCE_INLINE void interpolate8Pixels1Channel8BitNEON(const uint8x8_t &topLeft_u_8x8, const uint8x8_t &topRight_u_8x8, const uint8x8_t &bottomLeft_u_8x8, const uint8x8_t &bottomRight_u_8x8, const uint8x16_t &factorsRight_factorsBottom_128_u_8x16, uint8_t *targetPositionPixels)
Interpolates 8 independent pixels concurrently of a 1 channel frame, the source pixel locations must ...
Definition: FrameInterpolatorBilinear.h:3957
static void homographies8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *homographies, const uint8_t *borderColor, uint8_t *output, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homographies.
Definition: FrameInterpolatorBilinear.h:4351
static void homographyWithCamera8BitPerChannelSubset(const PinholeCamera *inputCamera, const PinholeCamera *outputCamera, const PinholeCamera::DistortionLookup *outputCameraDistortionLookup, const uint8_t *input, const SquareMatrix3 *normalizedHomography, const bool useDistortionParameters, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms an 8 bit per channel frame using the given homography.
Definition: FrameInterpolatorBilinear.h:4587
static void affine8BitPerChannel(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 &source_A_target, const uint8_t *borderColor, uint8_t *target, const PixelPositionI &targetOrigin, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Applies an affine transform to an N-channel, 8-bit frame. The target frame must have the same pixel form...
Definition: FrameInterpolatorBilinear.h:1657
static void homographyWithCameraMask8BitPerChannelSubset(const PinholeCamera *inputCamera, const PinholeCamera *outputCamera, const PinholeCamera::DistortionLookup *outputCameraDistortionLookup, const uint8_t *input, const unsigned int inputPaddingElements, const SquareMatrix3 *normalizedHomography, uint8_t *output, uint8_t *outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows)
Transforms an 8 bit per channel frame using the given homography.
Definition: FrameInterpolatorBilinear.h:4633
static void affine8BitPerChannelNEONSubset(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 *source_A_target, const uint8_t *borderColor, uint8_t *target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using NEON).
Definition: FrameInterpolatorBilinear.h:3327
static void lookup8BitPerChannelSubsetNEON(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame into an output frame by application of an interpolation lo...
Definition: FrameInterpolatorBilinear.h:4992
static void interpolateRowHorizontalNEON(const T *extendedSourceRow, T *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const float *interpolationFactorsRight)
Applies a (horizontal) linear interpolation for one row with arbitrary data type.
static void scale8BitPerChannelSubset7BitPrecisionNEON(const uint8_t *source, uint8_t *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int channels, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Rescales a subset of a given frame with 8 bit per channel by a bilinear interpolation.
Definition: FrameInterpolatorBilinear.h:5407
LookupCorner2< Vector2 > LookupTable
Definition of a lookup table for 2D vectors.
Definition: FrameInterpolatorBilinear.h:50
static OCEAN_FORCE_INLINE __m128i interpolate4Pixels8BitPerChannelSSE(const __m128i &m128_sourcesTopLeft, const __m128i &m128_sourcesTopRight, const __m128i &m128_sourcesBottomLeft, const __m128i &m128_sourcesBottomRight, const __m128i &m128_factorsTopLeft, const __m128i &m128_factorsTopRight, const __m128i &m128_factorsBottomLeft, const __m128i &m128_factorsBottomRight)
Interpolates 4 independent pixels concurrently based on already known locations (top-left,...
static void interpolateRowHorizontal(const T *extendedSourceRow, T *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const float *interpolationFactorsRight)
Applies a (horizontal) linear interpolation for one row with arbitrary data type.
Definition: FrameInterpolatorBilinear.h:5372
static void rotate8BitPerChannelSubset(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const uint8_t *borderColor, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Rotates a subset of a given frame by a bilinear interpolation.
Definition: FrameInterpolatorBilinear.h:6956
static void lookupMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable &input_LT_output, const bool offset, uint8_t *output, uint8_t *outputMask, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
Definition: FrameInterpolatorBilinear.h:1880
static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelSSE(const uint8_t *source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const typename DataType< uint8_t, tChannels >::Type &borderColor, const __m128i &m128_factorsRight, const __m128i &m128_factorsBottom, typename DataType< uint8_t, tChannels >::Type *targetPositionPixels)
Interpolates 4 independent pixels concurrently based on already known locations (top-left,...
Definition: FrameInterpolatorBilinear.h:3259
static void homographies8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], const uint8_t *borderColor, uint8_t *output, const Vector2 &outputQuadrantCenter, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of four homograp...
Definition: FrameInterpolatorBilinear.h:1757
static void lookup8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame with uint8_t as element type into an output frame by appli...
Definition: FrameInterpolatorBilinear.h:4681
static void resize(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Resizes a given frame with (almost) arbitrary data type (e.g., float, double, int) by using a bilinea...
Definition: FrameInterpolatorBilinear.h:1608
static bool coversHomographyInputFrame(const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int outputWidth, const unsigned int outputHeight, const SquareMatrix3 &input_H_output, const int outputOriginX=0, const int outputOriginY=0)
Checks whether the application of a given homography for a specified input frame and output frame cov...
static void scale(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rescales a given frame with arbitrary data type (e.g., float, double, int) by using a bilinear interp...
Definition: FrameInterpolatorBilinear.h:1621
static void lookupSubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, const T *borderColor, T *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame with arbitrary element type into an output frame by applic...
Definition: FrameInterpolatorBilinear.h:4735
static void scale8BitPerChannelSubset(const uint8_t *source, uint8_t *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Resizes a subset of a given frame with 8 bit per channel by a bilinear interpolation.
Definition: FrameInterpolatorBilinear.h:5231
static void rotate8BitPerChannel(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr, const uint8_t *borderColor=nullptr)
Rotates a given frame by a bilinear interpolation.
Definition: FrameInterpolatorBilinear.h:1942
static void interpolateRowVertical(const T *sourceRowTop, const T *sourceRowBottom, T *targetRow, const unsigned int elements, const float factorBottom)
Applies a (vertical) linear interpolation between two rows with arbitrary data types.
Definition: FrameInterpolatorBilinear.h:5352
static void homography8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, const uint8_t *borderColor, uint8_t *output, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition: FrameInterpolatorBilinear.h:1695
static void interpolate1PixelFullAlphaBorder8BitPerChannel(const uint8_t *frame, const unsigned int width, const unsigned int height, const Vector2 &position, uint8_t *result, const unsigned int framePaddingElements)
Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame wit...
Definition: FrameInterpolatorBilinear.h:2139
static void lookupMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, uint8_t *output, uint8_t *outputMask, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
Definition: FrameInterpolatorBilinear.h:5134
static void affine8BitPerChannelSubset(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 *source_A_target, const uint8_t *borderColor, uint8_t *target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Subset function to apply an affine transform to an N-channel, 8-bit unsigned image.
Definition: FrameInterpolatorBilinear.h:2228
static void homography8BitPerChannelSSESubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition: FrameInterpolatorBilinear.h:2649
static void interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON(const uint8_t *extendedSourceRow, uint8_t *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const uint8_t *interpolationFactors)
Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
static void scale8BitPerChannel(const uint8_t *source, uint8_t *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rescales a given frame with 8 bit per data channel by using a bilinear interpolation with user-define...
Definition: FrameInterpolatorBilinear.h:5187
static void homographyMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, uint8_t *output, uint8_t *outputMask, const uint8_t maskValue, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition: FrameInterpolatorBilinear.h:4432
static void scaleSubset(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Resizes a subset of a given frame with arbitrary data type by a bilinear interpolation.
Definition: FrameInterpolatorBilinear.h:6832
static void homography8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition: FrameInterpolatorBilinear.h:2303
static void interpolatePixel(const TSource *frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2< TScalar > &position, TTarget *result, const TIntermediate &resultBias=TIntermediate(0))
Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data ty...
Definition: FrameInterpolatorBilinear.h:2053
This class implements a 2D pixel position with pixel precision.
Definition: PixelPosition.h:65
T y() const
Returns the vertical coordinate position of this object.
Definition: PixelPosition.h:470
T x() const
Returns the horizontal coordinate position of this object.
Definition: PixelPosition.h:458
static __m128i set128i(const unsigned long long high64, const unsigned long long low64)
Sets a 128i value by two 64 bit values.
Definition: SSE.h:3770
static Caller< void > createStatic(typename StaticFunctionPointerMaker< void, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass >::Type function)
Creates a new caller container for a static function with no function parameter.
Definition: Caller.h:2876
Template class allowing to define an array of data types.
Definition: DataType.h:27
This class implements Ocean's image class.
Definition: Frame.h:1760
bool isValid() const
Returns whether this frame is valid.
Definition: Frame.h:4416
Definition of a frame type composed by the frame dimension, pixel format and pixel origin.
Definition: Frame.h:30
size_t sizeY() const
Returns the vertical dimension of this lookup object.
Definition: Lookup2.h:947
size_t sizeX() const
Returns the horizontal dimension of this lookup object.
Definition: Lookup2.h:941
size_t binsY() const
Returns the number of vertical bins of this lookup object.
Definition: Lookup2.h:959
size_t binsX() const
Returns the number of horizontal bins of this lookup object.
Definition: Lookup2.h:953
This class implements a 2D lookup object with values at the bins' corners defining the individual loo...
Definition: Lookup2.h:636
Vector2 binTopLeftCornerPosition(const size_t binX, const size_t binY) const
Returns the corner position (the top left corner) of a specific bin in relation to the dimension of t...
Definition: Lookup2.h:1786
void setBinTopLeftCornerValue(const size_t binX, const size_t binY, const T &value)
Sets the value of one specific lookup bin's top left corner.
Definition: Lookup2.h:2128
void bilinearValues(const size_t y, TTarget *values) const
Applies a lookup for an entire row in this lookup object.
Definition: Lookup2.h:1864
This class implements an object able to allocate memory.
Definition: base/Memory.h:22
bool isNull() const
Returns whether this object holds any memory.
Definition: base/Memory.h:401
void * data()
Returns the pointer to the writable memory which is allocated by this object.
Definition: base/Memory.h:303
This class provides basic numeric functionalities.
Definition: Numeric.h:57
static constexpr T eps()
Returns a small epsilon.
static T floor(const T value)
Returns the largest integer value that is not greater than the given value.
Definition: Numeric.h:2026
static constexpr bool isEqualEps(const T value)
Returns whether a value is smaller than or equal to a small epsilon.
Definition: Numeric.h:2087
static constexpr bool isNotEqualEps(const T value)
Returns whether a value is not smaller than or equal to a small epsilon.
Definition: Numeric.h:2237
unsigned int width() const
Returns the width of the camera image.
Definition: PinholeCamera.h:1300
const SquareMatrixT3< T > & invertedIntrinsic() const
Returns the inverted intrinsic camera matrix.
Definition: PinholeCamera.h:1263
const SquareMatrixT3< T > & intrinsic() const
Returns the intrinsic camera matrix.
Definition: PinholeCamera.h:1257
unsigned int height() const
Returns the height of the camera image.
Definition: PinholeCamera.h:1306
VectorT2< T > normalizedImagePoint2imagePoint(const VectorT2< T > &normalizedImagePoint, const bool distortImagePoint) const
Calculates the image point corresponding to a given normalized image point.
Definition: PinholeCamera.h:1602
This class implements a 2x2 square matrix.
Definition: SquareMatrix2.h:73
bool isNull() const
Returns whether this matrix is a zero matrix.
Definition: SquareMatrix3.h:1333
const T * data() const
Returns a pointer to the internal values.
Definition: SquareMatrix3.h:1046
bool isOrthonormal(const T epsilon=NumericT< T >::eps()) const
Returns whether this matrix is an orthonormal matrix.
Definition: SquareMatrix3.h:1365
const T & x() const noexcept
Returns the x value.
Definition: Vector2.h:698
const T & y() const noexcept
Returns the y value.
Definition: Vector2.h:710
bool isEqual(const VectorT2< T > &vector, const T eps) const
Returns whether two vectors are equal up to a specified epsilon.
Definition: Vector2.h:746
const T & y() const noexcept
Returns the y value.
Definition: Vector3.h:812
const T & x() const noexcept
Returns the x value.
Definition: Vector3.h:800
const T & z() const noexcept
Returns the z value.
Definition: Vector3.h:824
This class implements a worker able to distribute function calls over different threads.
Definition: Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
T minmax(const T &lowerBoundary, const T &value, const T &upperBoundary)
This function fits a given parameter into a specified value range.
Definition: base/Utilities.h:903
Definition of individual centers of pixels.
Definition: CV.h:117
PixelPositionT< int > PixelPositionI
Definition of a PixelPosition object with a data type allowing positive and negative coordinate value...
Definition: PixelPosition.h:41
The center of a pixel is in the upper-left corner of each pixel's square.
Definition: CV.h:133
The center of a pixel is located in the center of each pixel's square (with an offset of 0....
Definition: CV.h:150
SquareMatrixT3< Scalar > SquareMatrix3
Definition of the SquareMatrix3 object, depending on the OCEAN_MATH_USE_SINGLE_PRECISION flag either with ...
Definition: SquareMatrix3.h:35
RotationT< Scalar > Rotation
Definition of the Rotation object, depending on the OCEAN_MATH_USE_SINGLE_PRECISION flag either with ...
Definition: Rotation.h:31
float Scalar
Definition of a scalar type.
Definition: Math.h:128
VectorT3< Scalar > Vector3
Definition of a 3D vector.
Definition: Vector3.h:22
VectorT2< Scalar > Vector2
Definition of a 2D vector.
Definition: Vector2.h:21
The namespace covering the entire Ocean framework.
Definition: Accessor.h:15
Default definition of a type with tBytes bytes.
Definition: DataType.h:32
float Type
The 32 bit floating point data type for any data type T but 'double'.
Definition: DataType.h:373