Ocean
Loading...
Searching...
No Matches
FrameInterpolatorBilinear.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_CV_FRAME_INTERPOLATOR_BILINEAR_H
9#define META_OCEAN_CV_FRAME_INTERPOLATOR_BILINEAR_H
10
11#include "ocean/cv/CV.h"
14#include "ocean/cv/SSE.h"
15
16#include "ocean/base/DataType.h"
17#include "ocean/base/Frame.h"
18#include "ocean/base/Memory.h"
19#include "ocean/base/Worker.h"
20
22
26#include "ocean/math/Lookup2.h"
31#include "ocean/math/Vector2.h"
32
33namespace Ocean
34{
35
36namespace CV
37{
38
39/**
40 * This class implements bilinear frame interpolator functions.
41 * @ingroup cv
42 */
43class OCEAN_CV_EXPORT FrameInterpolatorBilinear
44{
45 public:
46
47 /**
48 * Definition of a lookup table for 2D vectors.
49 */
51
52 public:
53
54 /**
55 * The following comfort class provides comfortable functions simplifying prototyping applications but also increasing binary size of the resulting applications.
56 * Best practice is to avoid using these functions if binary size matters,<br>
57 * as for every comfort function a corresponding function exists with specialized functionality not increasing binary size significantly.<br>
58 */
59 class OCEAN_CV_EXPORT Comfort
60 {
61 public:
62
63 /**
64 * Resizes/rescales a given frame by application of a bilinear interpolation.
65 * @param source The source frame to resize, must be valid
66 * @param target Resulting target frame with identical frame pixel format and pixel origin as the source frame, must be valid
67 * @param worker Optional worker object used for load distribution
68 * @return True, if the frame could be resized
69 */
70 static bool resize(const Frame& source, Frame& target, Worker* worker = nullptr);
71
72 /**
73 * Resizes/rescales a given frame by application of a bilinear interpolation.
74 * @param frame The frame to resize, must be valid
75 * @param width The width of the resized frame in pixel, with range [1, infinity)
76 * @param height The height of the resized frame in pixel, with range [1, infinity)
77 * @param worker Optional worker object used for load distribution
78 * @return True, if the frame could be resized
79 */
80 static inline bool resize(Frame& frame, const unsigned int width, const unsigned int height, Worker* worker = nullptr);
81
82 /**
83 * Zooms into a given input frame (or zooms out) and stores the zoomed image content in an output frame.
84 * The resulting zoomed image will have the same frame type (frame resolution, pixel format, pixel origin) as the input image.<br>
85 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
86 * @param source The source frame for which the zoomed image content will be created, must be valid
87 * @param target The resulting target frame which will receive the zoomed image, will be set to the same frame type as the source frame, can be invalid
88 * @param zoomFactor The zoom factor to be applied, a factor < 1 will zoom out, a factor > 1 will zoom in, with range (0, infinity)
89 * @param worker Optional worker object to distribute the computation to several CPU cores
90 * @return True, if succeeded
91 */
92 static bool zoom(const Frame& source, Frame& target, const Scalar zoomFactor, Worker* worker = nullptr);
93
94 /**
95 * Transforms a given input frame into an output frame (with arbitrary frame dimension) by application of a homography.
96 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
97 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
98 * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).<br>
99 * Information: This function is the equivalent to OpenCV's cv::warpPerspective().
100 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
101 * @param input The input frame that will be transformed, must be valid
102 * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must be valid
103 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
104 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels and the data type of the pixel elements, nullptr to assign 0 to each channel
105 * @param worker Optional worker object to distribute the computational load
106 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
107 * @return True, if succeeded
108 */
109 static bool homography(const Frame& input, Frame& output, const SquareMatrix3& input_H_output, const void* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
110
111 /**
112 * Transforms a given input frame into an output frame (with arbitrary frame dimension) by application of four homographies.
113 * For each quadrant of the output frame an individual homography is applied while the final result is interpolated between the four homographies.<br>
114 * The quadrant order of the homographies is as follows: top left, top right, bottom left, bottom right.<br>
115 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
116 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
117 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
118 * @param input The input frame that will be transformed
119 * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must have a valid dimension
120 * @param homographies Four homographies used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
121 * @param outputQuadrantCenter The center position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, output.width())x[0, output.height())
122 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
123 * @param worker Optional worker object to distribute the computational load
124 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
125 * @return True, if succeeded
126 */
127 static bool homographies(const Frame& input, Frame& output, const SquareMatrix3 homographies[4], const Vector2& outputQuadrantCenter, const uint8_t* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
128
129 /**
130 * Transforms a given input frame into an output frame (with arbitrary frame dimension) by application of a homography.
131 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
132 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
133 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
134 * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).
135 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
136 * @param input The input frame that will be transformed, must be valid
137 * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must have a valid dimension
138 * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame, must be valid and must have the same frame dimension as the output frame
139 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
140 * @param worker Optional worker object to distribute the computational load
141 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
142 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
143 * @return True, if succeeded
144 * @see coversHomographyInputFrame().
145 */
146 static bool homographyMask(const Frame& input, Frame& output, Frame& outputMask, const SquareMatrix3& input_H_output, Worker* worker = nullptr, const uint8_t maskValue = 0xFF, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
147
148 /**
149 * Transforms a given input frame into an output frame (with arbitrary frame dimension) by application of four homographies.
150 * For each quadrant of the output frame an individual homography is applied while the final result is interpolated between the four homographies.<br>
151 * The quadrant order of the homographies is as follows: top left, top right, bottom left, bottom right.<br>
152 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
153 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
154 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
155 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
156 * @param input The input frame that will be transformed, must be valid
157 * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must have a valid dimension
158 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
159 * @param homographies Four homographies used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
160 * @param outputQuadrantCenter The center position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, output.width())x[0, output.height())
161 * @param worker Optional worker object to distribute the computational load
162 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
163 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
164 * @return True, if succeeded
165 * @see coversHomographyInputFrame().
166 */
167 static bool homographiesMask(const Frame& input, Frame& output, Frame& outputMask, const SquareMatrix3* homographies, const Vector2& outputQuadrantCenter, Worker* worker = nullptr, const uint8_t maskValue = 0xFF, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
168
169 /**
170 * Transforms a given input frame into an output frame by application of a homography.
171 * This function also uses a camera profile to improve the interpolation accuracy.<br>
172 * The given homography is transformed into a homography for normalized image coordinates.<br>
173 * Thus, also distortion parameters of the camera profile can be applied.<br>
174 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
175 * @param inputCamera The pinhole camera profile to be applied for the input frame
176 * @param outputCamera The pinhole camera profile to be applied for the output frame
177 * @param input The input frame that will be transformed, the frame dimension must match the dimension of the input camera
178 * @param output The output frame resulting by application of the given homography, the frame dimension must match the dimension of the output camera
179 * @param homography The homography used to transform the given input frame, which includes both camera profiles: H = Ki * H' * Ko^-1
180 * @param useDistortionParameters True, to apply the distortion parameters of the camera profile
181 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
182 * @param worker Optional worker object to distribute the computational load
183 * @return True, if succeeded
184 * @see homographyWithCameraMask(), homography().
185 */
186 static bool homographyWithCamera(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const Frame& input, Frame& output, const SquareMatrix3& homography, const bool useDistortionParameters, const uint8_t* borderColor = nullptr, Worker* worker = nullptr);
187
188 /**
189 * Transforms a given input frame into an output frame by application of a homography.
190 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
191 * This function also uses a camera profile to improve the interpolation accuracy.<br>
192 * The given homography is transformed into a homography for normalized image coordinates.<br>
193 * Thus, also distortion parameters of the camera profile can be applied.<br>
194 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
195 * @param inputCamera The pinhole camera profile to be applied for the input frame
196 * @param outputCamera The pinhole camera profile to be applied for the output frame
197 * @param input The input frame that will be transformed, the frame dimension must match the dimension of the input camera
198 * @param output The output frame resulting by application of the given homography, the frame dimension must match the dimension of the output camera
199 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
200 * @param homography The homography used to transform the given input frame, which includes both camera profiles: H = Ki * H' * Ko^-1
201 * @param worker Optional worker object to distribute the computational load
202 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
203 * @return True, if succeeded
204 * @see homographyWithCamera(), homography().
205 */
206 static bool homographyWithCameraMask(const AnyCamera& inputCamera, const AnyCamera& outputCamera, const Frame& input, Frame& output, Frame& outputMask, const SquareMatrix3& homography, Worker* worker = nullptr, const uint8_t maskValue = 0xFFu);
207
208 /**
209 * Transforms a given input frame into an output frame by application of an interpolation lookup table.
210 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
211 * Information: This function is the equivalent to OpenCV's cv::remap().
212 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
213 * @param input The input frame that will be transformed
214 * @param output Resulting output frame, the dimension will be set to match the size of the lookup table, pixel format and pixel origin will be set to match the given input frame
215 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
216 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
217 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
218 * @param worker Optional worker object to distribute the computation
219 * @return True, if succeeded
220 */
221 static bool lookup(const Frame& input, Frame& output, const LookupTable& input_LT_output, const bool offset, const void* borderColor, Worker* worker = nullptr);
222
223 /**
224 * Transforms a given input frame into an output frame by application of an interpolation lookup table and creates an additional mask as output.
225 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
226 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
227 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
228 * @param input The input frame which will be transformed
229 * @param output Resulting output frame, the frame dimension will be set to match the size of the lookup table, pixel format and pixel origin will be set to match the given input frame
230 * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
231 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
232 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
233 * @param worker Optional worker object to distribute the computation
234 * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
235 * @return True, if succeeded
236 */
237 static bool lookupMask(const Frame& input, Frame& output, Frame& outputMask, const LookupTable& input_LT_output, const bool offset, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
238
239 /**
240 * Applies an affine transformation to an image.
241 * The target frame must have the same pixel format and pixel origin as the source frame, however the dimension (and position) of the target frame can be arbitrary.
242 * This function allows the creation of a target frame fully covering the source frame (if the position and dimension of the target frame covers the transformation of the affine transformation).
243 * The multiplication of the affine transformation with pixel location in the target image yield their location in the source image, i.e., sourcePoint = source_A_target * targetPoint.
244 * The parameter 'targetOrigin' applies an additional translation to the provided affine transformation i.e., source_A_target * create_translation_matrix3x3(targetOrigin.x(), targetOrigin.y()).
245 * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
246 * <pre>
247 * a c e
248 * b d f
249 * 0 0 1
250 * </pre>
251 * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
252 * Information: This function is the equivalent to OpenCV's cv::warpAffine().
253 * Note: For applications running on mobile devices, in order to keep the impact on binary size to a minimum please prefer a specialized transformation function (those that work on image pointers instead of Frame instances).
254 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
255 * @param source The source frame that will be transformed, must be valid
256 * @param target The resulting frame after applying the affine transformation to the source frame; pixel format and pixel origin must be identical to source frame; memory of target frame must be allocated by the caller
257 * @param source_A_target Affine transform used to transform the given source frame, transforming points defined in the target frame into points defined in the source frame
258 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
259 * @param worker Optional worker object to distribute the computational load
260 * @param targetOrigin The origin of the target frame defining the global position of the target frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
261 * @return True, if succeeded
262 */
263 static bool affine(const Frame& source, Frame& target, const SquareMatrix3& source_A_target, const uint8_t* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& targetOrigin = PixelPositionI(0, 0));
264
265 /**
266 * Rotates a given frame by a bilinear interpolation.
267 * The frame will be rotated around a specified anchor position (inside or outside the frame).<br>
268 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
269 * @param source The source frame to be rotated, must be valid
270 * @param target The target frame which will receive the rotated image, will be set to the same frame type as the source frame, can be invalid
271 * @param horizontalAnchorPosition Position of the rotation anchor in the horizontal direction, with range (-infinity, infinity)
272 * @param verticalAnchorPosition Position of the rotation anchor in the vertical direction, with range (-infinity, infinity)
273 * @param angle The counter clockwise rotation angle in radian, with range [0, 2PI)
274 * @param worker Optional worker object to distribute the computation to several CPU cores
275 * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use 0x00 for each channel
276 * @return True, if succeeded
277 */
278 static bool rotate(const Frame& source, Frame& target, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, Worker* worker = nullptr, const uint8_t* borderColor = nullptr);
279
280 /**
281 * Re-samples a camera image which has been captured with a camera profile as if the image would have been captured with a second camera profile.
282 * The function can be used e.g., to rectify a fisheye camera image into a pinhole camera image.
283 * @param sourceFrame The source image captured with the source camera profile, must be valid
284 * @param sourceCamera The source camera profile which has been used to capture the source image, with resolution sourceFrame.width() x sourceFrame.height(), must be valid
285 * @param source_R_target The rotation transforming 3D points defined in the coordinate system of the target camera image to 3D points defined in the coordinate system of the source camera image, must be valid
286 * @param targetCamera The camera profile of the target frame, must be valid
287 * @param targetFrame The resulting target image, with resolution targetCamera.width() x targetCamera.height(), must be valid
288 * @param source_OLT_target Optional resulting offset lookup table between target image points and source image points
289 * @param worker Optional worker object to distribute the computational load
290 * @param binSizeInPixel The size in pixel of the interpolation bins used for building the lookup table, with range [1, infinity)
291 * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use ElementType(0) for each channel
292 * @return True, if succeeded
293 * @see resampleCameraImageImage8BitPerChannel().
294 */
295 static bool resampleCameraImage(const Frame& sourceFrame, const AnyCamera& sourceCamera, const SquareMatrix3& source_R_target, const AnyCamera& targetCamera, Frame& targetFrame, LookupCorner2<Vector2>* source_OLT_target = nullptr, Worker* worker = nullptr, const unsigned int binSizeInPixel = 8u, const void* borderColor = nullptr);
296
297 /**
298 * Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
299 * This function uses an integer interpolation with a precision of 1/128.
300 * @param frame The frame to determine the pixel values from, must be valid
301 * @param channels Number of channels of the given frame, with range [1, 8]
302 * @param width The width of the frame in pixel, with range [1, infinity)
303 * @param height The height of the frame in pixel, with range [1, infinity)
304 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
305 * @param pixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
306 * @param position The position for which the interpolated pixel will be determined, with ranges [0, width - 1]x[0, height - 1] for PC_TOP_LEFT, [0, width]x[0, height] for PC_CENTER
307 * @param result Resulting pixel values, must be valid
308 * @return True, if succeeded
309 * @tparam TScalar The scalar data type of the sub-pixel position
310 */
311 template <typename TScalar = Scalar>
312 static bool interpolatePixel8BitPerChannel(const uint8_t* frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2<TScalar>& position, uint8_t* result);
313
314 /**
315 * Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data type.
316 * This function uses floating point precision during interpolation.
317 * @param frame The frame to determine the pixel values from, must be valid
318 * @param channels Number of channels of the given frame, with range [1, 8]
319 * @param width The width of the frame in pixel, with range [1, infinity)
320 * @param height The height of the frame in pixel, with range [1, infinity)
321 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
322 * @param pixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
323 * @param position The position to determine the interpolated pixel values for, with range [0, width)x[0, height)
324 * @param result Resulting interpolated pixel value(s), must be valid
325 * @param resultBias Optional bias value which will be added to the interpolation result e.g. to handle rounding, with range (-infinity, infinity), default is zero
326 * @return True, if succeeded
327 * @tparam TSource The data type of the provided pixel values in the (source) frame
328 * @tparam TTarget The data type of the resulting interpolated value(s)
329 * @tparam TScalar The data type of each coordinate of the provided interpolation location, should be either Scalar, float, or double
330 * @tparam TIntermediate The data type of the intermediate interpolation result before assigning the result
331 */
332 template <typename TSource, typename TTarget, typename TScalar = Scalar, typename TIntermediate = TScalar>
333 static bool interpolatePixel(const TSource* frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2<TScalar>& position, TTarget* result, const TIntermediate& resultBias = TIntermediate(0));
334 };
335
336 /**
337 * This class implements highly optimized interpolation functions with fixed properties.
338 * The functions can be significantly faster as these functions are tailored to the specific properties.
339 */
340 class OCEAN_CV_EXPORT SpecialCases
341 {
342 public:
343
344 /**
345 * Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 224x224 by using a bilinear interpolation.
346 * This function exploits the fact that lookup locations and interpolation factors repeat after 25 pixels (14 pixels in the target resolution), as 400 / 224 == 25 / 14.
347 * @param source The source frame buffer with resolution 400x400, must be valid
348 * @param target The target frame buffer receiving the resized image information, with resolution 224x224, must be valid
349 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
350 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
351 * @see FrameInterpolatorBilinear::resize<T, tChannels>().
352 */
353 static void resize400x400To224x224_8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
354
355 /**
356 * Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 256x256 by using a bilinear interpolation.
357 * This function exploits the fact that lookup locations and interpolation factors repeat after 25 pixels (16 pixels in the target resolution), as 400 / 256 == 25 / 16.
358 * @param source The source frame buffer with resolution 400x400, must be valid
359 * @param target The target frame buffer receiving the resized image information, with resolution 256x256, must be valid
360 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
361 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
362 * @see FrameInterpolatorBilinear::resize<T, tChannels>().
363 */
364 static void resize400x400To256x256_8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
365 };
366
367 /**
368 * Resizes a given frame with (almost) arbitrary data type (e.g., float, double, int) by using a bilinear interpolation.
369 * This function is a wrapper around scale<T, tChannels>().
370 * @param source The source frame buffer providing the image information to be resized, must be valid
371 * @param target The target frame buffer receiving the resized image information, must be valid
372 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
373 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
374 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
375 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
376 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
377 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
378 * @param worker Optional worker object to distribute the computation to several CPU cores
379 * @tparam T Data type of each pixel channel, e.g., float, double, int
380 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
381 * @see scale<T, tChannels>().
382 */
383 template <typename T, unsigned int tChannels>
384 static inline void resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
385
386 /**
387 * Rescales a given frame with arbitrary data type (e.g., float, double, int) by using a bilinear interpolation with user-defined scaling factors.
388 * Beware: This function is not optimized for performance but supports arbitrary data types.<br>
389 * Try to use scale8BitPerChannel() if possible.
390 * @param source The source frame buffer providing the image information to be rescaled, must be valid
391 * @param target The target frame buffer receiving the rescaled image information, must be valid
392 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
393 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
394 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
395 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
396 * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
397 * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
398 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
399 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
400 * @param worker Optional worker object to distribute the computation to several CPU cores
401 * @tparam T Data type of each pixel channel, e.g., float, double, int
402 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
403 * @see resize<T, tChannels>().
404 */
405 template <typename T, unsigned int tChannels>
406 static inline void scale(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
407
408 /**
409 * Rotates a given frame by a bilinear interpolation.
410 * The frame will be rotated around a specified anchor position (inside or outside the frame).
411 * @param source The source frame to be rotated, must be valid
412 * @param target The target frame which will receive the rotated image, with same frame type as the source frame, must be valid
413 * @param width The width of the source and target frame in pixel, with range [1, infinity)
414 * @param height The height of the source and target frame in pixel, with range [1, infinity)
415 * @param horizontalAnchorPosition Position of the rotation anchor in the horizontal direction, with range (-infinity, infinity)
416 * @param verticalAnchorPosition Position of the rotation anchor in the vertical direction, with range (-infinity, infinity)
417 * @param angle The counter clockwise rotation angle in radian, with range [0, 2PI)
418 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
419 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
420 * @param worker Optional worker object to distribute the computation to several CPU cores
421 * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use 0x00 for each channel
422 * @tparam tChannels The number of channels both frames have, with range [1, infinity)
423 */
424 template <unsigned int tChannels>
425 static inline void rotate8BitPerChannel(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr, const uint8_t* borderColor = nullptr);
426
427 /**
428 * Applies an affine transformation to an N-channel, 8-bit frame.
429 * The target frame must have the same pixel format and pixel origin as the source frame, however the dimension (and position) of the target frame can be arbitrary.<br>
430 * This function allows the creation of a target frame fully covering the source frame (if the position and dimension of the target frame covers the transformation of the affine transformation).<br>
431 * The 'targetOrigin' parameter simply applies an additional translation onto the provided affine transformation i.e., affine * create_translation_matrix3x3(targetOrigin.x(), targetOrigin.y()).
432 * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
433 * <pre>
434 * a c e
435 * b d f
436 * 0 0 1
437 * </pre>
438 * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
439 * @param source Input frame that will be transformed, must be valid
440 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
441 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
442 * @param source_A_target Affine transformation, such that: sourcePoint = source_A_target * targetPoint
443 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
444 * @param target The target frame using the given affine transform, must be valid
445 * @param targetOrigin The origin of the target frame defining the global position of the target frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
446 * @param targetWidth The width of the target image in pixel, with range [1, infinity)
447 * @param targetHeight The height of the target image in pixel, with range [1, infinity)
448 * @param sourcePaddingElements The number of padding elements at the end of each source frame row, in elements, with range [0, infinity)
449 * @param targetPaddingElements The number of padding elements at the end of each target frame row, in elements, with range [0, infinity)
450 * @param worker Optional worker object to distribute the computational load
451 * @tparam tChannels Number of channels of the frame
452 * @see homographyMask8BitPerChannel(), homographyWithCamera8BitPerChannel(), homography().
453 */
454 template <unsigned int tChannels>
455 static inline void affine8BitPerChannel(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3& source_A_target, const uint8_t* borderColor, uint8_t* target, const PixelPositionI& targetOrigin, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
456
457 /**
458 * Transforms an input frame with (almost) arbitrary pixel format into an output frame by application of a homography.
459 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
460 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
461 * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).
462 * @param input The input frame that will be transformed, must be valid
463 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
464 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
465 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
466 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
467 * @param output The output frame using the given homography, must be valid
468 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
469 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
470 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
471 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
472 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
473 * @param worker Optional worker object to distribute the computational load
474 * @tparam T Data type of each pixel channel, e.g., float, double, int
475 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
476 * @see homographyMask8BitPerChannel(), homographyWithCamera8BitPerChannel().
477 */
478 template <typename T, unsigned int tChannels>
479 static inline void homography(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const T* borderColor, T* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
480
481 /**
482 * Transforms a given 8 bit per channel input frame into an output frame by application of four homographies.
483 * For each quadrant of the output frame an individual homography is applied while the final result is interpolated between the four homographies.<br>
484 * The quadrant order of the homographies is as follows: top left, top right, bottom left, bottom right.<br>
485 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
486 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homographies).<br>
487 * @param input The input frame that will be transformed
488 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
489 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
490 * @param homographies Four homographies used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
491 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
492 * @param output The output frame using the given homographies
493 * @param outputQuadrantCenter The center position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputWidth)x[0, outputHeight)
494 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
495 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
496 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
497 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
498 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
499 * @param worker Optional worker object to distribute the computational load
500 * @tparam tChannels Number of channels of the frame
501 * @see homographyMask8BitPerChannel(), homographyWithCamera8BitPerChannel().
502 */
503 template <unsigned int tChannels>
504 static inline void homographies8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], const uint8_t* borderColor, uint8_t* output, const Vector2& outputQuadrantCenter, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
505
506 /**
507 * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
508 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
509 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
510 * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).
511 * @param input The input frame that will be transformed, must be valid
512 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
513 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
514 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
515 * @param output The output frame using the given homography, must be valid
516 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame, must be valid
517 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
518 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
519 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
520 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
521 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
522 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
523 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
524 * @param worker Optional worker object to distribute the computational load
525 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
526 * @see homography(), homographyWithCamera8BitPerChannel().
527 */
528 template <unsigned int tChannels>
529 static inline void homographyMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, uint8_t* output, uint8_t* outputMask, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const uint8_t maskValue /* = 0xFF*/, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr);
530
531 /**
532 * Transforms a given 8 bit per channel input frame into an output frame by application of four homographies.
533 * For each quadrant of the output frame an individual homography is applied while the final result is interpolated between the four homographies.<br>
534 * The quadrant order of the homographies is as follows: top left, top right, bottom left, bottom right.<br>
535 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
536 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homographies).<br>
537 * @param input The input frame that will be transformed
538 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
539 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
540 * @param homographies Four homographies used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
541 * @param output The output frame using the given homographies
542 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
543 * @param outputQuadrantCenter The center position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputWidth)x[0, outputHeight)
544 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
545 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
546 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
547 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
548 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
549 * @param outputMaskPaddingElements The number of padding elements at the end of each row of the output mask, in elements, with range [0, infinity)
550 * @param worker Optional worker object to distribute the computational load
551 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
552 * @tparam tChannels Number of channels of the frame
553 * @see homography(), homographyWithCamera8BitPerChannel().
554 */
555 template <unsigned int tChannels>
556 static inline void homographiesMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], uint8_t* output, uint8_t* outputMask, const Vector2& outputQuadrantCenter, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
557
558 /**
559 * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
560 * This function also uses a camera profile to improve the interpolation accuracy.<br>
561 * The given homography is transformed into a homography for normalized image coordinates.<br>
562 * Thus, the distortion parameters of the camera profile can be applied as well.<br>
563 * @param inputCamera The pinhole camera profile to be applied for the input frame
564 * @param outputCamera The pinhole camera profile to be applied for the output frame
565 * @param input The input frame that will be transformed
566 * @param homography The homography used to transform the given input frame by following equation: inputPoint = homography * outputPoint
567 * @param useDistortionParameters True, to apply the distortion parameters of the camera profile
568 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
569 * @param output The output frame using the given homography
570 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
571 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
572 * @param worker Optional worker object to distribute the computational load
573 * @tparam tChannels Number of channels of the frame
574 * @see homography().
575 */
576 template <unsigned int tChannels>
577 static inline void homographyWithCamera8BitPerChannel(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const uint8_t* input, const SquareMatrix3& homography, const bool useDistortionParameters, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
578
579 /**
580 * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
581 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame.<br>
582 * This function also uses a camera profile to improve the interpolation accuracy.<br>
583 * The given homography is transformed into a homography for normalized image coordinates.<br>
584 * Thus, the distortion parameters of the camera profile can be applied as well.
585 * @param inputCamera The pinhole camera profile to be applied for the input frame, must be valid
586 * @param outputCamera The pinhole camera profile to be applied for the output frame, must be valid
587 * @param input The input frame that will be transformed, must be valid
588 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
589 * @param homography The homography used to transform the given input frame by following equation: inputPoint = homography * outputPoint
590 * @param output The output frame using the given homography
591 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
592 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
593 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
594 * @param worker Optional worker object to distribute the computational load
595 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
596 * @tparam tChannels Number of channels of the frame
597 */
598 template <unsigned int tChannels>
599 static inline void homographyWithCameraMask8BitPerChannel(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const uint8_t* input, const unsigned int inputPaddingElements, const SquareMatrix3& homography, uint8_t* output, uint8_t* outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
600
601 /**
602 * Transforms a given input frame into an output frame by application of an interpolation lookup table.
603 * The frame must have a 1-plane pixel format.<br>
604 * The output frame must have the same pixel format and pixel origin as the input frame.
605 * @param input The input frame which will be transformed, must be valid
606 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
607 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
608 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
609 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
610 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign T(0) to each channel
611 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
612 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
613 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
614 * @param worker Optional worker object to distribute the computation
615 * @tparam T Data type of each pixel channel, e.g., float, double, int
616 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
617 */
618 template <typename T, unsigned int tChannels>
619 static inline void lookup(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& input_LT_output, const bool offset, const T* borderColor, T* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
620
621 /**
622 * Transforms a given input frame into an output frame by application of an interpolation lookup table.
623 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).<br>
624 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
625 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame; furthermore, these pixels are untouched in the output frame.<br>
626 * @param input The input frame which will be transformed
627 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
628 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
629 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
630 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
631 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table
632 * @param outputMask Resulting mask frame with 8 bits per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
633 * @param inputPaddingElements The number of padding elements at the end of each row of `input`, in elements, with range [0, infinity)
634 * @param outputPaddingElements The number of padding elements at the end of each row of `output`, in elements, with range [0, infinity)
635 * @param outputMaskPaddingElements The number of padding elements at the end of each row of `outputMask`, in elements, with range [0, infinity)
636 * @param worker Optional worker object to distribute the computation
637 * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
638 * @tparam tChannels Number of channels of the frame
639 */
640 template <unsigned int tChannels>
641 static inline void lookupMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& input_LT_output, const bool offset, uint8_t* output, uint8_t* outputMask, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
642
643 /**
644 * Re-samples a camera image which has been captured with a camera profile as if the image would have been captured with a second camera profile.
645 * The function can be used e.g., to rectify a fisheye camera image into a pinhole camera image.
646 * @param sourceFrame The source image captured with the source camera profile, must be valid
647 * @param sourceCamera The source camera profile which has been used to capture the source image, with resolution sourceFrame.width() x sourceFrame.height(), must be valid
648 * @param source_R_target The rotation transforming 3D points defined in the coordinate system of the target camera image to 3D points defined in the coordinate system of the source camera image, must be valid
649 * @param targetCamera The camera profile of the target frame, must be valid
650 * @param targetFrame The resulting target image, with resolution targetCamera.width() x targetCamera.height(), must be valid
651 * @param sourceFramePaddingElements The number of padding elements at the end of each source frame row, in elements, with range [0, infinity)
652 * @param targetFramePaddingElements The number of padding elements at the end of each target frame row, in elements, with range [0, infinity)
653 * @param source_OLT_target Optional resulting offset lookup table between target image points and source image points
654 * @param worker Optional worker object to distribute the computational load
655 * @param binSizeInPixel The size in pixel of the interpolation bins used for building the lookup table, with range [1, infinity)
656 * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use T(0) for each channel
657 * @tparam T Data type of each pixel channel, e.g., uint8_t, int16_t, float, double
658 * @tparam tChannels The number of frame channels, with range [1, infinity)
659 * @see Comfort::resampleCameraImage().
660 */
661 template <typename T, unsigned int tChannels>
662 static void resampleCameraImage(const T* sourceFrame, const AnyCamera& sourceCamera, const SquareMatrix3& source_R_target, const AnyCamera& targetCamera, T* targetFrame, const unsigned int sourceFramePaddingElements, const unsigned int targetFramePaddingElements, LookupCorner2<Vector2>* source_OLT_target = nullptr, Worker* worker = nullptr, const unsigned int binSizeInPixel = 8u, const T* borderColor = nullptr);
663
664 /**
665 * Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
666 * This function uses an integer interpolation with a precision of 1/128.
667 * @param frame The frame to determine the pixel values from, must be valid
668 * @param width The width of the frame in pixel, with range [1, infinity)
669 * @param height The height of the frame in pixel, with range [1, infinity)
670 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
671 * @param position The position for which the interpolated pixel will be determined, with ranges [0, width - 1]x[0, height - 1] for PC_TOP_LEFT, [0, width]x[0, height] for PC_CENTER
672 * @param result Resulting pixel values, must be valid
673 * @tparam tChannels Number of channels of the given frame, with range [1, infinity)
674 * @tparam tPixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
675 * @tparam TScalar The scalar data type of the sub-pixel position
676 * @see interpolatePixel().
677 */
678 template <unsigned int tChannels, PixelCenter tPixelCenter = PC_TOP_LEFT, typename TScalar = Scalar>
679 static inline void interpolatePixel8BitPerChannel(const uint8_t* frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2<TScalar>& position, uint8_t* result);
680
681 /**
682 * Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data type.
683 * This function uses floating point precision during interpolation.
684 * @param frame The frame to determine the pixel values from, must be valid
685 * @param width The width of the frame in pixel, with range [1, infinity)
686 * @param height The height of the frame in pixel, with range [1, infinity)
687 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
688 * @param position The position for which the interpolated pixel will be determined, with ranges [0, width - 1]x[0, height - 1] for PC_TOP_LEFT, [0, width]x[0, height] for PC_CENTER
689 * @param result Resulting interpolated pixel value(s), must be valid
690 * @param resultBias Optional bias value which will be added to the interpolation result, e.g., to handle rounding, with range (-infinity, infinity), default is zero
691 * @tparam TSource The data type of the provided pixel values in the (source) frame
692 * @tparam TTarget The data type of the resulting interpolated value(s)
693 * @tparam tChannels Number of channels of the given frame, with range [1, infinity)
694 * @tparam tPixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
695 * @tparam TScalar The data type of each coordinate of the provided interpolation location, should be either Scalar, float, or double
696 * @tparam TIntermediate The data type of the intermediate interpolation result before assigning the result
697 * @see interpolatePixel8BitPerChannel().
698 */
699 template <typename TSource, typename TTarget, unsigned int tChannels, PixelCenter tPixelCenter = PC_TOP_LEFT, typename TScalar = Scalar, typename TIntermediate = TScalar>
700 static inline void interpolatePixel(const TSource* frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2<TScalar>& position, TTarget* result, const TIntermediate& resultBias = TIntermediate(0));
701
702 /**
703 * Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame with alpha channel.
704 * The center of each pixel is located with an offset of (0.5 x 0.5) in relation to the real pixel position.<br>
705 * The given frame is virtually extended by a fully transparent border so that this function supports arbitrary interpolation positions.<br>
706 * If the given position lies inside the frame area of (-0.5, -0.5) -> (width + 0.5, height + 0.5) the resulting interpolation result will contain color information of the frame, otherwise a fully transparent interpolation result is provided.<br>
707 * @param frame The frame to determine the pixel values from, must be valid
708 * @param width The width of the frame in pixel, with range [1, infinity)
709 * @param height The height of the frame in pixel, with range [1, infinity)
710 * @param position The position to determine the interpolated pixel values for, with range (-infinity, infinity)x(-infinity, infinity)
711 * @param result Resulting pixel values, must be valid
712 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
713 * @tparam tChannels Number of channels of the given frame, with range [1, infinity)
714 * @tparam tAlphaAtFront True, if the alpha channel is in the front of the data channels
715 * @tparam tTransparentIs0xFF True, if 0xFF is interpreted as fully transparent
716 */
717 template <unsigned int tChannels, bool tAlphaAtFront, bool tTransparentIs0xFF>
718 static inline void interpolate1PixelFullAlphaBorder8BitPerChannel(const uint8_t* frame, const unsigned int width, const unsigned int height, const Vector2& position, uint8_t* result, const unsigned int framePaddingElements);
719
720 /**
721 * Interpolate the sum of intensity values of an image patch in a frame, while the frame is provided as a lined integral frame.
722 * @param linedIntegralFrame The lined integral image created from the actual gray-scale image for which the patch intensity sum will be determined, must be valid
723 * @param frameWidth Width of the original frame in pixel (not the width of the lined-integral frame), with range [1, infinity)
724 * @param frameHeight Height of the original frame in pixel (not the height of the lined-integral frame), with range [1, infinity)
725 * @param lineIntegralFramePaddingElements The number of padding elements at the end of each integral image row, in elements, with range [0, infinity)
726 * @param center 2D coordinates of the center point of the patch, with range [patchWidth/2, frameWidth - patchWidth/2)x[patchHeight/2, frameHeight - patchHeight/2) for PC_CENTER
727 * @param pixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
728 * @param patchWidth Width of the calculated patch in pixel, with range [1, frameWidth - 1]
729 * @param patchHeight Height of the calculated patch in pixel, with range [1, frameHeight - 1]
730 * @return The resulting sum of the pixel intensities
731 */
732 static Scalar patchIntensitySum1Channel(const uint32_t* linedIntegralFrame, const unsigned int frameWidth, const unsigned int frameHeight, const unsigned int lineIntegralFramePaddingElements, const Vector2& center, const CV::PixelCenter pixelCenter, const unsigned int patchWidth, const unsigned int patchHeight);
733
734 /**
735 * Checks whether the application of a given homography for a specified input frame and output frame covers the entire image information (which is necessary for the bilinear interpolation) or whether the homography relies on missing image information.
736 * @param inputWidth The width of the input frame in pixel, with range [1, infinity)
737 * @param inputHeight The height of the input frame in pixel, with range [1, infinity)
738 * @param outputWidth The width of the output frame in pixel, with range [1, infinity)
739 * @param outputHeight The height of the output frame in pixel, with range [1, infinity)
740 * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
741 * @param outputOriginX The horizontal origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)
742 * @param outputOriginY The vertical origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)
743 * @return True, if the homography covers the entire input image information (if all output pixels will receive valid data from the input frame); False, otherwise
744 */
745 static bool coversHomographyInputFrame(const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int outputWidth, const unsigned int outputHeight, const SquareMatrix3& input_H_output, const int outputOriginX = 0, const int outputOriginY = 0);
746
747 private:
748
749 /**
750 * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
751 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
752 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
753 * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).
754 * @param input The input frame that will be transformed, must be valid
755 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
756 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
757 * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
758 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
759 * @param output The output frame using the given homography, must be valid
760 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
761 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
762 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
763 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
764 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
765 * @param worker Optional worker object to distribute the computational load
766 * @tparam tChannels Number of channels of the frame
767 * @see homographyMask8BitPerChannel(), homographyWithCamera8BitPerChannel(), homography().
768 */
769 template <unsigned int tChannels>
770 static inline void homography8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const uint8_t* borderColor, uint8_t* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
771
772 /**
773 * Rescales a given frame with 8 bit per data channel by using a bilinear interpolation with user-defined scaling factors.
774 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).<br>
775 * Information: This function is equivalent to OpenCV's cv::resize().
776 * @param source The source frame buffer providing the image information to be resized, must be valid
777 * @param target The target frame buffer receiving the rescaled image information, must be valid
778 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
779 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
780 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
781 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
782 * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
783 * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
784 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
785 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
786 * @param worker Optional worker object to distribute the computation to several CPU cores
787 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
788 */
789 template <unsigned int tChannels>
790 static inline void scale8BitPerChannel(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
791
792 /**
793 * Resizes a subset of a given frame with 8 bit per channel by a bilinear interpolation.
794 * @param source The image data of the source frame to be resized, must be valid
795 * @param target The target frame buffer receiving the interpolated (resized) source frame, must be valid
796 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
797 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
798 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
799 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
800 * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
801 * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
802 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
803 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
804 * @param firstTargetRow The first target row to be handled, with range [0, targetHeight)
805 * @param numberTargetRows The number of target rows to be handled, with range [1, targetHeight - firstTargetRow]
806 * @tparam tChannels Number of frame channels, with range [1, infinity)
807 */
808 template <unsigned int tChannels>
809 static void scale8BitPerChannelSubset(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
810
811 /**
812 * Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
813 * This function uses interpolation factors with 7 bit precision and does not apply any SIMD instructions.
814 * @param extendedSourceRow The source row extended with a copy of the last pixel for which the interpolation will be applied, must be valid
815 * @param targetRow The target row receiving the interpolation result, must be valid
816 * @param targetWidth The width of the target row in pixel, with range [8, infinity)
817 * @param channels The number of frame channels, must be identical with 'tChannels', possible values are 1, 4
818 * @param interpolationLocations The successive locations within the source row defining the location of the left pixels to be interpolated (specified in elements !not! in pixels - e.g., interpolationLocations[0] = firstInterpolationPixel * channels), one for each target pixel, with range [0, (targetWidth - 1) * channels]
819 * @param interpolationFactors The two successive (left !and! right) interpolation factors for each left and right source pixel, with range [0, 128]
820 * @see interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON<tChannels>().
821 */
822 static void interpolateRowHorizontal8BitPerChannel7BitPrecision(const uint8_t* extendedSourceRow, uint8_t* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const uint8_t* interpolationFactors);
823
824 /**
825 * Applies a (vertical) linear interpolation between two rows with arbitrary data types.
826 * This function does not apply any SIMD instructions.<br>
827 * The length of both source rows is identical with the length of the target row.
828 * @param sourceRowTop The top source row to be used for interpolation, must be valid
829 * @param sourceRowBottom The bottom source row to be used for interpolation, must be valid
830 * @param targetRow The target row receiving the interpolation result, must be valid
831 * @param elements The number of elements in the row (width * channels), with range [1, infinity)
832 * @param factorBottom The interpolation factor for all elements of the bottom row, with factorTop = 1 - factorBottom, with range [0, 1]
833 * @tparam T The data type of each element, should be 'float'
834 */
835 template <typename T>
836 static void interpolateRowVertical(const T* sourceRowTop, const T* sourceRowBottom, T* targetRow, const unsigned int elements, const float factorBottom);
837
838 /**
839 * Applies a (horizontal) linear interpolation for one row with arbitrary data type.
840 * This function does not apply any SIMD instructions.
841 * @param extendedSourceRow The source row extended with a copy of the last pixel for which the interpolation will be applied, must be valid
842 * @param targetRow The target row receiving the interpolation result, must be valid
843 * @param targetWidth The width of the target row in pixel, with range [8, infinity)
844 * @param channels The number of frame channels, must be identical with 'tChannels', possible values are 1, 4
845 * @param interpolationLocations The successive locations within the source row defining the location of the left pixels to be interpolated (specified in elements !not! in pixels - e.g., interpolationLocations[0] = firstInterpolationPixel * channels), one for each target pixel, with range [0, (targetWidth - 1) * channels]
846 * @param interpolationFactorsRight The right interpolation factors for each right source pixel, with range [0, 1]
847 * @tparam T The data type of each element, should be 'float'
848 * @tparam tChannels The number of frame channels this function can handle, should be 1
849 * @see interpolateRowHorizontal8BitPerChannel7BitPrecision().
850 */
851 template <typename T, unsigned int tChannels>
852 static void interpolateRowHorizontal(const T* extendedSourceRow, T* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const float* interpolationFactorsRight);
853
854#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
855
856 /**
857 * Applies a (vertical) linear interpolation between two rows with 8 bit per channel.
858 * This function applies NEON instructions and uses interpolation factors with 7 bit precision.<br>
859 * The length of both source rows is identical with the length of the target row.
860 * @param sourceRowTop The top source row to be used for interpolation, must be valid
861 * @param sourceRowBottom The bottom source row to be used for interpolation, must be valid
862 * @param targetRow The target row receiving the interpolation result, must be valid
863 * @param elements The number of elements in the row (width * channels), with range [16, infinity)
864 * @param factorBottom The interpolation factor for all elements of the bottom row, with factorTop = 128 - factorBottom, with range [0, 128]
865 */
866 static void interpolateRowVertical8BitPerChannel7BitPrecisionNEON(const uint8_t* sourceRowTop, const uint8_t* sourceRowBottom, uint8_t* targetRow, const unsigned int elements, const unsigned int factorBottom);
867
868 /**
869 * Applies a (vertical) linear interpolation between two rows with arbitrary data types.
870 * This function applies NEON instructions.<br>
871 * The length of both source rows is identical with the length of the target row.
872 * @param sourceRowTop The top source row to be used for interpolation, must be valid
873 * @param sourceRowBottom The bottom source row to be used for interpolation, must be valid
874 * @param targetRow The target row receiving the interpolation result, must be valid
875 * @param elements The number of elements in the row (width * channels), with range [16, infinity)
876 * @param factorBottom The interpolation factor for all elements of the bottom row, with factorTop = 1 - factorBottom, with range [0, 1]
877 * @tparam T The data type of each element, should be 'float'
878 */
879 template <typename T>
880 static void interpolateRowVerticalNEON(const T* sourceRowTop, const T* sourceRowBottom, T* targetRow, const unsigned int elements, const float factorBottom);
881
882 /**
883 * Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
884 * This function applies NEON instructions and uses interpolation factors with 7 bit precision.
885 * @param extendedSourceRow The source row extended with a copy of the last pixel for which the interpolation will be applied, must be valid
886 * @param targetRow The target row receiving the interpolation result, must be valid
887 * @param targetWidth The width of the target row in pixel, with range [8, infinity)
888 * @param channels The number of frame channels, must be identical with 'tChannels', possible values are 1, 4
889 * @param interpolationLocations The successive locations within the source row defining the location of the left pixels to be interpolated (specified in elements !not! in pixels - e.g., interpolationLocations[0] = firstInterpolationPixel * channels), one for each target pixel, with range [0, (targetWidth - 1) * channels]
890 * @param interpolationFactors The two successive (left !and! right) interpolation factors for each left and right source pixel, with range [0, 128]
891 * @tparam tChannels The number of frame channels this function can handle, possible values are 1, 4
892 * @see interpolateRowHorizontal8BitPerChannel7BitPrecision().
893 */
894 template <unsigned int tChannels>
895 static void interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON(const uint8_t* extendedSourceRow, uint8_t* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const uint8_t* interpolationFactors);
896
897 /**
898 * Applies a (horizontal) linear interpolation for one row with arbitrary data type.
899 * This function applies NEON instructions.
900 * @param extendedSourceRow The source row extended with a copy of the last pixel for which the interpolation will be applied, must be valid
901 * @param targetRow The target row receiving the interpolation result, must be valid
902 * @param targetWidth The width of the target row in pixel, with range [8, infinity)
903 * @param channels The number of frame channels, must be identical with 'tChannels', possible values are 1, 4
904 * @param interpolationLocations The successive locations within the source row defining the location of the left pixels to be interpolated (specified in elements !not! in pixels - e.g., interpolationLocations[0] = firstInterpolationPixel * channels), one for each target pixel, with range [0, (targetWidth - 1) * channels]
905 * @param interpolationFactorsRight The right interpolation factors for each right source pixel, with range [0, 1]
906 * @tparam T The data type of each element, should be 'float'
907 * @tparam tChannels The number of frame channels this function can handle, should be 1
908 * @see interpolateRowHorizontal8BitPerChannel7BitPrecision().
909 */
910 template <typename T, unsigned int tChannels>
911 static void interpolateRowHorizontalNEON(const T* extendedSourceRow, T* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const float* interpolationFactorsRight);
912
913 /**
914 * Rescales a subset of a given frame with 8 bit per channel by a bilinear interpolation.
915 * This function applies NEON instructions and uses interpolation factors with 7 bit precision.
916 * @param source The image data of the source frame to be resized, must be valid
917 * @param target The target frame buffer receiving the interpolated (resized) source frame, must be valid
918 * @param sourceWidth Width of the source frame in pixel, with range [2, 65535]
919 * @param sourceHeight Height of the source frame in pixel, with range [1, 65535]
920 * @param targetWidth Width of the target frame in pixel, with range [tMinimalTargetWidth, 65535]
921 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
922 * @param channels The number of channels both frames have, with range [1, infinity)
923 * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
924 * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
925 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
926 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
927 * @param firstTargetRow The first target row to be handled, with range [0, targetHeight)
928 * @param numberTargetRows The number of target rows to be handled, with range [1, targetHeight - firstTargetRow]
929 * @see interpolateRowVertical8BitPerChannel7BitPrecisionNEON(), interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON().
930 */
931 static void scale8BitPerChannelSubset7BitPrecisionNEON(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int channels, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
932
933#endif // OCEAN_HARDWARE_NEON_VERSION >= 10
934
935 /**
936 * Resizes a subset of a given frame with arbitrary data type by a bilinear interpolation.
937 * @param source The image data of the source frame to be resized, must be valid
938 * @param target The target frame buffer receiving the interpolated (resized) source frame, must be valid
939 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
940 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
941 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
942 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
943 * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
944 * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
945 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
946 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
947 * @param firstTargetRow The first target row to be handled, with range [0, targetHeight)
948 * @param numberTargetRows The number of target rows to be handled, with range [1, targetHeight - firstTargetRow]
949 * @tparam T The data type of each pixel channel, e.g., float, double, int, short, ...
950 * @tparam TScale The data type of the internal scaling factors to be used, should be 'float' or 'double'
951 * @tparam tChannels Number of frame channels, with range [1, infinity)
952 */
953 template <typename T, typename TScale, unsigned int tChannels>
954 static void scaleSubset(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
955
956 /**
957 * Rotates a subset of a given frame by a bilinear interpolation.
958 * @param source The source frame to be rotated, must be valid
959 * @param target The target frame which will receive the rotated image, with same frame type as the source frame, must be valid
960 * @param width The width of the source and target frame in pixel, with range [1, infinity)
961 * @param height The height of the source and target frame in pixel, with range [1, infinity)
962 * @param horizontalAnchorPosition Position of the rotation anchor in the horizontal direction, with range (-infinity, infinity)
963 * @param verticalAnchorPosition Position of the rotation anchor in the vertical direction, with range (-infinity, infinity)
964 * @param angle The counter-clockwise rotation angle in radian, with range [0, 2PI)
965 * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use 0x00 for each channel
966 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
967 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
968 * @param firstTargetRow The first row of the target frame to be handled, with range [0, height)
969 * @param numberTargetRows The number of rows in the target frame to be handled, with range [1, height - firstTargetRow]
970 * @tparam tChannels Number of frame channels, with range [1, infinity)
971 */
972 template <unsigned int tChannels>
973 static void rotate8BitPerChannelSubset(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const uint8_t* borderColor, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
974
975 /**
976 * Subset function to apply an affine transform to an N-channel, 8-bit unsigned image.
977 * The affine transform must be provided in the following form: `sourcePoint = source_A_target * targetPoint`
978 * This function does not apply SIMD instructions and can be used for any frame dimensions.
979 * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should be of the form:
980 * <pre>
981 * a c e
982 * b d f
983 * 0 0 1
984 * </pre>
985 * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
986 * @param source Input frame that will be transformed
987 * @param sourceWidth Width of the source image in pixel, with range [1, infinity)
988 * @param sourceHeight Height of the source image in pixel, with range [1, infinity)
989 * @param source_A_target Affine transformation which is applied to the source frame.
990 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
991 * @param target Output frame using the given affine transform
992 * @param targetWidth The width of the target image in pixel, with range [1, infinity)
993 * @param targetHeight The height of the target image in pixel, with range [1, infinity)
994 * @param firstTargetRow The first target row to be handled
995 * @param numberTargetRows Number of target rows to be handled
996 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
997 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
998 * @tparam tChannels Number of frame channels, with range [1, infinity)
999 * @see affine8BitPerChannelSSESubset(), affine8BitPerChannelNEONSubset()
1000 */
1001 template <unsigned int tChannels>
1002 static inline void affine8BitPerChannelSubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
1003
1004 /**
1005 * Transforms an 8 bit per channel frame using the given homography.
1006 * The homography must provide the following transformation: inputPoint = input_H_output * outputPoint
1007 * This function does not apply SIMD instructions and can be used for any frame dimensions.
1008 * @param input The input frame that will be transformed
1009 * @param inputWidth Width of the input image in pixels, with range [1, infinity)
1010 * @param inputHeight Height of the input image in pixels, with range [1, infinity)
1011 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1012 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1013 * @param output The output frame using the given homography
1014 * @param outputWidth The width of the output image in pixels, with range [1, infinity)
1015 * @param outputHeight The height of the output image in pixels, with range [1, infinity)
1016 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
1017 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
1018 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight - 1]
1019 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
1020 * @tparam tChannels Number of frame channels, with range [1, infinity)
1021 * @see homography8BitPerChannelSSESubset(), homography8BitPerChannelNEONSubset()
1022 */
1023 template <unsigned int tChannels>
1024 static inline void homography8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1025
1026 /**
1027 * Transforms a frame with (almost) arbitrary pixel format using the given homography.
1028 * This function does not apply SIMD instructions and can be used for any frame dimensions.
1029 * @param input The input frame that will be transformed
1030 * @param inputWidth Width of the input image in pixels, with range [1, infinity)
1031 * @param inputHeight Height of the input image in pixels, with range [1, infinity)
1032 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1033 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1034 * @param output The output frame using the given homography
1035 * @param outputWidth The width of the output image in pixels, with range [1, infinity)
1036 * @param outputHeight The height of the output image in pixels, with range [1, infinity)
1037 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
1038 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
1039 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight - 1]
1040 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
1041 * @tparam T Data type of each pixel channel, e.g., float, double, int
1042 * @tparam tChannels Number of frame channels, with range [1, infinity)
1043 * @see homography8BitPerChannelSSESubset().
1044 */
1045 template <typename T, unsigned int tChannels>
1046 static inline void homographySubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1047
1048#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1049
1050 /**
1051 * Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using SSE).
1052 * This function applies SSE instructions.<br>
1053 * Beware: The target width 'targetWidth' must be >= 4, use affine8BitPerChannelSubset for small target frames
1054 * This function has the property: sourcePoint = source_A_target * targetPoint
1055 * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
1056 * <pre>
1057 * a c e
1058 * b d f
1059 * 0 0 1
1060 * </pre>
1061 * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
1062 * @param source Input frame that will be transformed
1063 * @param sourceWidth Width of the source image in pixels, with range [1, infinity)
1064 * @param sourceHeight Height of the source image in pixels, with range [1, infinity)
1065 * @param source_A_target Affine transformation mapping target points to source points, i.e., sourcePoint = source_A_target * targetPoint
1066 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1067 * @param target The target frame where the result of the transformation will be stored
1068 * @param targetWidth The width of the target image in pixels, with range [4, infinity)
1069 * @param targetHeight The height of the target image in pixels, with range [1, infinity)
1070 * @param firstTargetRow The first target row to be handled
1071 * @param numberTargetRows Number of target rows to be handled
1072 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
1073 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1074 * @tparam tChannels Number of frame channels
1075 * @see affine8BitPerChannelSubset(), affine8BitPerChannelNEONSubset().
1076 */
1077 template <unsigned int tChannels>
1078 static inline void affine8BitPerChannelSSESubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
1079
1080 /**
1081 * Transforms an 8 bit per channel frame using the given homography.
1082 * This function applies SSE instructions.<br>
1083 * Beware: The output width 'outputWidth' must be >= 4, use homography8BitPerChannelSubset for small output frames
1084 * @param input The input frame that will be transformed, must be valid
1085 * @param inputWidth Width of the input image in pixels, with range [1, infinity)
1086 * @param inputHeight Height of the input image in pixels, with range [1, infinity)
1087 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1088 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1089 * @param output The output frame using the given homography, must be valid
1090 * @param outputWidth The width of the output image in pixels, with range [4, infinity)
1091 * @param outputHeight The height of the output image in pixels, with range [1, infinity)
1092 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
1093 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
1094 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight - 1]
1095 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
1096 * @tparam tChannels Number of frame channels, with range [1, infinity)
1097 * @see homography8BitPerChannelSubset().
1098 */
1099 template <unsigned int tChannels>
1100 static inline void homography8BitPerChannelSSESubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1101
1102 /**
1103 * Interpolates 4 independent pixels concurrently based on already known locations (top-left, top-right, bottom-left, and bottom-right) and interpolation factors for the source pixels.
1104 * This function also supports interpolating the pixel values for a subset of the four pixels only: valid pixels will be interpolated, invalid pixels receive a defined border color.
1105 * @param source The source image in which the four independent pixels are located, must be valid
1106 * @param offsetsTopLeft The four offsets within the source image for the four top-left pixels used for the interpolation, with range [0, (width * tChannels + sourcePaddingElements) * (height - 1) + width * tChannels), or the corresponding validPixel information is 0x00000000
1107 * @param offsetsTopRight The four offsets within the source image for the four top-right pixels used for the interpolation, with range [0, (width * tChannels + sourcePaddingElements) * (height - 1) + width * tChannels), or the corresponding validPixel information is 0x00000000
1108 * @param offsetsBottomLeft The four offsets within the source image for the four bottom-left pixels used for the interpolation, with range [0, (width * tChannels + sourcePaddingElements) * (height - 1) + width * tChannels), or the corresponding validPixel information is 0x00000000
1109 * @param offsetsBottomRight The four offsets within the source image for the four bottom-right pixels used for the interpolation, with range [0, (width * tChannels + sourcePaddingElements) * (height - 1) + width * tChannels), or the corresponding validPixel information is 0x00000000
1110 * @param validPixels Four boolean states specifying which of the given four pixels will be interpolated and which ones will receive the defined border color, 0x00000000 for invalid pixels, everything else for valid pixels (e.g., 0xFFFFFFFF)
1111 * @param borderColor The border color that will be assigned to each resulting pixel that is invalid (for which the corresponding validPixel information is 0x00000000)
1112 * @param m128_factorsRight The horizontal interpolation factors for right pixels, with range [0, 128], 128 to use the color information of the right pixels only, 0 to use the color information of the left pixels only
1113 * @param m128_factorsBottom The vertical interpolation factors for bottom pixels, with range [0, 128], 128 to use the color information of the bottom pixels only, 0 to use the color information of the top pixels only
1114 * @param targetPositionPixels The buffer that will receive the interpolated color values, must be valid
1115 * @tparam tChannels The number of frame channels, with range [1, infinity)
1116 */
1117 template <unsigned int tChannels>
1118 static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelSSE(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const typename DataType<uint8_t, tChannels>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, tChannels>::Type* targetPositionPixels);
1119
1120 /**
1121 * Interpolates 4 independent pixels concurrently based on already gathered source pixel values (top-left, top-right, bottom-left, and bottom-right) and the corresponding interpolation factors.
1122 * In contrast to the offset-based overload, this overload receives the pixel values and the four corner-specific factors directly and has no parameters for pixel validity or a border color.
1123 * @param m128_sourcesTopLeft The pixel values of the four top left pixels, starting at the first byte, may contain unused bytes at the end, e.g., RGBARGBARGBARGBA or YUVYUVYUVYUV----
1124 * @param m128_sourcesTopRight The pixel values of the four top right pixels, starting at the first byte, may contain unused bytes at the end
1125 * @param m128_sourcesBottomLeft The pixel values of the four bottom left pixels, starting at the first byte, may contain unused bytes at the end
1126 * @param m128_sourcesBottomRight The pixel values of the four bottom right pixels, starting at the first byte, may contain unused bytes at the end
1127 * @param m128_factorsTopLeft The four interpolation factors of the four top left pixels, with ranges [0, 128 * 128], so that (m128_factorsTopLeft + m128_factorsTopRight + m128_factorsBottomLeft + m128_factorsBottomRight) == (128 * 128)
1128 * @param m128_factorsTopRight The four interpolation factors of the four top right pixels, with ranges [0, 128 * 128], so that (m128_factorsTopLeft + m128_factorsTopRight + m128_factorsBottomLeft + m128_factorsBottomRight) == (128 * 128)
1129 * @param m128_factorsBottomLeft The four interpolation factors of the four bottom left pixels, with ranges [0, 128 * 128], so that (m128_factorsTopLeft + m128_factorsTopRight + m128_factorsBottomLeft + m128_factorsBottomRight) == (128 * 128)
1130 * @param m128_factorsBottomRight The four interpolation factors of the four bottom right pixels, with ranges [0, 128 * 128], so that (m128_factorsTopLeft + m128_factorsTopRight + m128_factorsBottomLeft + m128_factorsBottomRight) == (128 * 128)
1131 * @return The resulting interpolated pixel values, starting at the first byte, may contain unused bytes at the end, e.g., RGBARGBARGBARGBA or YUVYUVYUVYUV----
1132 * @tparam tChannels The number of frame channels, with range [3, 4]
1133 */
1134 template <unsigned int tChannels>
1135 static OCEAN_FORCE_INLINE __m128i interpolate4Pixels8BitPerChannelSSE(const __m128i& m128_sourcesTopLeft, const __m128i& m128_sourcesTopRight, const __m128i& m128_sourcesBottomLeft, const __m128i& m128_sourcesBottomRight, const __m128i& m128_factorsTopLeft, const __m128i& m128_factorsTopRight, const __m128i& m128_factorsBottomLeft, const __m128i& m128_factorsBottomRight);
1136
1137#endif // OCEAN_HARDWARE_SSE_VERSION
1138
1139#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1140
1141 /**
1142 * Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using NEON).
1143 * This function applies NEON instructions.<br>
1144 * This one has the property: sourcePoint = source_A_target * targetPoint
1145 * Beware: The target width 'targetWidth' must be >= 4, use affine8BitPerChannelSubset for small target frames
1146 * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
1147 * <pre>
1148 * a c e
1149 * b d f
1150 * 0 0 1
1151 * </pre>
1152 * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
1153 * @param source The source frame that will be transformed
1154 * @param sourceWidth Width of the source image in pixels, with range [1, infinity)
1155 * @param sourceHeight Height of the source image in pixels, with range [1, infinity)
1156 * @param source_A_target Affine transformation mapping target points to source points, i.e., sourcePoint = source_A_target * targetPoint
1157 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1158 * @param target The target frame using the given affine transform
1159 * @param targetWidth The width of the target image in pixels, with range [4, infinity)
1160 * @param targetHeight The height of the target image in pixels, with range [1, infinity)
1161 * @param firstTargetRow The first target row to be handled
1162 * @param numberTargetRows Number of target rows to be handled
1163 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
1164 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1165 * @tparam tChannels Number of frame channels, with range [1, infinity)
1166 * @see affine8BitPerChannelSubset(), affine8BitPerChannelSSESubset().
1167 */
1168 template <unsigned int tChannels>
1169 static inline void affine8BitPerChannelNEONSubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
1170
1171 /**
1172 * Transforms an 8 bit per channel frame using the given homography.
1173 * This function applies NEON instructions.<br>
1174 * Beware: The output width 'outputWidth' must be >= 4, use homography8BitPerChannelSubset for small output frames.
1175 * @param input The input frame that will be transformed
1176 * @param inputWidth Width of the input image in pixels, with range [1, infinity)
1177 * @param inputHeight Height of the input image in pixels, with range [1, infinity)
1178 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1179 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1180 * @param output The output frame using the given homography
1181 * @param outputWidth The width of the output image in pixels, with range [4, infinity)
1182 * @param outputHeight The height of the output image in pixels, with range [1, infinity)
1183 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
1184 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
1185 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight - 1]
1186 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
1187 * @tparam tChannels Number of frame channels, with range [1, infinity)
1188 * @see homography8BitPerChannelSubset().
1189 */
1190 template <unsigned int tChannels>
1191 static inline void homography8BitPerChannelNEONSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1192
1193 /**
1194 * Interpolates 4 independent pixels concurrently based on already known locations (top-left, top-right, bottom-left, and bottom-right) and interpolation factors for the source pixels.
1195 * This function also supports interpolating the pixel values for a subset of the four pixels only: valid pixels will be interpolated, invalid pixels receive a defined border color.
1196 * @param source The source image in which the four independent pixels are located, must be valid
1197 * @param offsetsTopLeftElements The four offsets within the source image for the four top-left pixels used for the interpolation, in elements, with ranges [0, strideElements * height), or the corresponding validPixel information is 0x00000000
1198 * @param offsetsTopRightElements The four offsets within the source image for the four top-right pixels used for the interpolation, in elements, with ranges [0, strideElements * height), or the corresponding validPixel information is 0x00000000
1199 * @param offsetsBottomLeftElements The four offsets within the source image for the four bottom-left pixels used for the interpolation, in elements, with ranges [0, strideElements * height), or the corresponding validPixel information is 0x00000000
1200 * @param offsetsBottomRightElements The four offsets within the source image for the four bottom-right pixels used for the interpolation, in elements, with ranges [0, strideElements * height), or the corresponding validPixel information is 0x00000000
1201 * @param validPixels Four boolean states specifying which of the given four pixels will be interpolated and which ones will receive the defined border color, 0x00000000 for invalid pixels, everything else for valid pixels (e.g., 0xFFFFFFFF)
1202 * @param borderColor The border color that will be assigned to each resulting pixel that is invalid (for which the corresponding validPixel information is 0x00000000)
1203 * @param m128_factorsRight The horizontal interpolation factors for right pixels, with range [0, 128], 128 to use the color information of the right pixels only, 0 to use the color information of the left pixels only
1204 * @param m128_factorsBottom The vertical interpolation factors for bottom pixels, with range [0, 128], 128 to use the color information of the bottom pixels only, 0 to use the color information of the top pixels only
1205 * @param targetPositionPixels The buffer that will receive the interpolated color values, must be valid
1206 * @tparam tChannels The number of frame channels, with range [1, infinity)
1207 */
1208 template <unsigned int tChannels>
1209 static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelNEON(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const typename DataType<uint8_t, tChannels>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, tChannels>::Type* targetPositionPixels);
1210
1211 /**
1212 * Interpolates 8 independent pixels concurrently of a 1-channel frame; the source pixel values (top-left, top-right, bottom-left, and bottom-right) and the interpolation factors must be known already.
1213 * @param topLeft_u_8x8 The 8 top left pixel values to be used for interpolation
1214 * @param topRight_u_8x8 The 8 top right pixel values to be used for interpolation
1215 * @param bottomLeft_u_8x8 The 8 bottom left pixel values to be used for interpolation
1216 * @param bottomRight_u_8x8 The 8 bottom right pixel values to be used for interpolation
1217 * @param factorsRight_factorsBottom_128_u_8x16 The eight horizontal interpolation factors for right pixels, and the eight vertical interpolation factors for the bottom pixels, with range [0, 128], 128 to use the color information of the right (respectively bottom) pixels only, 0 to use the color information of the left (respectively top) pixels only
1218 * @param targetPositionPixels The buffer that will receive the interpolated color values, must be valid
1219 */
1220 static OCEAN_FORCE_INLINE void interpolate8Pixels1Channel8BitNEON(const uint8x8_t& topLeft_u_8x8, const uint8x8_t& topRight_u_8x8, const uint8x8_t& bottomLeft_u_8x8, const uint8x8_t& bottomRight_u_8x8, const uint8x16_t& factorsRight_factorsBottom_128_u_8x16, uint8_t* targetPositionPixels);
1221
1222#endif // OCEAN_HARDWARE_NEON_VERSION
1223
1224 /**
1225 * Transforms an 8 bit per channel frame using the given homographies.
1226 * @param input The input frame that will be transformed
1227 * @param inputWidth Width of the input image in pixels, with range [1, infinity)
1228 * @param inputHeight Height of the input image in pixels, with range [1, infinity)
1229 * @param homographies Homographies used to transform the given input frame
1230 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1231 * @param output The output frame using the given homographies
1232 * @param outputQuadrantCenterX The horizontal position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputWidth)
1233 * @param outputQuadrantCenterY The vertical position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputHeight)
1234 * @param outputOriginX The horizontal coordinate of the output frame's origin
1235 * @param outputOriginY The vertical coordinate of the output frame's origin
1236 * @param outputWidth The width of the output image in pixels, with range [1, infinity)
1237 * @param outputHeight The height of the output image in pixels, with range [1, infinity)
1238 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1239 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1240 * @param firstOutputRow The first output row to be handled
1241 * @param numberOutputRows Number of output rows to be handled
1242 * @tparam tChannels Number of frame channels
1243 */
1244 template <unsigned int tChannels>
1245 static inline void homographies8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* homographies, const uint8_t* borderColor, uint8_t* output, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1246
1247 /**
1248 * Transforms an 8 bit per channel frame using the given homography and additionally creates a mask of valid output pixels.
1249 * @param input The input frame that will be transformed, must be valid
1250 * @param inputWidth Width of the input image in pixels, with range [1, infinity)
1251 * @param inputHeight Height of the input image in pixels, with range [1, infinity)
1252 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1253 * @param output The output frame resulting by application of the given homography, must be valid
1254 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
1255 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
1256 * @param outputWidth The width of the output image in pixels, with range [1, infinity)
1257 * @param outputHeight The height of the output image in pixels, with range [1, infinity)
1258 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1259 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1260 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
1261 * @param firstOutputRow The first output row to be handled
1262 * @param numberOutputRows Number of output rows to be handled
1263 * @tparam tChannels Number of frame channels, with range [1, infinity)
1264 */
1265 template <unsigned int tChannels>
1266 static inline void homographyMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1267
1268 /**
1269 * Transforms an 8 bit per channel frame using the given homographies and additionally creates a mask of valid output pixels.
1270 * @param input The input frame that will be transformed
1271 * @param inputWidth Width of the input image in pixels, with range [1, infinity)
1272 * @param inputHeight Height of the input image in pixels, with range [1, infinity)
1273 * @param homographies Homographies used to transform the given input frame
1274 * @param output The output frame resulting by application of the given homographies
1275 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
1276 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
1277 * @param outputQuadrantCenterX The horizontal position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputWidth)
1278 * @param outputQuadrantCenterY The vertical position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputHeight)
1279 * @param outputOriginX The horizontal coordinate of the output frame's origin
1280 * @param outputOriginY The vertical coordinate of the output frame's origin
1281 * @param outputWidth The width of the output image in pixels, with range [1, infinity)
1282 * @param outputHeight The height of the output image in pixels, with range [1, infinity)
1283 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1284 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1285 * @param outputMaskPaddingElements The number of padding elements at the end of each row of the output mask, in elements, with range [0, infinity)
1286 * @param firstOutputRow The first output row to be handled
1287 * @param numberOutputRows Number of output rows to be handled
1288 * @tparam tChannels Number of frame channels
1289 */
1290 template <unsigned int tChannels>
1291 static inline void homographiesMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* homographies, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1292
1293 /**
1294 * Transforms a subset of an 8 bit per channel frame using the given homography and the given camera profiles.
1295 * @param inputCamera The pinhole camera profile to be applied for the input frame
1296 * @param outputCamera The pinhole camera profile to be applied for the output frame
1297 * @param outputCameraDistortionLookup The distortion lookup table of the output camera
1298 * @param input The input frame that will be transformed
1299 * @param normalizedHomography The homography used to transform the given input frame specified in normalized camera coordinates
1300 * @param useDistortionParameters True, to apply the distortion parameters of the camera profile
1301 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1302 * @param output The output frame resulting by application of the given homography
1303 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1304 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1305 * @param firstRow The first row to be handled
1306 * @param numberRows Number of rows to be handled
1307 * @tparam tChannels Number of frame channels
1308 */
1309 template <unsigned int tChannels>
1310 static void homographyWithCamera8BitPerChannelSubset(const PinholeCamera* inputCamera, const PinholeCamera* outputCamera, const PinholeCamera::DistortionLookup* outputCameraDistortionLookup, const uint8_t* input, const SquareMatrix3* normalizedHomography, const bool useDistortionParameters, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1311
1312 /**
1313 * Transforms a subset of an 8 bit per channel frame using the given homography and the given camera profiles, and creates a corresponding output mask.
1314 * @param inputCamera The pinhole camera profile to be applied for the input frame
1315 * @param outputCamera The pinhole camera profile to be applied for the output frame
1316 * @param outputCameraDistortionLookup The distortion lookup table of the output camera
1317 * @param input The input frame that will be transformed, must be valid
1318 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1319 * @param normalizedHomography The homography used to transform the given input frame specified in normalized camera coordinates
1320 * @param output The output frame resulting by application of the given homography
1321 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
1322 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1323 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
1324 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
1325 * @param firstRow The first row to be handled
1326 * @param numberRows Number of rows to be handled
1327 * @tparam tChannels Number of frame channels
1328 */
1329 template <unsigned int tChannels>
1330 static void homographyWithCameraMask8BitPerChannelSubset(const PinholeCamera* inputCamera, const PinholeCamera* outputCamera, const PinholeCamera::DistortionLookup* outputCameraDistortionLookup, const uint8_t* input, const unsigned int inputPaddingElements, const SquareMatrix3* normalizedHomography, uint8_t* output, uint8_t* outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows);
1331
1332 /**
1333 * Transforms a subset of a given input frame with uint8_t as element type into an output frame by application of an interpolation lookup table.
1334 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
1335 * @param input The input frame which will be transformed, must be valid
1336 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
1337 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
1338 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
1339 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
1340 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1341 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table
1342 * @param inputPaddingElements Number of padding elements at the end of each input row, in elements, with range [0, infinity)
1343 * @param outputPaddingElements Number of padding elements at the end of each output row, in elements, with range [0, infinity)
1344 * @param firstRow First row to be handled, with range [0, input_LT_output->sizeY())
1345 * @param numberRows Number of rows to be handled, with range [1, input_LT_output->sizeY() - firstRow]
1346 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
1347 */
1348 template <unsigned int tChannels>
1349 static void lookup8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1350
1351 /**
1352 * Transforms a subset of a given input frame with arbitrary element type into an output frame by application of an interpolation lookup table.
1353 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
1354 * @param input The input frame which will be transformed, must be valid
1355 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
1356 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
1357 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
1358 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
1359 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign T(0) to each channel
1360 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
1361 * @param inputPaddingElements Number of padding elements at the end of each input row, in elements, with range [0, infinity)
1362 * @param outputPaddingElements Number of padding elements at the end of each output row, in elements, with range [0, infinity)
1363 * @param firstRow First row to be handled, with range [0, input_LT_output->sizeY())
1364 * @param numberRows Number of rows to be handled, with range [1, input_LT_output->sizeY() - firstRow]
1365 * @tparam T Data type of each pixel channel, must not be 'uint8_t'
1366 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
1367 */
1368 template <typename T, unsigned int tChannels>
1369 static void lookupSubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const T* borderColor, T* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1370
1371#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1372
1373 /**
1374 * Transforms a subset of a given input frame into an output frame by application of an interpolation lookup table and uses NEON instructions.
1375 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
1376 * @param input The input frame which will be transformed, must be valid
1377 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
1378 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
1379 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), with table width >= 4, must be valid
1380 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
1381 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1382 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table
1383 * @param inputPaddingElements Number of padding elements at the end of each input row, in elements, with range [0, infinity)
1384 * @param outputPaddingElements Number of padding elements at the end of each output row, in elements, with range [0, infinity)
1385 * @param firstRow First row to be handled, with range [0, input_LT_output->sizeY())
1386 * @param numberRows Number of rows to be handled, with range [1, input_LT_output->sizeY() - firstRow]
1387 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
1388 */
1389 template <unsigned int tChannels>
1390 static void lookup8BitPerChannelSubsetNEON(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1391
1392#endif // defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1393
1394 /**
1395 * Transforms a subset of a given input frame into an output frame by application of an interpolation lookup table and creates a corresponding output mask.
1396 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
1397 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
1398 * @param input The input frame which will be transformed
1399 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
1400 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
1401 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
1402 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
1403 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table
1404 * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
1405 * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
1406 * @param inputPaddingElements The number of padding elements at the end of each row of `input`, in elements, with range [0, infinity)
1407 * @param outputPaddingElements The number of padding elements at the end of each row of `output`, in elements, with range [0, infinity)
1408 * @param outputMaskPaddingElements The number of padding elements at the end of each row of `outputMask`, in elements, with range [0, infinity)
1409 * @param firstRow First row to be handled
1410 * @param numberRows Number of rows to be handled
1411 * @tparam tChannels Number of channels of the frame
1412 */
1413 template <unsigned int tChannels>
1414 static void lookupMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1415};
1416
1417inline bool FrameInterpolatorBilinear::Comfort::resize(Frame& frame, const unsigned int width, const unsigned int height, Worker* worker)
1418{
1419 ocean_assert(frame.isValid());
1420 ocean_assert(width >= 1u && height >= 1u);
1421
1422 Frame target(FrameType(frame, width, height));
1423
1424 if (!resize(frame, target, worker))
1425 {
1426 return false;
1427 }
1428
1429 target.setTimestamp(frame.timestamp());
1431
1432 frame = std::move(target);
1433 return true;
1434}
1435
1436template <typename TScalar>
1437bool FrameInterpolatorBilinear::Comfort::interpolatePixel8BitPerChannel(const uint8_t* frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2<TScalar>& position, uint8_t* result)
1438{
1439 ocean_assert(frame != nullptr);
1440 ocean_assert(channels >= 1u && channels <= 8u);
1441
1442 if (pixelCenter == PC_TOP_LEFT)
1443 {
1444 switch (channels)
1445 {
1446 case 1u:
1447 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<1u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1448 return true;
1449
1450 case 2u:
1451 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<2u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1452 return true;
1453
1454 case 3u:
1455 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<3u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1456 return true;
1457
1458 case 4u:
1459 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<4u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1460 return true;
1461
1462 case 5u:
1463 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<5u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1464 return true;
1465
1466 case 6u:
1467 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<6u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1468 return true;
1469
1470 case 7u:
1471 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<7u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1472 return true;
1473
1474 case 8u:
1475 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<8u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1476 return true;
1477
1478 default:
1479 break;
1480 }
1481 }
1482 else
1483 {
1484 ocean_assert(pixelCenter == PC_CENTER);
1485
1486 switch (channels)
1487 {
1488 case 1u:
1489 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<1u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1490 return true;
1491
1492 case 2u:
1493 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<2u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1494 return true;
1495
1496 case 3u:
1497 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<3u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1498 return true;
1499
1500 case 4u:
1501 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<4u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1502 return true;
1503
1504 case 5u:
1505 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<5u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1506 return true;
1507
1508 case 6u:
1509 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<6u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1510 return true;
1511
1512 case 7u:
1513 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<7u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1514 return true;
1515
1516 case 8u:
1517 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<8u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1518 return true;
1519
1520 default:
1521 break;
1522 }
1523 }
1524
1525 ocean_assert(false && "Invalid channel number");
1526 return false;
1527}
1528
1529template <typename TSource, typename TTarget, typename TScalar, typename TIntermediate>
1530bool FrameInterpolatorBilinear::Comfort::interpolatePixel(const TSource* frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2<TScalar>& position, TTarget* result, const TIntermediate& resultBias)
1531{
1532 ocean_assert(frame != nullptr);
1533 ocean_assert(channels >= 1u && channels <= 8u);
1534
1535 if (pixelCenter == PC_TOP_LEFT)
1536 {
1537 switch (channels)
1538 {
1539 case 1u:
1540 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 1u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1541 return true;
1542
1543 case 2u:
1544 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 2u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1545 return true;
1546
1547 case 3u:
1548 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 3u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1549 return true;
1550
1551 case 4u:
1552 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 4u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1553 return true;
1554
1555 case 5u:
1556 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 5u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1557 return true;
1558
1559 case 6u:
1560 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 6u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1561 return true;
1562
1563 case 7u:
1564 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 7u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1565 return true;
1566
1567 case 8u:
1568 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 8u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1569 return true;
1570
1571 default:
1572 break;
1573 }
1574 }
1575 else
1576 {
1577 ocean_assert(pixelCenter == PC_CENTER);
1578
1579 switch (channels)
1580 {
1581 case 1u:
1582 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 1u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1583 return true;
1584
1585 case 2u:
1586 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 2u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1587 return true;
1588
1589 case 3u:
1590 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 3u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1591 return true;
1592
1593 case 4u:
1594 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 4u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1595 return true;
1596
1597 case 5u:
1598 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 5u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1599 return true;
1600
1601 case 6u:
1602 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 6u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1603 return true;
1604
1605 case 7u:
1606 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 7u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1607 return true;
1608
1609 case 8u:
1610 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 8u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1611 return true;
1612
1613 default:
1614 break;
1615 }
1616 }
1617
1618 ocean_assert(false && "Invalid channel number");
1619 return false;
1620}
1621
1622template <typename T, unsigned int tChannels>
1623inline void FrameInterpolatorBilinear::resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
1624{
1625 ocean_assert(source != nullptr && target != nullptr);
1626 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
1627 ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
1628
1629 const double sourceX_s_targetX = double(sourceWidth) / double(targetWidth);
1630 const double sourceY_s_targetY = double(sourceHeight) / double(targetHeight);
1631
1632 scale<T, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, worker);
1633}
1634
1635template <typename T, unsigned int tChannels>
1636inline void FrameInterpolatorBilinear::scale(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
1637{
1638 ocean_assert(source != nullptr && target != nullptr);
1639 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
1640 ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
1641 ocean_assert(sourceX_s_targetX > 0.0);
1642 ocean_assert(sourceY_s_targetY > 0.0);
1643
1644 if (sourceWidth == targetWidth && sourceHeight == targetHeight)
1645 {
1646 FrameConverter::subFrame<T>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, 0u, 0u, 0u, 0u, sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements);
1647 return;
1648 }
1649
1650 if (std::is_same<T, uint8_t>::value)
1651 {
1652 // we have a SIMD-based optimized version for 'uint8_t' data types
1653
1654 scale8BitPerChannel<tChannels>((const uint8_t*)source, (uint8_t*)target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, worker);
1655 }
1656 else
1657 {
1658 using TScale = typename FloatTyper<T>::Type;
1659
1660 if (worker)
1661 {
1662 worker->executeFunction(Worker::Function::createStatic(&scaleSubset<T, TScale, tChannels>, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
1663 }
1664 else
1665 {
1666 scaleSubset<T, TScale, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
1667 }
1668 }
1669}
1670
1671template <unsigned int tChannels>
1672inline void FrameInterpolatorBilinear::affine8BitPerChannel(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3& source_A_target, const uint8_t* borderColor, uint8_t* target, const CV::PixelPositionI& targetOrigin, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
1673{
1674 // If applicable, apply an additional translation to the affine transformation.
1675 const SquareMatrix3 adjustedAffineTransform = source_A_target * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(targetOrigin.x()), Scalar(targetOrigin.y()), 1));
1676
1677 if (worker)
1678 {
1679 if (targetWidth >= 4u)
1680 {
1681#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1682 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::affine8BitPerChannelSSESubset<tChannels>, source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, 0u, sourcePaddingElements, targetPaddingElements), 0, targetHeight, 8u, 9u, 20u);
1683 return;
1684#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1685 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::affine8BitPerChannelNEONSubset<tChannels>, source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, 0u, sourcePaddingElements, targetPaddingElements), 0, targetHeight, 8u, 9u, 20u);
1686 return;
1687#endif
1688 }
1689
1690 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::affine8BitPerChannelSubset<tChannels>, source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, 0u, sourcePaddingElements, targetPaddingElements), 0, targetHeight, 8u, 9u, 20u);
1691 }
1692 else
1693 {
1694 if (targetWidth >= 4u)
1695 {
1696#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1697 affine8BitPerChannelSSESubset<tChannels>(source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, targetHeight, sourcePaddingElements, targetPaddingElements);
1698 return;
1699#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1700 affine8BitPerChannelNEONSubset<tChannels>(source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, targetHeight, sourcePaddingElements, targetPaddingElements);
1701 return;
1702#endif
1703 }
1704
1705 affine8BitPerChannelSubset<tChannels>(source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, targetHeight, sourcePaddingElements, targetPaddingElements);
1706 }
1707}
1708
1709template <unsigned int tChannels>
1710inline void FrameInterpolatorBilinear::homography8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const uint8_t* borderColor, uint8_t* output, const CV::PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1711{
1712 // we adjust the homography to address 'outputOrigin'
1713 const SquareMatrix3 input_H_shiftedOutput = input_H_output * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
1714
1715 if (worker)
1716 {
1717 if (outputWidth >= 4u)
1718 {
1719#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1720 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homography8BitPerChannelSSESubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1721 return;
1722#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1723 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homography8BitPerChannelNEONSubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1724 return;
1725#endif
1726 }
1727
1728 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homography8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1729 }
1730 else
1731 {
1732 if (outputWidth >= 4u)
1733 {
1734#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1735 homography8BitPerChannelSSESubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1736 return;
1737#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1738 homography8BitPerChannelNEONSubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1739 return;
1740#endif
1741 }
1742
1743 homography8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1744 }
1745}
1746
1747template <typename T, unsigned int tChannels>
1748inline void FrameInterpolatorBilinear::homography(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const T* borderColor, T* output, const CV::PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1749{
1750 if (std::is_same<T, uint8_t>::value)
1751 {
1752 homography8BitPerChannel<tChannels>((const uint8_t*)input, inputWidth, inputHeight, input_H_output, (const uint8_t*)borderColor, (uint8_t*)output, outputOrigin, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, worker);
1753 return;
1754 }
1755 else
1756 {
1757 // we adjust the homography to address 'outputOrigin'
1758 const SquareMatrix3 input_H_shiftedOutput = input_H_output * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
1759
1760 if (worker)
1761 {
1762 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographySubset<T, tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1763 }
1764 else
1765 {
1766 homographySubset<T, tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1767 }
1768 }
1769}
1770
1771template <unsigned int tChannels>
1772inline void FrameInterpolatorBilinear::homographies8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], const uint8_t* borderColor, uint8_t* output, const Vector2& outputQuadrantCenter, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1773{
1774 if (worker)
1775 {
1776 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographies8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, homographies, borderColor, output, outputQuadrantCenter.x(), outputQuadrantCenter.y(), outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 14u, 15u, 20u);
1777 }
1778 else
1779 {
1780 homographies8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, homographies, borderColor, output, outputQuadrantCenter.x(), outputQuadrantCenter.y(), outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1781 }
1782}
1783
1784template <unsigned int tChannels>
1785inline void FrameInterpolatorBilinear::homographyMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, uint8_t* output, uint8_t* outputMask, const CV::PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker)
1786{
1787 // we adjust the homography to address 'outputOrigin'
1788 const SquareMatrix3 input_H_shiftedOutput = input_H_output * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
1789
1790 if (worker)
1791 {
1792 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographyMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, output, outputMask, maskValue, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, 0u), 0, outputHeight, 12u, 13u, 20u);
1793 }
1794 else
1795 {
1796 homographyMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, output, outputMask, maskValue, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, outputHeight);
1797 }
1798}
1799
1800template <unsigned int tChannels>
1801inline void FrameInterpolatorBilinear::homographiesMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], uint8_t* output, uint8_t* outputMask, const Vector2& outputQuadrantCenter, const CV::PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker, const uint8_t maskValue)
1802{
1803 if (worker)
1804 {
1805 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographiesMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, homographies, output, outputMask, maskValue, outputQuadrantCenter.x(), outputQuadrantCenter.y(), outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, 0u), 0, outputHeight);
1806 }
1807 else
1808 {
1809 homographiesMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, homographies, output, outputMask, maskValue, outputQuadrantCenter.x(), outputQuadrantCenter.y(), outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, outputHeight);
1810 }
1811}
1812
1813template <unsigned int tChannels>
1814inline void FrameInterpolatorBilinear::homographyWithCamera8BitPerChannel(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const uint8_t* input, const SquareMatrix3& homography, const bool useDistortionParameters, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1815{
1816 const SquareMatrix3 normalizedHomography(inputCamera.invertedIntrinsic() * homography * outputCamera.intrinsic());
1817
1818 const PinholeCamera::DistortionLookup outputCameraDistortionLookup(outputCamera, 10u);
1819
1820 if (worker)
1821 {
1822 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographyWithCamera8BitPerChannelSubset<tChannels>, &inputCamera, &outputCamera, &outputCameraDistortionLookup, input, &normalizedHomography, useDistortionParameters, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputCamera.height());
1823 }
1824 else
1825 {
1826 homographyWithCamera8BitPerChannelSubset<tChannels>(&inputCamera, &outputCamera, &outputCameraDistortionLookup, input, &normalizedHomography, useDistortionParameters, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, outputCamera.height());
1827 }
1828}
1829
1830template <unsigned int tChannels>
1831inline void FrameInterpolatorBilinear::homographyWithCameraMask8BitPerChannel(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const uint8_t* input, const unsigned int inputPaddingElements, const SquareMatrix3& homography, uint8_t* output, uint8_t* outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker, const uint8_t maskValue)
1832{
1833 const SquareMatrix3 normalizedHomography(inputCamera.invertedIntrinsic() * homography * outputCamera.intrinsic());
1834
1835 const PinholeCamera::DistortionLookup outputCameraDistortionLookup(outputCamera, 10u);
1836
1837 if (worker)
1838 {
1839 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographyWithCameraMask8BitPerChannelSubset<tChannels>, &inputCamera, &outputCamera, &outputCameraDistortionLookup, input, inputPaddingElements, &normalizedHomography, output, outputMask, outputPaddingElements, outputMaskPaddingElements, maskValue, 0u, 0u), 0, outputCamera.height(), 11u, 12u, 10u);
1840 }
1841 else
1842 {
1843 homographyWithCameraMask8BitPerChannelSubset<tChannels>(&inputCamera, &outputCamera, &outputCameraDistortionLookup, input, inputPaddingElements, &normalizedHomography, output, outputMask, outputPaddingElements, outputMaskPaddingElements, maskValue, 0u, outputCamera.height());
1844 }
1845}
1846
1847template <typename T, unsigned int tChannels>
1848inline void FrameInterpolatorBilinear::lookup(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& input_LT_output, const bool offset, const T* borderColor, T* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1849{
1850 if constexpr (std::is_same<T, uint8_t>::value)
1851 {
1852#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1853 if ((tChannels >= 1u && input_LT_output.sizeX() >= 8) || (tChannels >= 2u && input_LT_output.sizeX() >= 4))
1854 {
1855 // NEON implementation for 1 channel: min width 8; for 2+ channels: min width 4
1856
1857 if (worker)
1858 {
1859 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::lookup8BitPerChannelSubsetNEON<tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (unsigned int)(input_LT_output.sizeY()), 9u, 10u, 20u);
1860 }
1861 else
1862 {
1863 lookup8BitPerChannelSubsetNEON<tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (unsigned int)(input_LT_output.sizeY()));
1864 }
1865
1866 return;
1867 }
1868#endif // defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1869
1870 if (worker)
1871 {
1872 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::lookup8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (unsigned int)input_LT_output.sizeY(), 9u, 10u, 20u);
1873 }
1874 else
1875 {
1876 lookup8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (unsigned int)(input_LT_output.sizeY()));
1877 }
1878 }
1879 else
1880 {
1881 ocean_assert((!std::is_same<T, uint8_t>::value));
1882
1883 if (worker)
1884 {
1885 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::lookupSubset<T, tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (unsigned int)(input_LT_output.sizeY()), 9u, 10u, 20u);
1886 }
1887 else
1888 {
1889 lookupSubset<T, tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (unsigned int)(input_LT_output.sizeY()));
1890 }
1891 }
1892}
1893
1894template <unsigned int tChannels>
1895inline void FrameInterpolatorBilinear::lookupMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& input_LT_output, const bool offset, uint8_t* output, uint8_t* outputMask, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker, const uint8_t maskValue)
1896{
1897 if (worker)
1898 {
1899 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::lookupMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, output, outputMask, maskValue, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, 0u), 0u, (unsigned int)(input_LT_output.sizeY()), 11u, 12u, 20u);
1900 }
1901 else
1902 {
1903 lookupMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, output, outputMask, maskValue, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, (unsigned int)(input_LT_output.sizeY()));
1904 }
1905}
1906
1907template <typename T, unsigned int tChannels>
1908void FrameInterpolatorBilinear::resampleCameraImage(const T* sourceFrame, const AnyCamera& sourceCamera, const SquareMatrix3& source_R_target, const AnyCamera& targetCamera, T* targetFrame, const unsigned int sourceFramePaddingElements, const unsigned int targetFramePaddingElements, LookupCorner2<Vector2>* source_OLT_target, Worker* worker, const unsigned int binSizeInPixel, const T* borderColor)
1909{
1910 static_assert(tChannels >= 1u, "Invalid channel number!");
1911
1912 ocean_assert(sourceFrame != nullptr);
1913 ocean_assert(sourceCamera.isValid());
1914 ocean_assert(source_R_target.isOrthonormal());
1915 ocean_assert(targetCamera.isValid());
1916 ocean_assert(targetFrame != nullptr);
1917 ocean_assert(binSizeInPixel >= 1u);
1918
1919 const size_t binsX = std::max(1u, targetCamera.width() / binSizeInPixel);
1920 const size_t binsY = std::max(1u, targetCamera.height() / binSizeInPixel);
1921 CV::FrameInterpolatorBilinear::LookupTable lookupTable(targetCamera.width(), targetCamera.height(), binsX, binsY);
1922
1923 for (size_t yBin = 0; yBin <= lookupTable.binsY(); ++yBin)
1924 {
1925 for (size_t xBin = 0; xBin <= lookupTable.binsX(); ++xBin)
1926 {
1927 const Vector2 cornerPosition = lookupTable.binTopLeftCornerPosition(xBin, yBin);
1928
1929 constexpr bool makeUnitVector = false; // we don't need a unit/normalized vector as we project the vector into the camera again
1930
1931 const Vector3 rayI = source_R_target * targetCamera.vector(cornerPosition, makeUnitVector);
1932 const Vector3 rayIF = Vector3(rayI.x(), -rayI.y(), -rayI.z());
1933
1934 if (rayIF.z() > Numeric::eps())
1935 {
1936 const Vector2 projectedPoint = sourceCamera.projectToImageIF(rayIF);
1937
1938 lookupTable.setBinTopLeftCornerValue(xBin, yBin, projectedPoint - cornerPosition);
1939 }
1940 else
1941 {
1942 // simply a coordinate far outside the input
1943 lookupTable.setBinTopLeftCornerValue(xBin, yBin, Vector2(Scalar(sourceCamera.width() * 10u), Scalar(sourceCamera.height() * 10u)));
1944 }
1945 }
1946 }
1947
1948 lookup<T, tChannels>(sourceFrame, sourceCamera.width(), sourceCamera.height(), lookupTable, true /*offset*/, borderColor, targetFrame, sourceFramePaddingElements, targetFramePaddingElements, worker);
1949
1950 if (source_OLT_target)
1951 {
1952 *source_OLT_target = std::move(lookupTable);
1953 }
1954}
1955
1956template <unsigned int tChannels>
1957void FrameInterpolatorBilinear::rotate8BitPerChannel(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker, const uint8_t* borderColor)
1958{
1959 static_assert(tChannels != 0u, "Invalid channel number!");
1960
1961 ocean_assert(source != nullptr && target != nullptr);
1962 ocean_assert(width >= 1u && height >= 1u);
1963
1964 if (worker)
1965 {
1966 worker->executeFunction(Worker::Function::createStatic(&rotate8BitPerChannelSubset<tChannels>, source, target, width, height, horizontalAnchorPosition, verticalAnchorPosition, angle, borderColor, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height);
1967 }
1968 else
1969 {
1970 rotate8BitPerChannelSubset<tChannels>(source, target, width, height, horizontalAnchorPosition, verticalAnchorPosition, angle, borderColor, sourcePaddingElements, targetPaddingElements, 0u, height);
1971 }
1972}
1973
1974template <unsigned int tChannels, PixelCenter tPixelCenter, typename TScalar>
1975inline void FrameInterpolatorBilinear::interpolatePixel8BitPerChannel(const uint8_t* frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2<TScalar>& position, uint8_t* result)
1976{
1977 static_assert(tChannels != 0u, "Invalid channel number!");
1978 static_assert(tPixelCenter == PC_TOP_LEFT || tPixelCenter == PC_CENTER, "Invalid pixel center!");
1979
1980 ocean_assert(frame != nullptr && result != nullptr);
1981 ocean_assert(width != 0u && height != 0u);
1982
1983 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
1984
1985 ocean_assert(position.x() >= TScalar(0));
1986 ocean_assert(position.y() >= TScalar(0));
1987
1988 if constexpr (tPixelCenter == PC_TOP_LEFT)
1989 {
1990 ocean_assert(position.x() <= TScalar(width - 1u));
1991 ocean_assert(position.y() <= TScalar(height - 1u));
1992
1993 const unsigned int left = (unsigned int)(position.x());
1994 const unsigned int top = (unsigned int)(position.y());
1995 ocean_assert(left < width && top < height);
1996
1997 const TScalar tx = position.x() - TScalar(left);
1998 ocean_assert(tx >= 0 && tx <= 1);
1999 const unsigned int txi = (unsigned int)(tx * TScalar(128) + TScalar(0.5));
2000 const unsigned int txi_ = 128u - txi;
2001
2002 const TScalar ty = position.y() - TScalar(top);
2003 ocean_assert(ty >= 0 && ty <= 1);
2004 const unsigned int tyi = (unsigned int)(ty * TScalar(128) + TScalar(0.5));
2005 const unsigned int tyi_ = 128u - tyi;
2006
2007 const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2008 const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2009
2010 const uint8_t* const topLeft = frame + top * frameStrideElements + tChannels * left;
2011
2012 const unsigned int txty = txi * tyi;
2013 const unsigned int txty_ = txi * tyi_;
2014 const unsigned int tx_ty = txi_ * tyi;
2015 const unsigned int tx_ty_ = txi_ * tyi_;
2016
2017 for (unsigned int n = 0u; n < tChannels; ++n)
2018 {
2019 result[n] = uint8_t((topLeft[n] * tx_ty_ + topLeft[rightOffset + n] * txty_ + topLeft[bottomOffset + n] * tx_ty + topLeft[bottomOffset + rightOffset + n] * txty + 8192u) >> 14u);
2020 }
2021 }
2022 else
2023 {
2024 ocean_assert(tPixelCenter == PC_CENTER);
2025
2026 ocean_assert(position.x() <= TScalar(width));
2027 ocean_assert(position.y() <= TScalar(height));
2028
2029 const TScalar xShifted = std::max(TScalar(0.0), position.x() - TScalar(0.5));
2030 const TScalar yShifted = std::max(TScalar(0.0), position.y() - TScalar(0.5));
2031
2032 const unsigned int left = (unsigned int)(xShifted);
2033 const unsigned int top = (unsigned int)(yShifted);
2034
2035 ocean_assert(left < width);
2036 ocean_assert(top < height);
2037
2038 const TScalar tx = xShifted - TScalar(left);
2039 const TScalar ty = yShifted - TScalar(top);
2040
2041 ocean_assert(tx >= 0 && tx <= 1);
2042 ocean_assert(ty >= 0 && ty <= 1);
2043
2044 const unsigned int txi = (unsigned int)(tx * TScalar(128) + TScalar(0.5));
2045 const unsigned int txi_ = 128u - txi;
2046
2047 const unsigned int tyi = (unsigned int)(ty * TScalar(128) + TScalar(0.5));
2048 const unsigned int tyi_ = 128u - tyi;
2049
2050 const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2051 const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2052
2053 const uint8_t* const topLeft = frame + top * frameStrideElements + left * tChannels;
2054
2055 const unsigned int txty = txi * tyi;
2056 const unsigned int txty_ = txi * tyi_;
2057 const unsigned int tx_ty = txi_ * tyi;
2058 const unsigned int tx_ty_ = txi_ * tyi_;
2059
2060 for (unsigned int n = 0u; n < tChannels; ++n)
2061 {
2062 result[n] = uint8_t((topLeft[n] * tx_ty_ + topLeft[rightOffset + n] * txty_ + topLeft[bottomOffset + n] * tx_ty + topLeft[bottomOffset + rightOffset + n] * txty + 8192u) >> 14u);
2063 }
2064 }
2065}
2066
2067template <typename TSource, typename TTarget, unsigned int tChannels, PixelCenter tPixelCenter, typename TScalar, typename TIntermediate>
2068inline void FrameInterpolatorBilinear::interpolatePixel(const TSource* frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2<TScalar>& position, TTarget* result, const TIntermediate& resultBias)
2069{
2070 static_assert(tChannels != 0u, "Invalid channel number!");
2071 static_assert(tPixelCenter == PC_TOP_LEFT || tPixelCenter == PC_CENTER, "Invalid pixel center!");
2072
2073 ocean_assert(frame != nullptr && result != nullptr);
2074 ocean_assert(width != 0u && height != 0u);
2075
2076 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
2077
2078 ocean_assert(position.x() >= TScalar(0));
2079 ocean_assert(position.y() >= TScalar(0));
2080
2081 if constexpr (tPixelCenter == PC_TOP_LEFT)
2082 {
2083 ocean_assert(position.x() <= TScalar(width - 1u));
2084 ocean_assert(position.y() <= TScalar(height - 1u));
2085
2086 const unsigned int left = (unsigned int)(position.x());
2087 const unsigned int top = (unsigned int)(position.y());
2088
2089 const TScalar tx = position.x() - TScalar(left);
2090 ocean_assert(tx >= 0 && tx <= 1);
2091
2092 const TScalar ty = position.y() - TScalar(top);
2093 ocean_assert(ty >= 0 && ty <= 1);
2094
2095 const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2096 const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2097
2098 const TSource* const topLeft = frame + top * frameStrideElements + tChannels * left;
2099
2100 const TIntermediate txty = TIntermediate(tx) * TIntermediate(ty);
2101 const TIntermediate txty_ = TIntermediate(tx) * (TIntermediate(1) - TIntermediate(ty));
2102 const TIntermediate tx_ty = (TIntermediate(1) - TIntermediate(tx)) * TIntermediate(ty);
2103 const TIntermediate tx_ty_ = (TIntermediate(1) - TIntermediate(tx)) * (TIntermediate(1) - TIntermediate(ty));
2104
2105 ocean_assert_accuracy(NumericT<TIntermediate>::isEqual(txty + txty_ + tx_ty + tx_ty_, TIntermediate(1)));
2106
2107 for (unsigned int n = 0u; n < tChannels; ++n)
2108 {
2109 result[n] = TTarget(TIntermediate(topLeft[n]) * tx_ty_ + TIntermediate(topLeft[rightOffset + n]) * txty_ + TIntermediate(topLeft[bottomOffset + n]) * tx_ty + TIntermediate(topLeft[bottomOffset + rightOffset + n]) * txty + resultBias);
2110 }
2111 }
2112 else
2113 {
2114 ocean_assert(tPixelCenter == PC_CENTER);
2115
2116 ocean_assert(position.x() <= TScalar(width));
2117 ocean_assert(position.y() <= TScalar(height));
2118
2119 const TScalar xShifted = std::max(TScalar(0.0), position.x() - TScalar(0.5));
2120 const TScalar yShifted = std::max(TScalar(0.0), position.y() - TScalar(0.5));
2121
2122 const unsigned int left = (unsigned int)(xShifted);
2123 const unsigned int top = (unsigned int)(yShifted);
2124
2125 ocean_assert(left < width);
2126 ocean_assert(top < height);
2127
2128 const TScalar tx = xShifted - TScalar(left);
2129 const TScalar ty = yShifted - TScalar(top);
2130
2131 ocean_assert(tx >= 0 && tx <= 1);
2132 ocean_assert(ty >= 0 && ty <= 1);
2133
2134 const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2135 const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2136
2137 const TSource* const topLeft = frame + top * frameStrideElements + tChannels * left;
2138
2139 const TIntermediate txty = TIntermediate(tx) * TIntermediate(ty);
2140 const TIntermediate txty_ = TIntermediate(tx) * (TIntermediate(1) - TIntermediate(ty));
2141 const TIntermediate tx_ty = (TIntermediate(1) - TIntermediate(tx)) * TIntermediate(ty);
2142 const TIntermediate tx_ty_ = (TIntermediate(1) - TIntermediate(tx)) * (TIntermediate(1) - TIntermediate(ty));
2143
2144 ocean_assert_accuracy(NumericT<TIntermediate>::isEqual(txty + txty_ + tx_ty + tx_ty_, TIntermediate(1)));
2145
2146 for (unsigned int n = 0u; n < tChannels; ++n)
2147 {
2148 result[n] = TTarget(TIntermediate(topLeft[n]) * tx_ty_ + TIntermediate(topLeft[rightOffset + n]) * txty_ + TIntermediate(topLeft[bottomOffset + n]) * tx_ty + TIntermediate(topLeft[bottomOffset + rightOffset + n]) * txty + resultBias);
2149 }
2150 }
2151}
2152
2153template <unsigned int tChannels, bool tAlphaAtFront, bool tTransparentIs0xFF>
2154inline void FrameInterpolatorBilinear::interpolate1PixelFullAlphaBorder8BitPerChannel(const uint8_t* frame, const unsigned int width, const unsigned int height, const Vector2& position, uint8_t* result, const unsigned int framePaddingElements)
2155{
2156 static_assert(tChannels != 0u, "Invalid channel number!");
2157
2158 ocean_assert(frame && result);
2159
2160 const Vector2 pos(position.x() - Scalar(0.5), position.y() - Scalar(0.5));
2161
2162 // check whether the position is outside the frame and will therefore be 100% transparent
2163 if (pos.x() <= Scalar(-1) || pos.y() <= Scalar(-1) || pos.x() >= Scalar(width) || pos.y() >= Scalar(height))
2164 {
2165 for (unsigned int n = 0u; n < tChannels - 1u; ++n)
2166 {
2168 }
2169
2170 result[FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] = FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2171
2172 return;
2173 }
2174
2175 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
2176
2177 const int left = int(Numeric::floor(pos.x()));
2178 const int top = int(Numeric::floor(pos.y()));
2179
2180 ocean_assert(left >= -1 && left < int(width));
2181 ocean_assert(top >= -1 && top < int(height));
2182
2183 if ((unsigned int)left < width - 1u && (unsigned int)top < height - 1u)
2184 {
2185 // we have a valid pixel position for the left, top, right and bottom pixel
2186
2187 const unsigned int txi = (unsigned int)((pos.x() - Scalar(left)) * Scalar(128) + Scalar(0.5));
2188 const unsigned int txi_ = 128u - txi;
2189
2190 const unsigned int tyi = (unsigned int)((pos.y() - Scalar(top)) * Scalar(128) + Scalar(0.5));
2191 const unsigned int tyi_ = 128u - tyi;
2192
2193 const uint8_t* const topLeft = frame + top * frameStrideElements + left * tChannels;
2194
2195 const unsigned int txty = txi * tyi;
2196 const unsigned int txty_ = txi * tyi_;
2197 const unsigned int tx_ty = txi_ * tyi;
2198 const unsigned int tx_ty_ = txi_ * tyi_;
2199
2200 for (unsigned int n = 0u; n < tChannels; ++n)
2201 {
2202 result[n] = (topLeft[n] * tx_ty_ + topLeft[tChannels + n] * txty_
2203 + topLeft[frameStrideElements + n] * tx_ty + topLeft[frameStrideElements + tChannels + n] * txty + 8192u) >> 14u;
2204 }
2205 }
2206 else
2207 {
2208 // we do not have a valid pixel for all 4-neighborhood pixels
2209
2210 const unsigned int txi = (unsigned int)((pos.x() - Scalar(left)) * Scalar(128) + Scalar(0.5));
2211 const unsigned int txi_ = 128u - txi;
2212
2213 const unsigned int tyi = (unsigned int)((pos.y() - Scalar(top)) * Scalar(128) + Scalar(0.5));
2214 const unsigned int tyi_ = 128u - tyi;
2215
2216 const unsigned int rightOffset = (left >= 0 && left + 1u < width) ? tChannels : 0u;
2217 const unsigned int bottomOffset = (top >= 0 && top + 1u < height) ? frameStrideElements : 0u;
2218
2219 ocean_assert(left < int(width) && top < int(height));
2220 const uint8_t* const topLeft = frame + max(0, top) * frameStrideElements + max(0, left) * tChannels;
2221
2222 const unsigned int txty = txi * tyi;
2223 const unsigned int txty_ = txi * tyi_;
2224 const unsigned int tx_ty = txi_ * tyi;
2225 const unsigned int tx_ty_ = txi_ * tyi_;
2226
2227 for (unsigned int n = FrameBlender::SourceOffset<tAlphaAtFront>::data(); n < tChannels + FrameBlender::SourceOffset<tAlphaAtFront>::data() - 1u; ++n)
2228 {
2229 result[n] = (topLeft[n] * tx_ty_ + topLeft[rightOffset + n] * txty_
2230 + topLeft[bottomOffset + n] * tx_ty + topLeft[bottomOffset + rightOffset + n] * txty + 8192u) >> 14u;
2231 }
2232
2233 const uint8_t alphaTopLeft = (left >= 0 && top >= 0) ? topLeft[FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] : FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2234 const uint8_t alphaTopRight = (left + 1u < width && top >= 0) ? topLeft[rightOffset + FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] : FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2235 const uint8_t alphaBottomLeft = (left >= 0 && top + 1u < height) ? topLeft[bottomOffset + FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] : FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2236 const uint8_t alphaBottomRight = (left + 1u < width && top + 1u < height) ? topLeft[bottomOffset + rightOffset + FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] : FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2237
2238 result[FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] = (alphaTopLeft * tx_ty_ + alphaTopRight * txty_ + alphaBottomLeft * tx_ty + alphaBottomRight * txty + 8192u) >> 14u;
2239 }
2240}
2241
2242template <unsigned int tChannels>
2243void FrameInterpolatorBilinear::affine8BitPerChannelSubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberOutputRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
2244{
2245 static_assert(tChannels >= 1u, "Invalid channel number!");
2246
2247 ocean_assert(source != nullptr && target != nullptr);
2248 ocean_assert(sourceWidth > 0u && sourceHeight > 0u);
2249 ocean_assert_and_suppress_unused(targetWidth > 0u && targetHeight > 0u, targetHeight);
2250 ocean_assert(source_A_target);
2251 ocean_assert(!source_A_target->isNull() && Numeric::isEqualEps((*source_A_target)[2]) && Numeric::isEqualEps((*source_A_target)[5]));
2252
2253 ocean_assert(firstTargetRow + numberOutputRows <= targetHeight);
2254
2255 const unsigned int targetStrideElements = tChannels * targetWidth + targetPaddingElements;
2256
2257 const Scalar scalarSourceWidth_1 = Scalar(sourceWidth - 1u);
2258 const Scalar scalarSourceHeight_1 = Scalar(sourceHeight - 1u);
2259
2260 using PixelType = typename DataType<uint8_t, tChannels>::Type;
2261
2262 uint8_t zeroColor[tChannels] = {uint8_t(0)};
2263 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
2264
2265 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberOutputRows; ++y)
2266 {
2267 PixelType* targetRow = (PixelType*)(target + y * targetStrideElements);
2268
2269 /*
2270 * We can slightly optimize the 3x3 matrix multiplication:
2271 *
2272 * | X0 Y0 Z0 | | x |
2273 * | X1 Y1 Z1 | * | y |
2274 * | 0 0 1 | | 1 |
2275 *
2276 * | xx | | X0 * x | | Y0 * y + Z0 |
2277 * | yy | = | X1 * x | + | Y1 * y + Z1 |
2278 *
2279 * As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
2280 *
2281 * C0 = Y0 * y + Z0
2282 * C1 = Y1 * y + Z1
2283 *
2284 * So the computation becomes:
2285 *
2286 * | x' | | X0 * x | | C0 |
2287 * | y' | = | X1 * x | + | C1 |
2288 */
2289
2290 const Vector2 X(source_A_target->data() + 0);
2291 const Vector2 c(Vector2(source_A_target->data() + 3) * Scalar(y) + Vector2(source_A_target->data() + 6));
2292
2293 for (unsigned int x = 0u; x < targetWidth; ++x)
2294 {
2295 const Vector2 sourcePosition = X * Scalar(x) + c;
2296
2297#ifdef OCEAN_DEBUG
2298 const Scalar debugSourceX = (*source_A_target)[0] * Scalar(x) + (*source_A_target)[3] * Scalar(y) + (*source_A_target)[6];
2299 const Scalar debugSourceY = (*source_A_target)[1] * Scalar(x) + (*source_A_target)[4] * Scalar(y) + (*source_A_target)[7];
2300 ocean_assert(sourcePosition.isEqual(Vector2(debugSourceX, debugSourceY), Scalar(0.01)));
2301#endif
2302
2303 if (sourcePosition.x() < Scalar(0) || sourcePosition.x() > scalarSourceWidth_1 || sourcePosition.y() < Scalar(0) || sourcePosition.y() > scalarSourceHeight_1)
2304 {
2305 *targetRow = *bColor;
2306 }
2307 else
2308 {
2309 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(source, sourceWidth, sourceHeight, sourcePaddingElements, sourcePosition, (uint8_t*)(targetRow));
2310 }
2311
2312 targetRow++;
2313 }
2314 }
2315}
2316
2317template <unsigned int tChannels>
2318void FrameInterpolatorBilinear::homography8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
2319{
2320 static_assert(tChannels >= 1u, "Invalid channel number!");
2321
2322 ocean_assert(input != nullptr && output != nullptr);
2323 ocean_assert(inputWidth > 0u && inputHeight > 0u);
2324 ocean_assert(outputWidth > 0u && outputHeight > 0u);
2325 ocean_assert(input_H_output != nullptr);
2326
2327 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
2328
2329 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
2330
2331 const Scalar scalarInputWidth_1 = Scalar(inputWidth - 1u);
2332 const Scalar scalarInputHeight_1 = Scalar(inputHeight - 1u);
2333
2334 using PixelType = typename DataType<uint8_t, tChannels>::Type;
2335
2336 uint8_t zeroColor[tChannels] = {uint8_t(0)};
2337 const PixelType bColor = borderColor ? *(PixelType*)borderColor : *(PixelType*)zeroColor;
2338
2339 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
2340 {
2341 /*
2342 * We can slightly optimize the 3x3 matrix multiplication:
2343 *
2344 * | X0 Y0 Z0 | | x |
2345 * | X1 Y1 Z1 | * | y |
2346 * | X2 Y2 Z2 | | 1 |
2347 *
2348 * | xx | | X0 * x | | Y0 * y + Z0 |
2349 * | yy | = | X1 * x | + | Y1 * y + Z1 |
2350 * | zz | | X2 * x | | Y2 * y + Z2 |
2351 *
2352 * | xx | | X0 * x | | C0 |
2353 * | yy | = | X1 * x | + | C1 |
2354 * | zz | | X2 * x | | C2 |
2355 *
2356 * As y is constant within the inner loop, we can pre-calculate the following terms:
2357 *
2358 * | x' | | (X0 * x + C0) / (X2 * x + C2) |
2359 * | y' | = | (X1 * x + C1) / (X2 * x + C2) |
2360 */
2361
2362 const Vector2 X(input_H_output->data() + 0);
2363 const Vector2 c(Vector2(input_H_output->data() + 3) * Scalar(y) + Vector2(input_H_output->data() + 6));
2364
2365 const Scalar X2 = (*input_H_output)(2, 0);
2366 const Scalar constValue2 = (*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2);
2367
2368 PixelType* outputRowPixel = (PixelType*)(output + y * outputStrideElements);
2369
2370 for (unsigned int x = 0u; x < outputWidth; ++x)
2371 {
2372 ocean_assert_accuracy(Numeric::isNotEqualEps((X2 * Scalar(x) + constValue2)));
2373 const Vector2 inputPosition((X * Scalar(x) + c) / (X2 * Scalar(x) + constValue2));
2374
2375#ifdef OCEAN_DEBUG
2376 const Vector2 debugInputPosition(*input_H_output * Vector2(Scalar(x), Scalar(y)));
2377 ocean_assert(inputPosition.isEqual(debugInputPosition, Scalar(0.01)));
2378#endif
2379
2380 if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
2381 {
2382 *outputRowPixel = bColor;
2383 }
2384 else
2385 {
2386 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputRowPixel));
2387 }
2388
2389 ++outputRowPixel;
2390 }
2391 }
2392}
2393
2394template <typename T, unsigned int tChannels>
2395void FrameInterpolatorBilinear::homographySubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
2396{
2397 static_assert(tChannels >= 1u, "Invalid channel number!");
2398
2399 ocean_assert(input != nullptr && output != nullptr);
2400 ocean_assert(inputWidth > 0u && inputHeight > 0u);
2401 ocean_assert_and_suppress_unused(outputWidth > 0u && outputHeight > 0u, outputHeight);
2402 ocean_assert(input_H_output != nullptr);
2403
2404 ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
2405
2406 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
2407
2408 const Scalar scalarInputWidth1 = Scalar(inputWidth - 1u);
2409 const Scalar scalarInputHeight1 = Scalar(inputHeight - 1u);
2410
2411 // we need to find a best matching floating point data type for the intermediate interpolation results
2412 using TIntermediate = typename FloatTyper<T>::Type;
2413
2414 using PixelType = typename DataType<T, tChannels>::Type;
2415
2416 constexpr T zeroColor[tChannels] = {T(0)};
2417 const PixelType* const bColor = borderColor ? (PixelType*)(borderColor) : (PixelType*)(zeroColor);
2418
2419 constexpr TIntermediate bias = TIntermediate(0);
2420
2421 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
2422 {
2423 /*
2424 * We can slightly optimize the 3x3 matrix multiplication:
2425 *
2426 * | X0 Y0 Z0 | | x |
2427 * | X1 Y1 Z1 | * | y |
2428 * | X2 Y2 Z2 | | 1 |
2429 *
2430 * | xx | | X0 * x | | Y0 * y + Z0 |
2431 * | yy | = | X1 * x | + | Y1 * y + Z1 |
2432 * | zz | | X2 * x | | Y2 * y + Z2 |
2433 *
2434 * | xx | | X0 * x | | C0 |
2435 * | yy | = | X1 * x | + | C1 |
2436 * | zz | | X2 * x | | C3 |
2437 *
2438 * As y is constant within the inner loop, we can pre-calculate the following terms:
2439 *
2440 * | x' | | (X0 * x + C0) / (X2 * x + C2) |
2441 * | y' | = | (X1 * x + C1) / (X2 * x + C2) |
2442 */
2443
2444 const Vector2 X(input_H_output->data() + 0);
2445 const Vector2 c(Vector2(input_H_output->data() + 3) * Scalar(y) + Vector2(input_H_output->data() + 6));
2446
2447 const Scalar X2 = (*input_H_output)(2, 0);
2448 const Scalar constValue2 = (*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2);
2449
2450 PixelType* outputRowPixel = (PixelType*)(output + y * outputStrideElements);
2451
2452 for (unsigned int x = 0u; x < outputWidth; ++x)
2453 {
2454 ocean_assert_accuracy(Numeric::isNotEqualEps((X2 * Scalar(x) + constValue2)));
2455 const Vector2 inputPosition((X * Scalar(x) + c) / (X2 * Scalar(x) + constValue2));
2456
2457#ifdef OCEAN_DEBUG
2458 const Vector2 debugInputPosition(*input_H_output * Vector2(Scalar(x), Scalar(y)));
2459 ocean_assert((std::is_same<float, Scalar>::value) || inputPosition.isEqual(debugInputPosition, Scalar(0.01)));
2460#endif
2461
2462 if (inputPosition.x() >= Scalar(0) && inputPosition.x() <= scalarInputWidth1 && inputPosition.y() >= Scalar(0) && inputPosition.y() <= scalarInputHeight1)
2463 {
2464 interpolatePixel<T, T, tChannels, CV::PC_TOP_LEFT, Scalar, TIntermediate>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (T*)(outputRowPixel), bias);
2465 }
2466 else
2467 {
2468 *outputRowPixel = *bColor;
2469 }
2470
2471 ++outputRowPixel;
2472 }
2473 }
2474}
2475
2476#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
2477
2478template <unsigned int tChannels>
2479inline void FrameInterpolatorBilinear::affine8BitPerChannelSSESubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
2480{
2481 static_assert(tChannels >= 1u, "Invalid channel number!");
2482
2483 ocean_assert(source && target);
2484 ocean_assert(sourceWidth > 0u && sourceHeight > 0u);
2485 ocean_assert(targetWidth >= 4u && targetHeight > 0u);
2486 ocean_assert(source_A_target);
2487 ocean_assert(!source_A_target->isNull() && Numeric::isEqualEps((*source_A_target)[2]) && Numeric::isEqualEps((*source_A_target)[5]));
2488
2489 ocean_assert_and_suppress_unused(firstTargetRow + numberTargetRows <= targetHeight, targetHeight);
2490
2491 const unsigned int sourceStrideElements = tChannels * sourceWidth + sourcePaddingElements;
2492 const unsigned int targetStrideElements = tChannels * targetWidth + targetPaddingElements;
2493
2494 using PixelType = typename DataType<uint8_t, tChannels>::Type;
2495
2496 uint8_t zeroColor[tChannels] = {uint8_t(0)};
2497 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
2498
2499 OCEAN_ALIGN_DATA(16) unsigned int validPixels[4];
2500
2501 OCEAN_ALIGN_DATA(16) unsigned int topLeftOffsets[4];
2502 OCEAN_ALIGN_DATA(16) unsigned int topRightOffsets[4];
2503 OCEAN_ALIGN_DATA(16) unsigned int bottomLeftOffsets[4];
2504 OCEAN_ALIGN_DATA(16) unsigned int bottomRightOffsets[4];
2505
2506 // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
2507 const __m128 m128_f_X0 = _mm_set_ps1(float((*source_A_target)(0, 0)));
2508 const __m128 m128_f_X1 = _mm_set_ps1(float((*source_A_target)(1, 0)));
2509
2510 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
2511 {
2512 PixelType* targetRow = (PixelType*)(target + y * targetStrideElements);
2513
2514 /*
2515 * We can slightly optimize the 3x3 matrix multiplication:
2516 *
2517 * | X0 Y0 Z0 | | x |
2518 * | X1 Y1 Z1 | * | y |
2519 * | 0 0 1 | | 1 |
2520 *
2521 * | xx | | X0 * x | | Y0 * y + Z0 |
2522 * | yy | = | X1 * x | + | Y1 * y + Z1 |
2523 *
2524 * As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
2525 *
2526 * C0 = Y0 * y + Z0
2527 * C1 = Y1 * y + Z1
2528 *
2529 * So the computation becomes:
2530 *
2531 * | x' | | X0 * x | | C0 |
2532 * | y' | = | X1 * x | + | C1 |
2533 */
2534
2535 // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
2536 const __m128 m128_f_C0 = _mm_set_ps1(float((*source_A_target)(0, 1) * Scalar(y) + (*source_A_target)(0, 2)));
2537 const __m128 m128_f_C1 = _mm_set_ps1(float((*source_A_target)(1, 1) * Scalar(y) + (*source_A_target)(1, 2)));
2538
2539 // we store 4 floats: [0.0f, 0.0f, 0.0f, 0.0f]
2540 const __m128 m128_f_zero = _mm_setzero_ps();
2541
2542 // we store 4 integers: [tChannels, tChannels, tChannels, tChannels]
2543 const __m128i m128_i_channels = _mm_set1_epi32(tChannels);
2544
2545 // we store 4 integers: [sourceStrideElements, sourceStrideElements, sourceStrideElements, sourceStrideElements]
2546 const __m128i m128_i_sourceStrideElements = _mm_set1_epi32(sourceStrideElements);
2547
2548 // we store 4 integers: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
2549 const __m128i m128_i_sourceWidth_1 = _mm_set1_epi32(int(sourceWidth) - 1);
2550 const __m128i m128_i_sourceHeight_1 = _mm_set1_epi32(int(sourceHeight) - 1);
2551
2552 // we store 4 floats: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
2553 const __m128 m128_f_sourceWidth_1 = _mm_set_ps1(float(sourceWidth - 1u));
2554 const __m128 m128_f_sourceHeight_1 = _mm_set_ps1(float(sourceHeight - 1u));
2555
2556 for (unsigned int x = 0u; x < targetWidth; x += 4u)
2557 {
2558 if (x + 4u > targetWidth)
2559 {
2560 // the last iteration will not fit into the output frame,
2561 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
2562
2563 ocean_assert(x >= 4u && targetWidth > 4u);
2564 const unsigned int newX = targetWidth - 4u;
2565
2566 ocean_assert(x > newX);
2567 targetRow -= x - newX;
2568
2569 x = newX;
2570
2571 // the for loop will stop after this iteration
2572 ocean_assert(!(x + 4u < targetWidth));
2573 }
2574
2575
2576 // we need four successive x coordinate floats:
2577 // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
2578 const __m128 m128_f_x_0123 = _mm_set_ps(float(x + 3u), float(x + 2u), float(x + 1u), float(x + 0u));
2579
2580 // we calculate xx and yy for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
2581 const __m128 m128_f_sourceX = _mm_add_ps(_mm_mul_ps(m128_f_X0, m128_f_x_0123), m128_f_C0);
2582 const __m128 m128_f_sourceY = _mm_add_ps(_mm_mul_ps(m128_f_X1, m128_f_x_0123), m128_f_C1);
2583
2584 // now we check whether we are inside the input frame
2585 const __m128 m128_f_validPixelX = _mm_and_ps(_mm_cmple_ps(m128_f_sourceX, m128_f_sourceWidth_1), _mm_cmpge_ps(m128_f_sourceX, m128_f_zero)); // inputPosition.x() <= (inputWidth - 1) && inputPosition.x() >= 0 ? 0xFFFFFFFF : 0x00000000
2586 const __m128 m128_f_validPixelY = _mm_and_ps(_mm_cmple_ps(m128_f_sourceY, m128_f_sourceHeight_1), _mm_cmpge_ps(m128_f_sourceY, m128_f_zero)); // inputPosition.y() <= (inputHeight - 1) && inputPosition.y() >= 0 ? 0xFFFFFFFF : 0x00000000
2587
2588 const __m128i m128_i_validPixel = _mm_castps_si128(_mm_and_ps(m128_f_validPixelX, m128_f_validPixelY)); // is_inside_input_frame(inputPosition) ? 0xFFFFFFFF : 0x00000000
2589
2590 // we can stop here if all pixels are invalid
2591 if (_mm_test_all_zeros(m128_i_validPixel, _mm_set1_epi32(0xFFFFFFFF)))
2592 {
2593#ifdef OCEAN_DEBUG
2594 OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
2595 _mm_store_si128((__m128i*)debugValidPixels, m128_i_validPixel);
2596 ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
2597#endif
2598
2599 targetRow[0] = *bColor;
2600 targetRow[1] = *bColor;
2601 targetRow[2] = *bColor;
2602 targetRow[3] = *bColor;
2603
2604 targetRow += 4;
2605
2606 continue;
2607 }
2608
2609 // we store the result
2610 _mm_store_si128((__m128i*)validPixels, m128_i_validPixel);
2611 ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
2612
2613
2614 // now we determine the left, top, right and bottom pixel used for the interpolation
2615 const __m128 m128_f_tx_floor = _mm_floor_ps(m128_f_sourceX);
2616 const __m128 m128_f_ty_floor = _mm_floor_ps(m128_f_sourceY);
2617
2618 // left = floor(x); top = floor(y)
2619 const __m128i m128_i_left = _mm_cvtps_epi32(m128_f_tx_floor);
2620 const __m128i m128_i_top = _mm_cvtps_epi32(m128_f_ty_floor);
2621
2622 // right = min(left + 1, width - 1); bottom = min(top + 1; height - 1)
2623 const __m128i m128_i_right = _mm_min_epu32(_mm_add_epi32(m128_i_left, _mm_set1_epi32(1)), m128_i_sourceWidth_1);
2624 const __m128i m128_i_bottom = _mm_min_epu32(_mm_add_epi32(m128_i_top, _mm_set1_epi32(1)), m128_i_sourceHeight_1);
2625
2626 // offset = (y * sourceStrideElements + tChannels * x)
2627 const __m128i m128_i_topLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left)); // topleftOffset = (top * sourceStrideElements + tChannels * left)
2628 const __m128i m128_i_topRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right)); // toprightOffset = (top * sourceStrideElements + tChannels * right)
2629 const __m128i m128_i_bottomLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left)); // ...
2630 const __m128i m128_i_bottomRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right));
2631
2632 // we store the offsets
2633 _mm_store_si128((__m128i*)topLeftOffsets, m128_i_topLeftOffset);
2634 _mm_store_si128((__m128i*)topRightOffsets, m128_i_topRightOffset);
2635 _mm_store_si128((__m128i*)bottomLeftOffsets, m128_i_bottomLeftOffset);
2636 _mm_store_si128((__m128i*)bottomRightOffsets, m128_i_bottomRightOffset);
2637
2638
2639 // now we need to determine the interpolation factors tx, tx_ and ty, ty_: (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
2640
2641 // we determine the fractional portions of the x' and y':
2642 // e.g., [43.1231, -12.5543, -34.123, 99.2]
2643 // [ 0.1231, 0.4457, 0.877, 0.2] // note the result for negative value - but we will not process negative values anyway due to 'validPixel'
2644 __m128 m128_f_tx = _mm_sub_ps(m128_f_sourceX, m128_f_tx_floor);
2645 __m128 m128_f_ty = _mm_sub_ps(m128_f_sourceY, m128_f_ty_floor);
2646
2647 // we use integer interpolation [0.0, 1.0] -> [0, 128]
2648 m128_f_tx = _mm_mul_ps(m128_f_tx, _mm_set_ps1(128.0f));
2649 m128_f_ty = _mm_mul_ps(m128_f_ty, _mm_set_ps1(128.0f));
2650
2651 m128_f_tx = _mm_round_ps(m128_f_tx, _MM_FROUND_TO_NEAREST_INT);
2652 m128_f_ty = _mm_round_ps(m128_f_ty, _MM_FROUND_TO_NEAREST_INT);
2653
2654 const __m128i m128_i_tx = _mm_cvtps_epi32(m128_f_tx);
2655 const __m128i m128_i_ty = _mm_cvtps_epi32(m128_f_ty);
2656
2657 interpolate4Pixels8BitPerChannelSSE<tChannels>(source, topLeftOffsets, topRightOffsets, bottomLeftOffsets, bottomRightOffsets, validPixels, *bColor, m128_i_tx, m128_i_ty, targetRow);
2658 targetRow += 4;
2659 }
2660 }
2661}
2662
2663template <unsigned int tChannels>
2664inline void FrameInterpolatorBilinear::homography8BitPerChannelSSESubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
2665{
2666 static_assert(tChannels >= 1u, "Invalid channel number!");
2667
2668 ocean_assert(input != nullptr && output != nullptr);
2669 ocean_assert(inputWidth > 0u && inputHeight > 0u);
2670 ocean_assert(outputWidth >= 4u && outputHeight > 0u);
2671 ocean_assert(input_H_output != nullptr);
2672
2673 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
2674
2675 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
2676 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
2677
2678 using PixelType = typename DataType<uint8_t, tChannels>::Type;
2679
2680 uint8_t zeroColor[tChannels] = {uint8_t(0)};
2681 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
2682
2683 OCEAN_ALIGN_DATA(16) unsigned int validPixels[4];
2684
2685 OCEAN_ALIGN_DATA(16) unsigned int topLeftOffsets[4];
2686 OCEAN_ALIGN_DATA(16) unsigned int topRightOffsets[4];
2687 OCEAN_ALIGN_DATA(16) unsigned int bottomLeftOffsets[4];
2688 OCEAN_ALIGN_DATA(16) unsigned int bottomRightOffsets[4];
2689
2690 // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
2691 const __m128 m128_f_X0 = _mm_set_ps1(float((*input_H_output)(0, 0)));
2692 const __m128 m128_f_X1 = _mm_set_ps1(float((*input_H_output)(1, 0)));
2693 const __m128 m128_f_X2 = _mm_set_ps1(float((*input_H_output)(2, 0)));
2694
2695 // we store 4 floats: [0.0f, 0.0f, 0.0f, 0.0f]
2696 const __m128 m128_f_zero = _mm_setzero_ps();
2697
2698 // we store 4 integers: [tChannels, tChannels, tChannels, tChannels]
2699 const __m128i m128_i_channels = _mm_set1_epi32(tChannels);
2700
2701 // we store 4 integers: [inputStrideElements, inputStrideElements, inputStrideElements, inputStrideElements]
2702 const __m128i m128_i_inputStrideElements = _mm_set1_epi32(inputStrideElements);
2703
2704 // we store 4 integers: [inputWidth - 1, inputWidth - 1, inputWidth -1, inputWidth -1], and same with inputHeight
2705 const __m128i m128_i_inputWidth_1 = _mm_set1_epi32(int(inputWidth) - 1);
2706 const __m128i m128_i_inputHeight_1 = _mm_set1_epi32(int(inputHeight) - 1);
2707
2708 // we store 4 floats: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
2709 const __m128 m128_f_inputWidth_1 = _mm_set_ps1(float(inputWidth - 1u));
2710 const __m128 m128_f_inputHeight_1 = _mm_set_ps1(float(inputHeight - 1u));
2711
2712 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
2713 {
2714 PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
2715
2716 /*
2717 * We can slightly optimize the 3x3 matrix multiplication:
2718 *
2719 * | X0 Y0 Z0 | | x |
2720 * | X1 Y1 Z1 | * | y |
2721 * | X2 Y2 Z2 | | 1 |
2722 *
2723 * | xx | | X0 * x | | Y0 * y + Z0 |
2724 * | yy | = | X1 * x | + | Y1 * y + Z1 |
2725 * | zz | | X2 * x | | Y2 * y + Z2 |
2726 *
2727 * | xx | | X0 * x | | C0 |
2728 * | yy | = | X1 * x | + | C1 |
2729 * | zz | | X2 * x | | C2 |
2730 *
2731 * As y is constant within the inner loop, we can pre-calculate the following terms:
2732 *
2733 * | x' | | (X0 * x + C0) / (X2 * x + C2) |
2734 * | y' | = | (X1 * x + C1) / (X2 * x + C2) |
2735 */
2736
2737 // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
2738 const __m128 m128_f_C0 = _mm_set_ps1(float((*input_H_output)(0, 1) * Scalar(y) + (*input_H_output)(0, 2)));
2739 const __m128 m128_f_C1 = _mm_set_ps1(float((*input_H_output)(1, 1) * Scalar(y) + (*input_H_output)(1, 2)));
2740 const __m128 m128_f_C2 = _mm_set_ps1(float((*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2)));
2741
2742 for (unsigned int x = 0u; x < outputWidth; x += 4u)
2743 {
2744 if (x + 4u > outputWidth)
2745 {
2746 // the last iteration will not fit into the output frame,
2747 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
2748
2749 ocean_assert(x >= 4u && outputWidth > 4u);
2750 const unsigned int newX = outputWidth - 4u;
2751
2752 ocean_assert(x > newX);
2753 outputPixelData -= x - newX;
2754
2755 x = newX;
2756
2757 // the for loop will stop after this iteration
2758 ocean_assert(!(x + 4u < outputWidth));
2759 }
2760
2761
2762 // we need four successive x coordinate floats:
2763 // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
2764 const __m128 m128_f_x_0123 = _mm_set_ps(float(x + 3u), float(x + 2u), float(x + 1u), float(x + 0u));
2765
2766 // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
2767 const __m128 m128_f_xx = _mm_add_ps(_mm_mul_ps(m128_f_X0, m128_f_x_0123), m128_f_C0);
2768 const __m128 m128_f_yy = _mm_add_ps(_mm_mul_ps(m128_f_X1, m128_f_x_0123), m128_f_C1);
2769 const __m128 m128_f_zz = _mm_add_ps(_mm_mul_ps(m128_f_X2, m128_f_x_0123), m128_f_C2);
2770
2771#ifdef USE_APPROXIMATED_INVERSE_OF_ZZ // (not defined by default)
2772
2773 // we calculate the (approximated) inverse of zz,
2774 // the overall performance will be approx. 5% better while the accuracy will be slightly worse:
2775 // [1/zz3, 1/zz2, 1/zz1, 1/zz0]
2776 const __m128 inv_zz_128 = _mm_rcp_ps(m128_f_zz);
2777
2778 // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
2779 const __m128 m128_f_inputX = _mm_mul_ps(m128_f_xx, inv_zz_128);
2780 const __m128 m128_f_inputY = _mm_mul_ps(m128_f_yy, inv_zz_128);
2781
2782#else
2783
2784 // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
2785 const __m128 m128_f_inputX = _mm_div_ps(m128_f_xx, m128_f_zz);
2786 const __m128 m128_f_inputY = _mm_div_ps(m128_f_yy, m128_f_zz);
2787
2788#endif // USE_APPROXIMATED_INVERSE_OF_ZZ
2789
2790
2791 // now we check whether we are inside the input frame
2792 const __m128 m128_f_validPixelX = _mm_and_ps(_mm_cmple_ps (m128_f_inputX, m128_f_inputWidth_1), _mm_cmpge_ps(m128_f_inputX, m128_f_zero)); // inputPosition.x() <= (inputWidth-1) && inputPosition.x() >= 0 ? 0xFFFFFF : 0x000000
2793 const __m128 m128_f_validPixelY = _mm_and_ps(_mm_cmple_ps (m128_f_inputY, m128_f_inputHeight_1), _mm_cmpge_ps(m128_f_inputY, m128_f_zero)); // inputPosition.y() <= (inputHeight-1) && inputPosition.y() >= 0 ? 0xFFFFFF : 0x000000
2794
2795 const __m128i m128_i_validPixel = _mm_castps_si128(_mm_and_ps(m128_f_validPixelX, m128_f_validPixelY)); // is_inside_input_frame(inputPosition) ? 0xFFFFFF : 0x000000
2796
2797 // we can stop here if all pixels are invalid
2798 if (_mm_test_all_zeros(m128_i_validPixel, _mm_set1_epi32(0xFFFFFFFF)))
2799 {
2800#ifdef OCEAN_DEBUG
2801 OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
2802 _mm_store_si128((__m128i*)debugValidPixels, m128_i_validPixel);
2803 ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
2804#endif
2805
2806 outputPixelData[0] = *bColor;
2807 outputPixelData[1] = *bColor;
2808 outputPixelData[2] = *bColor;
2809 outputPixelData[3] = *bColor;
2810
2811 outputPixelData += 4;
2812
2813 continue;
2814 }
2815
2816 // we store the result
2817 _mm_store_si128((__m128i*)validPixels, m128_i_validPixel);
2818 ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
2819
2820
2821 // now we determine the left, top, right and bottom pixel used for the interpolation
2822 const __m128 m128_f_tx_floor = _mm_floor_ps(m128_f_inputX);
2823 const __m128 m128_f_ty_floor = _mm_floor_ps(m128_f_inputY);
2824
2825 // left = floor(x); top = floor(y)
2826 const __m128i m128_i_left = _mm_cvtps_epi32(m128_f_tx_floor);
2827 const __m128i m128_i_top = _mm_cvtps_epi32(m128_f_ty_floor);
2828
2829 // right = min(left + 1, width - 1); bottom = min(top + 1; height - 1)
2830 const __m128i m128_i_right = _mm_min_epu32(_mm_add_epi32(m128_i_left, _mm_set1_epi32(1)), m128_i_inputWidth_1);
2831 const __m128i m128_i_bottom = _mm_min_epu32(_mm_add_epi32(m128_i_top, _mm_set1_epi32(1)), m128_i_inputHeight_1);
2832
2833 // offset = (y * inputStrideElements + tChannels * x)
2834 const __m128i m128_i_topLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left)); // topleftOffset = (top * inputStrideElements + tChannels * left)
2835 const __m128i m128_i_topRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right)); // toprightOffset = (top * inputStrideElements + tChannels * right)
2836 const __m128i m128_i_bottomLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left)); // ...
2837 const __m128i m128_i_bottomRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right));
2838
2839 // we store the offsets
2840 _mm_store_si128((__m128i*)topLeftOffsets, m128_i_topLeftOffset);
2841 _mm_store_si128((__m128i*)topRightOffsets, m128_i_topRightOffset);
2842 _mm_store_si128((__m128i*)bottomLeftOffsets, m128_i_bottomLeftOffset);
2843 _mm_store_si128((__m128i*)bottomRightOffsets, m128_i_bottomRightOffset);
2844
2845
2846 // now we need to determine the interpolation factors tx, tx_ and ty, ty_: (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
2847
2848 // we determine the fractional portions of the x' and y':
2849 // e.g., [43.1231, -12.5543, -34.123, 99.2]
2850 // [ 0.1231, 0.4457, 0.877, 0.2] // note the result for negative value - but we will not process negative values anyway due to 'validPixel'
2851 __m128 m128_f_tx = _mm_sub_ps(m128_f_inputX, m128_f_tx_floor);
2852 __m128 m128_f_ty = _mm_sub_ps(m128_f_inputY, m128_f_ty_floor);
2853
2854 // we use integer interpolation [0.0, 1.0] -> [0, 128]
2855 m128_f_tx = _mm_mul_ps(m128_f_tx, _mm_set_ps1(128.0f));
2856 m128_f_ty = _mm_mul_ps(m128_f_ty, _mm_set_ps1(128.0f));
2857
2858 m128_f_tx = _mm_round_ps(m128_f_tx, _MM_FROUND_TO_NEAREST_INT);
2859 m128_f_ty = _mm_round_ps(m128_f_ty, _MM_FROUND_TO_NEAREST_INT);
2860
2861 const __m128i m128_i_tx = _mm_cvtps_epi32(m128_f_tx);
2862 const __m128i m128_i_ty = _mm_cvtps_epi32(m128_f_ty);
2863
2864 interpolate4Pixels8BitPerChannelSSE<tChannels>(input, topLeftOffsets, topRightOffsets, bottomLeftOffsets, bottomRightOffsets, validPixels, *bColor, m128_i_tx, m128_i_ty, outputPixelData);
2865 outputPixelData += 4;
2866 }
2867 }
2868}
2869
2870template <>
2871OCEAN_FORCE_INLINE __m128i FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<3u>(const __m128i& sourcesTopLeft, const __m128i& sourcesTopRight, const __m128i& sourcesBottomLeft, const __m128i& sourcesBottomRight, const __m128i& factorsTopLeft, const __m128i& factorsTopRight, const __m128i& factorsBottomLeft, const __m128i& factorsBottomRight)
2872{
2873 // sourcesTopLeft stores the three color values of 4 (independent) pixels (the upper left pixels):
2874 // FEDC BA98 7654 3210
2875 // ---- VUYV UYVU YVUY
2876 // sourcesTopRight, sourcesBottomLeft, sourcesBottomRight have the same pattern
2877
2878 // factorsTopLeft stores the 32 bit interpolation values for 4 pixels:
2879 // FEDC BA98 7654 3210
2880 // 3 2 1 0 (32 bit interpolation values, fitting into 16 bit)
2881
2882
2883 // we will simply extract each channel from the source pixels,
2884 // each extracted channel will be multiplied by the corresponding interpolation factor
2885 // and all interpolation results will be accumulated afterwards
2886
2887 // FEDC BA98 7654 3210
2888 const __m128i mask32_Channel0 = SSE::set128i(0xFFFFFF09FFFFFF06ull, 0xFFFFFF03FFFFFF00ull); // ---9 ---6 ---3 ---0
2889 const __m128i mask32_Channel1 = SSE::set128i(0xFFFFFF0AFFFFFF07ull, 0xFFFFFF04FFFFFF01ull); // ---A ---7 ---4 ---1
2890 const __m128i mask32_Channel2 = SSE::set128i(0xFFFFFF0BFFFFFF08ull, 0xFFFFFF05FFFFFF02ull); // ---B ---8 ---5 ---2
2891
2892
2893 // we extract the first channel from the top left pixel values and multiply the channel with the interpolation factors
2894 // FEDC BA98 7654 3210
2895 // ---9 ---6 ---3 ---0
2896 // *
2897 // FTL3 FTL2 FTL1 FTL0
2898 __m128i multiplication_channel0 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel0));
2899
2900 // we the same multiplication for the second channel
2901 __m128i multiplication_channel1 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel1));
2902
2903 // and third channel
2904 __m128i multiplication_channel2 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel2));
2905
2906
2907 // now we repeat the process for the top right pixel values
2908 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel0)));
2909 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel1)));
2910 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel2)));
2911
2912
2913 // and for the bottom left pixel values
2914 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel0)));
2915 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel1)));
2916 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel2)));
2917
2918
2919 // and for the bottom right pixel values
2920 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel0)));
2921 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel1)));
2922 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel2)));
2923
2924
2925 const __m128i m128_i_8192 = _mm_set1_epi32(8192);
2926
2927 // we add 8192 for rounding and shift the result by 14 bits (division by 128*128) // TODO if using 256 we should be able to avoid the shifting by 14 bits (simply by using shuffle operations)
2928
2929 // in addition to rounding and shifting, we need to move the interpolation results to the correct channel:
2930 // target data: ---9 ---6 ---3 ---0
2931 // shufflet target: ---- --9- -6-- 3--0
2932 // mask location: ---C ---8 ---4 ---0
2933 // mask: ---- --C- -8-- 4--0
2934 __m128i interpolation_channel0 = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication_channel0, m128_i_8192), 14), SSE::set128i(0xFFFFFFFFFFFF0CFFull, 0xFF08FFFF04FFFF00ull));
2935
2936 // target data: ---A ---7 ---4 ---1
2937 // shufflet target: ---- -A-- 7--4 --1-
2938 // mask location: ---C ---8 ---4 ---0
2939 // mask: ---- -C-- 8--4 --0-
2940 __m128i interpolation_channel1 = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication_channel1, m128_i_8192), 14), SSE::set128i(0xFFFFFFFFFF0CFFFFull, 0x08FFFF04FFFF00FFull));
2941
2942 // target data: ---B ---8 ---5 ---2
2943 // shufflet target: ---- B--8 --5- -2--
2944 // mask location: ---C ---8 ---4 ---0
2945 // mask: ---- C--8 --4- -0--
2946 __m128i interpolation_channel2 = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication_channel2, m128_i_8192), 14), SSE::set128i(0xFFFFFFFF0CFFFF08ull, 0xFFFF04FFFF00FFFFull));
2947
2948
2949 // finally, we simply blend all interpolation results together
2950
2951 return _mm_or_si128(_mm_or_si128(interpolation_channel0, interpolation_channel1), interpolation_channel2);
2952}
2953
2954template <>
2955OCEAN_FORCE_INLINE __m128i FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<4u>(const __m128i& sourcesTopLeft, const __m128i& sourcesTopRight, const __m128i& sourcesBottomLeft, const __m128i& sourcesBottomRight, const __m128i& factorsTopLeft, const __m128i& factorsTopRight, const __m128i& factorsBottomLeft, const __m128i& factorsBottomRight)
2956{
2957 // sourcesTopLeft stores the four color values of 4 (independent) pixels (the upper left pixels):
2958 // FEDC BA98 7654 3210
2959 // AVUY AVUY AVUY AVUY
2960 // sourcesTopRight, sourcesBottomLeft, sourcesBottomRight have the same pattern
2961
2962 // factorsTopLeft stores the 32 bit interpolation values for 4 pixels:
2963 // FEDC BA98 7654 3210
2964 // 3 2 1 0 (32 bit interpolation values, fitting into 16 bit)
2965
2966
2967 // we will simply extract each channel from the source pixels,
2968 // each extracted channel will be multiplied by the corresponding interpolation factor
2969 // and all interpolation results will be accumulated afterwards
2970
2971 // FEDC BA98 7654 3210
2972 const __m128i mask32_Channel0 = SSE::set128i(0xA0A0A00CA0A0A008ull, 0xA0A0A004A0A0A000ull); // ---C ---8 ---4 ---0
2973 const __m128i mask32_Channel1 = SSE::set128i(0xA0A0A00DA0A0A009ull, 0xA0A0A005A0A0A001ull); // ---D ---9 ---5 ---1
2974 const __m128i mask32_Channel2 = SSE::set128i(0xA0A0A00EA0A0A00Aull, 0xA0A0A006A0A0A002ull); // ---E ---A ---6 ---2
2975 const __m128i mask32_Channel3 = SSE::set128i(0xA0A0A00FA0A0A00Bull, 0xA0A0A007A0A0A003ull); // ---F ---B ---7 ---3
2976
2977
2978 // we extract the first channel from the top left pixel values and multiply the channel with the interpolation factors
2979 // FEDC BA98 7654 3210
2980 // ---C ---8 ---4 ---0
2981 // *
2982 // FTL3 FTL2 FTL1 FTL0
2983 __m128i multiplication_channel0 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel0));
2984
2985 // we the same multiplication for the second channel
2986 __m128i multiplication_channel1 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel1));
2987
2988 // and third channel
2989 __m128i multiplication_channel2 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel2));
2990
2991 // and last channel
2992 __m128i multiplication_channel3 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel3));
2993
2994
2995 // now we repeat the process for the top right pixel values
2996 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel0)));
2997 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel1)));
2998 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel2)));
2999 multiplication_channel3 = _mm_add_epi32(multiplication_channel3, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel3)));
3000
3001
3002 // and for the bottom left pixel values
3003 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel0)));
3004 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel1)));
3005 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel2)));
3006 multiplication_channel3 = _mm_add_epi32(multiplication_channel3, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel3)));
3007
3008
3009 // and for the bottom right pixel values
3010 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel0)));
3011 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel1)));
3012 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel2)));
3013 multiplication_channel3 = _mm_add_epi32(multiplication_channel3, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel3)));
3014
3015
3016 const __m128i m128_i_8192 = _mm_set1_epi32(8192);
3017
3018 // we add 8192 for rounding and shift the result by 14 bits (division by 128*128)
3019
3020 // in addition to rounding and shifting, we need to move the interpolation results to the correct channel:
3021 // ---C ---8 ---4 ---0
3022 // ---C ---9 ---4 ---0
3023 __m128i interpolation_channel0 = _mm_srli_epi32(_mm_add_epi32(multiplication_channel0, m128_i_8192), 14);
3024
3025 // in addition to rounding and shifting, we need to move the interpolation results to the correct channel:
3026 // ---D ---9 ---5 ---1
3027 // --D- --9- --5- --1-
3028 __m128i interpolation_channel1 = _mm_slli_epi32(_mm_srli_epi32(_mm_add_epi32(multiplication_channel1, m128_i_8192), 14), 8);
3029
3030 // ---E ---A ---6 ---2
3031 // -E-- -A-- -6-- -2--
3032 __m128i interpolation_channel2 = _mm_slli_epi32(_mm_srli_epi32(_mm_add_epi32(multiplication_channel2, m128_i_8192), 14), 16);
3033
3034 // ---F ---B ---7 ---3
3035 // F--- B--- 7--- 3---
3036 __m128i interpolation_channel3 = _mm_slli_epi32(_mm_srli_epi32(_mm_add_epi32(multiplication_channel3, m128_i_8192), 14), 24);
3037
3038
3039 // finally, we simply blend all interpolation results together
3040
3041 return _mm_or_si128(_mm_or_si128(interpolation_channel0, interpolation_channel1), _mm_or_si128(interpolation_channel2, interpolation_channel3));
3042}
3043
3044#ifdef OCEAN_COMPILER_MSC
3045
3046// we see a significant performance decrease with non-VS compilers/platforms,
3047// so we do not use the 3-channel version with non-Windows compilers (the 1-channel version below is guarded by the same check)
3048
3049template <>
3050OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<1u>(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const DataType<uint8_t, 1u>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, 1u>::Type* targetPositionPixels)
3051{
3052 ocean_assert(source != nullptr);
3053 ocean_assert(targetPositionPixels != nullptr);
3054
3055 using PixelType = typename DataType<uint8_t, 1u>::Type;
3056
3057 // as we do not initialize the following intermediate data,
3058 // we hopefully will not allocate memory on the stack each time this function is called
3059 OCEAN_ALIGN_DATA(16) PixelType pixels[16];
3060
3061 // we gather the individual source pixel values from the source image,
3062 // based on the calculated pixel locations
3063 for (unsigned int i = 0u; i < 4u; ++i)
3064 {
3065 if (validPixels[i])
3066 {
3067 pixels[i * 4u + 0u] = *((PixelType*)(source + offsetsTopLeft[i]));
3068 pixels[i * 4u + 1u] = *((PixelType*)(source + offsetsTopRight[i]));
3069 pixels[i * 4u + 2u] = *((PixelType*)(source + offsetsBottomLeft[i]));
3070 pixels[i * 4u + 3u] = *((PixelType*)(source + offsetsBottomRight[i]));
3071 }
3072 else
3073 {
3074 pixels[i * 4u + 0u] = borderColor;
3075 pixels[i * 4u + 1u] = borderColor;
3076 pixels[i * 4u + 2u] = borderColor;
3077 pixels[i * 4u + 3u] = borderColor;
3078 }
3079 }
3080
3081 static_assert(sizeof(__m128i) == sizeof(pixels), "Invalid data type!");
3082
3083 const __m128i m128_pixels = _mm_load_si128((const __m128i*)pixels);
3084
3085
3086 // factorLeft = 128 - factorRight
3087 // factorTop = 128 - factorBottom
3088
3089 const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3090 const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3091
3092 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3093 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3094
3095 const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3096 const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3097 const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3098 const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3099
3100 // pixels stores the four interpolation grascale pixel values (top left, top right, bottom left, bottom right) for 4 (independent) pixels:
3101 // F E D C B A 9 8 7 6 5 4 3 2 1 0
3102 // BR BL TR TL BR BL TR TL BR BL TR TL BR BL TR TL
3103
3104 // factorsTopLeft stores the 32 bit interpolation values for 4 pixels:
3105 // FEDC BA98 7654 3210
3106 // 3 2 1 0 (32 bit interpolation values, fitting into 16 bit)
3107
3108
3109 // we will simply extract each channel from the source pixels,
3110 // each extracted channel will be multiplied by the corresponding interpolation factor
3111 // and all interpolation results will be accumulated afterwards
3112
3113 // FEDC BA98 7654 3210
3114 const __m128i mask32_topLeft = SSE::set128i(0xFFFFFF0CFFFFFF08ull, 0xFFFFFF04FFFFFF00ull); // ---C ---8 ---4 ---0
3115 const __m128i mask32_topRight = SSE::set128i(0xFFFFFF0DFFFFFF09ull, 0xFFFFFF05FFFFFF01ull); // ---D ---9 ---5 ---1
3116 const __m128i mask32_bottomLeft = SSE::set128i(0xFFFFFF0EFFFFFF0Aull, 0xFFFFFF06FFFFFF02ull); // ---E ---A ---6 ---2
3117 const __m128i mask32_bottomRight = SSE::set128i(0xFFFFFF0FFFFFFF0Bull, 0xFFFFFF07FFFFFF03ull); // ---F ---B ---7 ---3
3118
3119
3120 // we extract the top left values and multiply them with the interpolation factors
3121 // FEDC BA98 7654 3210
3122 // ---C ---8 ---4 ---0
3123 // *
3124 // FTL3 FTL2 FTL1 FTL0
3125 __m128i multiplicationA = _mm_mullo_epi32(m128_factorsTopLeft, _mm_shuffle_epi8(m128_pixels, mask32_topLeft));
3126 __m128i multiplicationB = _mm_mullo_epi32(m128_factorsTopRight, _mm_shuffle_epi8(m128_pixels, mask32_topRight));
3127
3128 multiplicationA = _mm_add_epi32(multiplicationA, _mm_mullo_epi32(m128_factorsBottomLeft, _mm_shuffle_epi8(m128_pixels, mask32_bottomLeft)));
3129 multiplicationB = _mm_add_epi32(multiplicationB, _mm_mullo_epi32(m128_factorsBottomRight, _mm_shuffle_epi8(m128_pixels, mask32_bottomRight)));
3130
3131 __m128i multiplication = _mm_add_epi32(multiplicationA, multiplicationB);
3132
3133 const __m128i m128_i_8192 = _mm_set1_epi32(8192);
3134
3135 // we add 8192 for rounding and shift the result by 14 bits (division by 128*128) // TODO if using 256 we should be able to avoid the shifting by 14 bits (simply by using shuffle operations)
3136 // additionally, we shuffle the individual results together
3137
3138 const __m128i result = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication, m128_i_8192), 14), SSE::set128i(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFF0C080400ull));
3139
3140 *((unsigned int*)targetPositionPixels) = _mm_extract_epi32(result, 0);
3141}
3142
3143template <>
3144OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<3u>(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const DataType<uint8_t, 3u>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, 3u>::Type* targetPositionPixels)
3145{
3146 ocean_assert(source != nullptr);
3147 ocean_assert(targetPositionPixels != nullptr);
3148
3149 using PixelType = typename DataType<uint8_t, 3u>::Type;
3150
3151 // as we do not initialize the following intermediate data,
3152 // we hopefully will not allocate memory on the stack each time this function is called
3153 OCEAN_ALIGN_DATA(16) PixelType topLeftPixels[6];
3154 OCEAN_ALIGN_DATA(16) PixelType topRightPixels[6];
3155 OCEAN_ALIGN_DATA(16) PixelType bottomLeftPixels[6];
3156 OCEAN_ALIGN_DATA(16) PixelType bottomRightPixels[6];
3157
3158 // we gather the individual source pixel values from the source image,
3159 // based on the calculated pixel locations
3160 for (unsigned int i = 0u; i < 4u; ++i)
3161 {
3162 if (validPixels[i])
3163 {
3164 topLeftPixels[i] = *((PixelType*)(source + offsetsTopLeft[i]));
3165 topRightPixels[i] = *((PixelType*)(source + offsetsTopRight[i]));
3166 bottomLeftPixels[i] = *((PixelType*)(source + offsetsBottomLeft[i]));
3167 bottomRightPixels[i] = *((PixelType*)(source + offsetsBottomRight[i]));
3168 }
3169 else
3170 {
3171 topLeftPixels[i] = borderColor;
3172 topRightPixels[i] = borderColor;
3173 bottomLeftPixels[i] = borderColor;
3174 bottomRightPixels[i] = borderColor;
3175 }
3176 }
3177
3178 static_assert(sizeof(__m128i) <= sizeof(topLeftPixels), "Invalid data type!");
3179
3180 const __m128i m128_topLeftPixels = _mm_load_si128((const __m128i*)topLeftPixels);
3181 const __m128i m128_topRightPixels = _mm_load_si128((const __m128i*)topRightPixels);
3182 const __m128i m128_bottomLeftPixels = _mm_load_si128((const __m128i*)bottomLeftPixels);
3183 const __m128i m128_bottomRightPixels = _mm_load_si128((const __m128i*)bottomRightPixels);
3184
3185
3186 // factorLeft = 128 - factorRight
3187 // factorTop = 128 - factorBottom
3188
3189 const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3190 const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3191
3192 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3193 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3194
3195 const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3196 const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3197 const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3198 const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3199
3200
3201 const __m128i m128_interpolationResult = interpolate4Pixels8BitPerChannelSSE<3u>(m128_topLeftPixels, m128_topRightPixels, m128_bottomLeftPixels, m128_bottomRightPixels, m128_factorsTopLeft, m128_factorsTopRight, m128_factorsBottomLeft, m128_factorsBottomRight);
3202
3203 // we copy the first 12 bytes
3204 memcpy(targetPositionPixels, &m128_interpolationResult, 12u);
3205}
3206
3207#endif // OCEAN_COMPILER_MSC
3208
3209template <>
3210OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<4u>(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const DataType<uint8_t, 4u>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, 4u>::Type* targetPositionPixels)
3211{
3212 ocean_assert(source != nullptr);
3213 ocean_assert(targetPositionPixels != nullptr);
3214
3215 using PixelType = typename DataType<uint8_t, 4u>::Type;
3216
3217 // as we do not initialize the following intermediate data,
3218 // we hopefully will not allocate memory on the stack each time this function is called
3219 OCEAN_ALIGN_DATA(16) PixelType topLeftPixels[4];
3220 OCEAN_ALIGN_DATA(16) PixelType topRightPixels[4];
3221 OCEAN_ALIGN_DATA(16) PixelType bottomLeftPixels[4];
3222 OCEAN_ALIGN_DATA(16) PixelType bottomRightPixels[4];
3223
3224 // we gather the individual source pixel values from the source image,
3225 // based on the calculated pixel locations
3226
3227 for (unsigned int i = 0u; i < 4u; ++i)
3228 {
3229 if (validPixels[i])
3230 {
3231 topLeftPixels[i] = *((PixelType*)(source + offsetsTopLeft[i]));
3232 topRightPixels[i] = *((PixelType*)(source + offsetsTopRight[i]));
3233 bottomLeftPixels[i] = *((PixelType*)(source + offsetsBottomLeft[i]));
3234 bottomRightPixels[i] = *((PixelType*)(source + offsetsBottomRight[i]));
3235 }
3236 else
3237 {
3238 topLeftPixels[i] = borderColor;
3239 topRightPixels[i] = borderColor;
3240 bottomLeftPixels[i] = borderColor;
3241 bottomRightPixels[i] = borderColor;
3242 }
3243 }
3244
3245 static_assert(sizeof(__m128i) == sizeof(topLeftPixels), "Invalid data type!");
3246
3247 const __m128i m128_topLeftPixels = _mm_load_si128((const __m128i*)topLeftPixels);
3248 const __m128i m128_topRightPixels = _mm_load_si128((const __m128i*)topRightPixels);
3249 const __m128i m128_bottomLeftPixels = _mm_load_si128((const __m128i*)bottomLeftPixels);
3250 const __m128i m128_bottomRightPixels = _mm_load_si128((const __m128i*)bottomRightPixels);
3251
3252
3253 // factorLeft = 128 - factorRight
3254 // factorTop = 128 - factorBottom
3255
3256 const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3257 const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3258
3259 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3260 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3261
3262 const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3263 const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3264 const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3265 const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3266
3267
3268 const __m128i m128_interpolationResult = interpolate4Pixels8BitPerChannelSSE<4u>(m128_topLeftPixels, m128_topRightPixels, m128_bottomLeftPixels, m128_bottomRightPixels, m128_factorsTopLeft, m128_factorsTopRight, m128_factorsBottomLeft, m128_factorsBottomRight);
3269
3270 _mm_storeu_si128((__m128i*)targetPositionPixels, m128_interpolationResult);
3271}
3272
3273template <unsigned int tChannels>
3274OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const typename DataType<uint8_t, tChannels>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, tChannels>::Type* targetPositionPixels)
3275{
3276 ocean_assert(source != nullptr);
3277 ocean_assert(targetPositionPixels != nullptr);
3278
3279 // as we do not initialize the following intermediate data,
3280 // we hopefully will not allocate memory on the stack each time this function is called
3281 OCEAN_ALIGN_DATA(16) unsigned int factorsTopLeft[4];
3282 OCEAN_ALIGN_DATA(16) unsigned int factorsTopRight[4];
3283 OCEAN_ALIGN_DATA(16) unsigned int factorsBottomLeft[4];
3284 OCEAN_ALIGN_DATA(16) unsigned int factorsBottomRight[4];
3285
3286
3287 // factorLeft = 128 - factorRight
3288 // factorTop = 128 - factorBottom
3289
3290 const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3291 const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3292
3293 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3294 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3295
3296 const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3297 const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3298 const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3299 const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3300
3301
3302 // we store the interpolation factors
3303 _mm_store_si128((__m128i*)factorsTopLeft, m128_factorsTopLeft);
3304 _mm_store_si128((__m128i*)factorsTopRight, m128_factorsTopRight);
3305 _mm_store_si128((__m128i*)factorsBottomLeft, m128_factorsBottomLeft);
3306 _mm_store_si128((__m128i*)factorsBottomRight, m128_factorsBottomRight);
3307
3308 for (unsigned int i = 0u; i < 4u; ++i)
3309 {
3310 if (validPixels[i])
3311 {
3312 const uint8_t* topLeft = source + offsetsTopLeft[i];
3313 const uint8_t* topRight = source + offsetsTopRight[i];
3314
3315 const uint8_t* bottomLeft = source + offsetsBottomLeft[i];
3316 const uint8_t* bottomRight = source + offsetsBottomRight[i];
3317
3318 const unsigned int& factorTopLeft = factorsTopLeft[i];
3319 const unsigned int& factorTopRight = factorsTopRight[i];
3320 const unsigned int& factorBottomLeft = factorsBottomLeft[i];
3321 const unsigned int& factorBottomRight = factorsBottomRight[i];
3322
3323 for (unsigned int n = 0u; n < tChannels; ++n)
3324 {
3325 ((uint8_t*)targetPositionPixels)[n] = (uint8_t)((topLeft[n] * factorTopLeft + topRight[n] * factorTopRight + bottomLeft[n] * factorBottomLeft + bottomRight[n] * factorBottomRight + 8192u) >> 14u);
3326 }
3327 }
3328 else
3329 {
3330 *targetPositionPixels = borderColor;
3331 }
3332
3333 targetPositionPixels++;
3334 }
3335}
3336
3337#endif // OCEAN_HARDWARE_SSE_VERSION
3338
3339#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3340
3341template <unsigned int tChannels>
3342void FrameInterpolatorBilinear::affine8BitPerChannelNEONSubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
3343{
3344 static_assert(tChannels >= 1u, "Invalid channel number!");
3345
3346 ocean_assert(source && target);
3347 ocean_assert(sourceWidth > 0u && sourceHeight > 0u);
3348 ocean_assert(targetWidth >= 4u && targetHeight > 0u);
3349 ocean_assert(source_A_target);
3350 ocean_assert(!source_A_target->isNull() && Numeric::isEqualEps((*source_A_target)[2]) && Numeric::isEqualEps((*source_A_target)[5]));
3351
3352 ocean_assert(firstTargetRow + numberTargetRows <= targetHeight);
3353
3354 const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
3355 const unsigned int targetStrideElements = targetWidth * tChannels + targetPaddingElements;
3356
3357 using PixelType = typename DataType<uint8_t, tChannels>::Type;
3358
3359 uint8_t zeroColor[tChannels] = {uint8_t(0)};
3360 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
3361
3362 unsigned int validPixels[4];
3363
3364 unsigned int topLeftOffsetsElements[4];
3365 unsigned int topRightOffsetsElements[4];
3366 unsigned int bottomLeftOffsetsElements[4];
3367 unsigned int bottomRightOffsetsElements[4];
3368
3369 const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(tChannels);
3370
3371 // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
3372 const float32x4_t m128_f_X0 = vdupq_n_f32(float((*source_A_target)(0, 0)));
3373 const float32x4_t m128_f_X1 = vdupq_n_f32(float((*source_A_target)(1, 0)));
3374
3375 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
3376 {
3377 PixelType* targetRow = (PixelType*)(target + y * targetStrideElements);
3378
3379 /*
3380 * We can slightly optimize the 3x3 matrix multiplication:
3381 *
3382 * | X0 Y0 Z0 | | x |
3383 * | X1 Y1 Z1 | * | y |
3384 * | 0 0 1 | | 1 |
3385 *
3386 * | xx | | X0 * x | | Y0 * y + Z0 |
3387 * | yy | = | X1 * x | + | Y1 * y + Z1 |
3388 *
3389 * As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
3390 *
3391 * C0 = Y0 * y + Z0
3392 * C1 = Y1 * y + Z1
3393 *
3394 * So the computation becomes:
3395 *
3396 * | x' | | X0 * x | | C0 |
3397 * | y' | = | X1 * x | + | C1 |
3398 */
3399
3400 // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
3401 const float32x4_t m128_f_C0 = vdupq_n_f32(float((*source_A_target)(0, 1) * Scalar(y) + (*source_A_target)(0, 2)));
3402 const float32x4_t m128_f_C1 = vdupq_n_f32(float((*source_A_target)(1, 1) * Scalar(y) + (*source_A_target)(1, 2)));
3403
3404 // we store 4 floats: [0.0f, 0.0f, 0.0f, 0.0f]
3405 const float32x4_t m128_f_zero = vdupq_n_f32(0.0f);
3406
3407 // we store 4 integers: [sourceStrideElements, sourceStrideElements, sourceStrideElements, sourceStrideElements]
3408 const uint32x4_t m128_u_sourceStrideElements = vdupq_n_u32(sourceStrideElements);
3409
3410 // we store 4 integers: [sourceWidth - 1, sourceWidth - 1, sourceWidth - 1, sourceWidth - 1], and same with sourceHeight
3411 const uint32x4_t m128_u_sourceWidth_1 = vdupq_n_u32(sourceWidth - 1u);
3412 const uint32x4_t m128_u_sourceHeight_1 = vdupq_n_u32(sourceHeight - 1u);
3413
3414 // we store 4 floats: [sourceWidth - 1, sourceWidth - 1, sourceWidth - 1, sourceWidth - 1], and same with sourceHeight
3415 const float32x4_t m128_f_sourceWidth_1 = vdupq_n_f32(float(sourceWidth - 1u));
3416 const float32x4_t m128_f_sourceHeight_1 = vdupq_n_f32(float(sourceHeight - 1u));
3417
3418 for (unsigned int x = 0u; x < targetWidth; x += 4u)
3419 {
3420 if (x + 4u > targetWidth)
3421 {
3422 // the last iteration will not fit into the target frame,
3423 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
3424
3425 ocean_assert(x >= 4u && targetWidth > 4u);
3426 const unsigned int newX = targetWidth - 4u;
3427
3428 ocean_assert(x > newX);
3429 targetRow -= x - newX;
3430
3431 x = newX;
3432
3433 // the for loop will stop after this iteration
3434 ocean_assert(!(x + 4u < targetWidth));
3435 }
3436
3437
3438 // we need four successive x coordinate floats:
3439 // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
3440 float x_0123[4] = {float(x + 0u), float(x + 1u), float(x + 2u), float(x + 3u)};
3441 const float32x4_t m128_f_x_0123 = vld1q_f32(x_0123);
3442
3443 // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
3444 const float32x4_t m128_f_sourceX = vmlaq_f32(m128_f_C0, m128_f_X0, m128_f_x_0123);
3445 const float32x4_t m128_f_sourceY = vmlaq_f32(m128_f_C1, m128_f_X1, m128_f_x_0123);
3446
3447
3448 // now we check whether we are inside the source frame
3449 const uint32x4_t m128_u_validPixelX = vandq_u32(vcleq_f32(m128_f_sourceX, m128_f_sourceWidth_1), vcgeq_f32(m128_f_sourceX, m128_f_zero)); // sourcePosition.x() <= (sourceWidth - 1) && sourcePosition.x() >= 0 ? 0xFFFFFFFF : 0x00000000
3450 const uint32x4_t m128_u_validPixelY = vandq_u32(vcleq_f32(m128_f_sourceY, m128_f_sourceHeight_1), vcgeq_f32(m128_f_sourceY, m128_f_zero)); // sourcePosition.y() <= (sourceHeight - 1) && sourcePosition.y() >= 0 ? 0xFFFFFFFF : 0x00000000
3451
3452 const uint32x4_t m128_u_validPixel = vandq_u32(m128_u_validPixelX, m128_u_validPixelY); // is_inside_source_frame(sourcePosition) ? 0xFFFFFFFF : 0x00000000
3453
3454
3455 // we can stop here if all pixels are invalid
3456 const uint32x2_t m64_u_validPixel = vorr_u32(vget_low_u32(m128_u_validPixel), vget_high_u32(m128_u_validPixel));
3457 if ((vget_lane_u32(m64_u_validPixel, 0) | vget_lane_u32(m64_u_validPixel, 1)) == 0x00000000u)
3458 {
3459#ifdef OCEAN_DEBUG
3460 OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
3461 vst1q_u32(debugValidPixels, m128_u_validPixel);
3462 ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
3463#endif
3464
3465 targetRow[0] = *bColor;
3466 targetRow[1] = *bColor;
3467 targetRow[2] = *bColor;
3468 targetRow[3] = *bColor;
3469
3470 targetRow += 4;
3471
3472 continue;
3473 }
3474
3475
3476 // we store the result
3477 vst1q_u32(validPixels, m128_u_validPixel);
3478 ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
3479
3480
3481 // now we determine the left, top, right and bottom pixel used for the interpolation
3482 // left = floor(x); top = floor(y)
3483 const uint32x4_t m128_u_left = vcvtq_u32_f32(m128_f_sourceX);
3484 const uint32x4_t m128_u_top = vcvtq_u32_f32(m128_f_sourceY);
3485
3486 // right = min(left + 1, width - 1); bottom = min(top + 1; height - 1)
3487 const uint32x4_t m128_u_right = vminq_u32(vaddq_u32(m128_u_left, vdupq_n_u32(1u)), m128_u_sourceWidth_1);
3488 const uint32x4_t m128_u_bottom = vminq_u32(vaddq_u32(m128_u_top, vdupq_n_u32(1u)), m128_u_sourceHeight_1);
3489
3490 // offset = y * stride + x * channels
3491 const uint32x4_t m128_u_topLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_top, m128_u_sourceStrideElements); // topLeftOffset = top * strideElements + left * channels
3492 const uint32x4_t m128_u_topRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_top, m128_u_sourceStrideElements); // topRightOffset = top * strideElements + right * channels
3493 const uint32x4_t m128_u_bottomLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_bottom, m128_u_sourceStrideElements); // ...
3494 const uint32x4_t m128_u_bottomRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_bottom, m128_u_sourceStrideElements);
3495
3496 // we store the offsets
3497 vst1q_u32(topLeftOffsetsElements, m128_u_topLeftOffsetElements);
3498 vst1q_u32(topRightOffsetsElements, m128_u_topRightOffsetElements);
3499 vst1q_u32(bottomLeftOffsetsElements, m128_u_bottomLeftOffsetElements);
3500 vst1q_u32(bottomRightOffsetsElements, m128_u_bottomRightOffsetElements);
3501
3502
3503 // now we need to determine the interpolation factors tx, tx_ and ty, ty_: (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
3504
3505 // we determine the fractional portions of the x' and y':
3506 float32x4_t m128_f_tx = vsubq_f32(m128_f_sourceX, vcvtq_f32_u32(m128_u_left));
3507 float32x4_t m128_f_ty = vsubq_f32(m128_f_sourceY, vcvtq_f32_u32(m128_u_top));
3508
3509 // we use integer interpolation [0.0, 1.0] -> [0, 128]
3510 m128_f_tx = vmulq_f32(m128_f_tx, vdupq_n_f32(128.0f));
3511 m128_f_ty = vmulq_f32(m128_f_ty, vdupq_n_f32(128.0f));
3512
3513 const uint32x4_t m128_u_tx = vcvtq_u32_f32(vaddq_f32(m128_f_tx, vdupq_n_f32(0.5)));
3514 const uint32x4_t m128_u_ty = vcvtq_u32_f32(vaddq_f32(m128_f_ty, vdupq_n_f32(0.5)));
3515
3516 if constexpr (tChannels > 4u)
3517 {
3518 // normally we would simply call instead of copying the code of the function to this location
3519 // however, if calling the function instead of applying the code here directly
3520 // clang ends with code approx. 20% slower
3521 // thus we make a copy of the code and keep the function for demonstration purposes
3522
3523 //interpolate4Pixels8BitPerChannelNEON<tChannels>(source, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, targetPixelData);
3524 //targetPixelData += 4;
3525
3526 const uint32x4_t m128_u_tx_ = vsubq_u32(vdupq_n_u32(128u), m128_u_tx);
3527 const uint32x4_t m128_u_ty_ = vsubq_u32(vdupq_n_u32(128u), m128_u_ty);
3528
3529 // (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
3530 // == top_left * tx_ty_ + top_right * txty_ + bottom_left * tx_ty + bottom_right * txty
3531 const uint32x4_t m128_u_tx_ty_ = vmulq_u32(m128_u_tx_, m128_u_ty_);
3532 const uint32x4_t m128_u_txty_ = vmulq_u32(m128_u_tx, m128_u_ty_);
3533 const uint32x4_t m128_u_tx_ty = vmulq_u32(m128_u_tx_, m128_u_ty);
3534 const uint32x4_t m128_u_txty = vmulq_u32(m128_u_tx, m128_u_ty);
3535
3536 unsigned int tx_ty_s[4];
3537 unsigned int txty_s[4];
3538 unsigned int tx_tys[4];
3539 unsigned int txtys[4];
3540
3541 // we store the interpolation factors
3542 vst1q_u32(tx_ty_s, m128_u_tx_ty_);
3543 vst1q_u32(txty_s, m128_u_txty_);
3544 vst1q_u32(tx_tys, m128_u_tx_ty);
3545 vst1q_u32(txtys, m128_u_txty);
3546
3547 for (unsigned int i = 0u; i < 4u; ++i)
3548 {
3549 if (validPixels[i])
3550 {
3551 ocean_assert(topLeftOffsetsElements[i] < sourceStrideElements * sourceHeight);
3552 ocean_assert(topRightOffsetsElements[i] < sourceStrideElements * sourceHeight);
3553 ocean_assert(bottomLeftOffsetsElements[i] < sourceStrideElements * sourceHeight);
3554 ocean_assert(bottomRightOffsetsElements[i] < sourceStrideElements * sourceHeight);
3555
3556 const uint8_t* topLeft = source + topLeftOffsetsElements[i];
3557 const uint8_t* topRight = source + topRightOffsetsElements[i];
3558
3559 const uint8_t* bottomLeft = source + bottomLeftOffsetsElements[i];
3560 const uint8_t* bottomRight = source + bottomRightOffsetsElements[i];
3561
3562 const unsigned int tx_ty_ = tx_ty_s[i];
3563 const unsigned int txty_ = txty_s[i];
3564 const unsigned int tx_ty = tx_tys[i];
3565 const unsigned int txty = txtys[i];
3566
3567 ocean_assert(tx_ty_ + txty_ + tx_ty + txty == 128u * 128u);
3568
3569 for (unsigned int n = 0u; n < tChannels; ++n)
3570 {
3571 ((uint8_t*)targetRow)[n] = uint8_t((topLeft[n] * tx_ty_ + topRight[n] * txty_ + bottomLeft[n] * tx_ty + bottomRight[n] * txty + 8192u) >> 14u);
3572 }
3573 }
3574 else
3575 {
3576 *targetRow = *bColor;
3577 }
3578
3579 targetRow++;
3580 }
3581 }
3582 else
3583 {
3584 interpolate4Pixels8BitPerChannelNEON<tChannels>(source, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, targetRow);
3585 targetRow += 4;
3586 }
3587 }
3588 }
3589}
3590
3591template <unsigned int tChannels>
3592void FrameInterpolatorBilinear::homography8BitPerChannelNEONSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
3593{
3594 static_assert(tChannels >= 1u, "Invalid channel number!");
3595
3596 ocean_assert(input != nullptr && output != nullptr);
3597 ocean_assert(inputWidth > 0u && inputHeight > 0u);
3598 ocean_assert(outputWidth >= 4u && outputHeight > 0u);
3599 ocean_assert(input_H_output != nullptr);
3600
3601 ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
3602
3603 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
3604 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
3605
3606 using PixelType = typename DataType<uint8_t, tChannels>::Type;
3607
3608 uint8_t zeroColor[tChannels] = {uint8_t(0)};
3609 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
3610
3611 unsigned int validPixels[4];
3612
3613 unsigned int topLeftOffsetsElements[4];
3614 unsigned int topRightOffsetsElements[4];
3615 unsigned int bottomLeftOffsetsElements[4];
3616 unsigned int bottomRightOffsetsElements[4];
3617
3618 const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(tChannels);
3619
3620 // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
3621 const float32x4_t m128_f_X0 = vdupq_n_f32(float((*input_H_output)(0, 0)));
3622 const float32x4_t m128_f_X1 = vdupq_n_f32(float((*input_H_output)(1, 0)));
3623 const float32x4_t m128_f_X2 = vdupq_n_f32(float((*input_H_output)(2, 0)));
3624
3625 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
3626 {
3627 PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
3628
3629 /*
3630 * We can slightly optimize the 3x3 matrix multiplication:
3631 *
3632 * | X0 Y0 Z0 | | x |
3633 * | X1 Y1 Z1 | * | y |
3634 * | X2 Y2 Z2 | | 1 |
3635 *
3636 * | xx | | X0 * x | | Y0 * y + Z0 |
3637 * | yy | = | X1 * x | + | Y1 * y + Z1 |
3638 * | zz | | X2 * x | | Y2 * y + Z2 |
3639 *
3640 * | xx | | X0 * x | | C0 |
3641 * | yy | = | X1 * x | + | C1 |
3642 * | zz | | X2 * x | | C3 |
3643 *
3644 * As y is constant within the inner loop, we can pre-calculate the following terms:
3645 *
3646 * | x' | | (X0 * x + C0) / (X2 * x + C2) |
3647 * | y' | = | (X1 * x + C1) / (X2 * x + C2) |
3648 */
3649
3650 // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
3651 const float32x4_t m128_f_C0 = vdupq_n_f32(float((*input_H_output)(0, 1) * Scalar(y) + (*input_H_output)(0, 2)));
3652 const float32x4_t m128_f_C1 = vdupq_n_f32(float((*input_H_output)(1, 1) * Scalar(y) + (*input_H_output)(1, 2)));
3653 const float32x4_t m128_f_C2 = vdupq_n_f32(float((*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2)));
3654
3655 // we store 4 floats: [0.0f, 0.0f, 0.0f, 0.0f]
3656 const float32x4_t m128_f_zero = vdupq_n_f32(0.0f);
3657
3658 // we store 4 integers: [inputStrideElements, inputStrideElements, inputStrideElements, inputStrideElements]
3659 const uint32x4_t m128_u_inputStrideElements = vdupq_n_u32(inputStrideElements);
3660
3661 // we store 4 integers: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
3662 const uint32x4_t m128_u_inputWidth_1 = vdupq_n_u32(inputWidth - 1u);
3663 const uint32x4_t m128_u_inputHeight_1 = vdupq_n_u32(inputHeight - 1u);
3664
3665 // we store 4 floats: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
3666 const float32x4_t m128_f_inputWidth_1 = vdupq_n_f32(float(inputWidth - 1u));
3667 const float32x4_t m128_f_inputHeight_1 = vdupq_n_f32(float(inputHeight - 1u));
3668
3669 for (unsigned int x = 0u; x < outputWidth; x += 4u)
3670 {
3671 if (x + 4u > outputWidth)
3672 {
3673 // the last iteration will not fit into the output frame,
3674 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
3675
3676 ocean_assert(x >= 4u && outputWidth > 4u);
3677 const unsigned int newX = outputWidth - 4u;
3678
3679 ocean_assert(x > newX);
3680 outputPixelData -= x - newX;
3681
3682 x = newX;
3683
3684 // the for loop will stop after this iteration
3685 ocean_assert(!(x + 4u < outputWidth));
3686 }
3687
3688
3689 // we need four successive x coordinate floats:
3690 // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
3691 float x_0123[4] = {float(x + 0u), float(x + 1u), float(x + 2u), float(x + 3u)};
3692 const float32x4_t m128_f_x_0123 = vld1q_f32(x_0123);
3693
3694 // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
3695 const float32x4_t m128_f_xx = vmlaq_f32(m128_f_C0, m128_f_X0, m128_f_x_0123);
3696 const float32x4_t m128_f_yy = vmlaq_f32(m128_f_C1, m128_f_X1, m128_f_x_0123);
3697 const float32x4_t m128_f_zz = vmlaq_f32(m128_f_C2, m128_f_X2, m128_f_x_0123);
3698
3699#ifdef USE_DIVISION_ARM64_ARCHITECTURE
3700
3701 // using the division available from ARM64 is more precise
3702 const float32x4_t m128_f_inputX = vdivq_f32(m128_f_xx, m128_f_zz);
3703 const float32x4_t m128_f_inputY = vdivq_f32(m128_f_yy, m128_f_zz);
3704
3705#else
3706
3707 // we calculate the (approximated) inverse of zz
3708 // [1/zz3, 1/zz2, 1/zz1, 1/zz0]
3709 float32x4_t inv_zz_128 = vrecpeq_f32(m128_f_zz);
3710 inv_zz_128 = vmulq_f32(vrecpsq_f32(m128_f_zz, inv_zz_128), inv_zz_128); // improving the accuracy of the approx. inverse by Newton/Raphson
3711
3712 // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
3713 const float32x4_t m128_f_inputX = vmulq_f32(m128_f_xx, inv_zz_128);
3714 const float32x4_t m128_f_inputY = vmulq_f32(m128_f_yy, inv_zz_128);
3715
3716#endif // USE_DIVISION_ARM64_ARCHITECTURE
3717
3718
3719 // now we check whether we are inside the input frame
3720 const uint32x4_t m128_u_validPixelX = vandq_u32(vcleq_f32(m128_f_inputX, m128_f_inputWidth_1), vcgeq_f32(m128_f_inputX, m128_f_zero)); // inputPosition.x() >= 0 && inputPosition.x() <= (inputWidth - 1) ? 0xFFFFFF : 0x000000
3721 const uint32x4_t m128_u_validPixelY = vandq_u32(vcleq_f32(m128_f_inputY, m128_f_inputHeight_1), vcgeq_f32(m128_f_inputY, m128_f_zero)); // inputPosition.y() >= 0 && inputPosition.y() <= (inputHeight - 1) ? 0xFFFFFF : 0x000000
3722
3723 const uint32x4_t m128_u_validPixel = vandq_u32(m128_u_validPixelX, m128_u_validPixelY); // is_inside_input_frame(inputPosition) ? 0xFFFFFF : 0x000000
3724
3725
3726 // we can stop here if all pixels are invalid
3727 const uint32x2_t m64_u_validPixel = vorr_u32(vget_low_u32(m128_u_validPixel), vget_high_u32(m128_u_validPixel));
3728 if ((vget_lane_u32(m64_u_validPixel, 0) | vget_lane_u32(m64_u_validPixel, 1)) == 0x00000000u)
3729 {
3730#ifdef OCEAN_DEBUG
3731 OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
3732 vst1q_u32(debugValidPixels, m128_u_validPixel);
3733 ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
3734#endif
3735
3736 outputPixelData[0] = *bColor;
3737 outputPixelData[1] = *bColor;
3738 outputPixelData[2] = *bColor;
3739 outputPixelData[3] = *bColor;
3740
3741 outputPixelData += 4;
3742
3743 continue;
3744 }
3745
3746
3747 // we store the result
3748 vst1q_u32(validPixels, m128_u_validPixel);
3749 ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
3750
3751
3752 // now we determine the left, top, right and bottom pixel used for the interpolation
3753 // left = floor(x); top = floor(y)
3754 const uint32x4_t m128_u_left = vcvtq_u32_f32(m128_f_inputX);
3755 const uint32x4_t m128_u_top = vcvtq_u32_f32(m128_f_inputY);
3756
3757 // right = min(left + 1, width - 1); bottom = min(top + 1; height - 1)
3758 const uint32x4_t m128_u_right = vminq_u32(vaddq_u32(m128_u_left, vdupq_n_u32(1u)), m128_u_inputWidth_1);
3759 const uint32x4_t m128_u_bottom = vminq_u32(vaddq_u32(m128_u_top, vdupq_n_u32(1u)), m128_u_inputHeight_1);
3760
3761 // offset = y * stride + x * channels
3762 const uint32x4_t m128_u_topLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_top, m128_u_inputStrideElements); // topLeftOffset = top * strideElements + left * channels
3763 const uint32x4_t m128_u_topRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_top, m128_u_inputStrideElements); // topRightOffset = top * strideElements + right * channels
3764 const uint32x4_t m128_u_bottomLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_bottom, m128_u_inputStrideElements); // ...
3765 const uint32x4_t m128_u_bottomRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_bottom, m128_u_inputStrideElements);
3766
3767 // we store the offsets
3768 vst1q_u32(topLeftOffsetsElements, m128_u_topLeftOffsetElements);
3769 vst1q_u32(topRightOffsetsElements, m128_u_topRightOffsetElements);
3770 vst1q_u32(bottomLeftOffsetsElements, m128_u_bottomLeftOffsetElements);
3771 vst1q_u32(bottomRightOffsetsElements, m128_u_bottomRightOffsetElements);
3772
3773
3774 // now we need to determine the interpolation factors tx, tx_ and ty, ty_: (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
3775
3776 // we determine the fractional portions of the x' and y':
3777 float32x4_t m128_f_tx = vsubq_f32(m128_f_inputX, vcvtq_f32_u32(m128_u_left));
3778 float32x4_t m128_f_ty = vsubq_f32(m128_f_inputY, vcvtq_f32_u32(m128_u_top));
3779
3780 // we use integer interpolation [0.0, 1.0] -> [0, 128]
3781 m128_f_tx = vmulq_f32(m128_f_tx, vdupq_n_f32(128.0f));
3782 m128_f_ty = vmulq_f32(m128_f_ty, vdupq_n_f32(128.0f));
3783
3784 const uint32x4_t m128_u_tx = vcvtq_u32_f32(vaddq_f32(m128_f_tx, vdupq_n_f32(0.5)));
3785 const uint32x4_t m128_u_ty = vcvtq_u32_f32(vaddq_f32(m128_f_ty, vdupq_n_f32(0.5)));
3786
3787 if constexpr (tChannels > 4u)
3788 {
3789 // normally we would simply call instead of copying the code of the function to this location
3790 // however, if calling the function instead of applying the code here directly
3791 // clang ends with code approx. 20% slower
3792 // thus we make a copy of the code and keep the function for demonstration purposes
3793
3794 //interpolate4Pixels8BitPerChannelNEON<tChannels>(input, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, outputPixelData);
3795 //outputPixelData += 4;
3796
3797 const uint32x4_t m128_u_tx_ = vsubq_u32(vdupq_n_u32(128u), m128_u_tx);
3798 const uint32x4_t m128_u_ty_ = vsubq_u32(vdupq_n_u32(128u), m128_u_ty);
3799
3800 // (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
3801 // == top_left * tx_ty_ + top_right * txty_ + bottom_left * tx_ty + bottom_right * txty
3802 const uint32x4_t m128_u_tx_ty_ = vmulq_u32(m128_u_tx_, m128_u_ty_);
3803 const uint32x4_t m128_u_txty_ = vmulq_u32(m128_u_tx, m128_u_ty_);
3804 const uint32x4_t m128_u_tx_ty = vmulq_u32(m128_u_tx_, m128_u_ty);
3805 const uint32x4_t m128_u_txty = vmulq_u32(m128_u_tx, m128_u_ty);
3806
3807 unsigned int tx_ty_s[4];
3808 unsigned int txty_s[4];
3809 unsigned int tx_tys[4];
3810 unsigned int txtys[4];
3811
3812 // we store the interpolation factors
3813 vst1q_u32(tx_ty_s, m128_u_tx_ty_);
3814 vst1q_u32(txty_s, m128_u_txty_);
3815 vst1q_u32(tx_tys, m128_u_tx_ty);
3816 vst1q_u32(txtys, m128_u_txty);
3817
3818 for (unsigned int i = 0u; i < 4u; ++i)
3819 {
3820 if (validPixels[i])
3821 {
3822 ocean_assert(topLeftOffsetsElements[i] < inputStrideElements * inputHeight);
3823 ocean_assert(topRightOffsetsElements[i] < inputStrideElements * inputHeight);
3824 ocean_assert(bottomLeftOffsetsElements[i] < inputStrideElements * inputHeight);
3825 ocean_assert(bottomRightOffsetsElements[i] < inputStrideElements * inputHeight);
3826
3827 const uint8_t* topLeft = input + topLeftOffsetsElements[i];
3828 const uint8_t* topRight = input + topRightOffsetsElements[i];
3829
3830 const uint8_t* bottomLeft = input + bottomLeftOffsetsElements[i];
3831 const uint8_t* bottomRight = input + bottomRightOffsetsElements[i];
3832
3833 const unsigned int tx_ty_ = tx_ty_s[i];
3834 const unsigned int txty_ = txty_s[i];
3835 const unsigned int tx_ty = tx_tys[i];
3836 const unsigned int txty = txtys[i];
3837
3838 ocean_assert(tx_ty_ + txty_ + tx_ty + txty == 128u * 128u);
3839
3840 for (unsigned int n = 0u; n < tChannels; ++n)
3841 {
3842 ((uint8_t*)outputPixelData)[n] = uint8_t((topLeft[n] * tx_ty_ + topRight[n] * txty_ + bottomLeft[n] * tx_ty + bottomRight[n] * txty + 8192u) >> 14u);
3843 }
3844 }
3845 else
3846 {
3847 *outputPixelData = *bColor;
3848 }
3849
3850 outputPixelData++;
3851 }
3852 }
3853 else
3854 {
3855 interpolate4Pixels8BitPerChannelNEON<tChannels>(input, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, outputPixelData);
3856 outputPixelData += 4;
3857 }
3858 }
3859 }
3860}
3861
3862template <>
3863OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<1u>(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const DataType<uint8_t, 1u>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, 1u>::Type* targetPositionPixels)
3864{
3865 ocean_assert(source != nullptr);
3866 ocean_assert(targetPositionPixels != nullptr);
3867
3868 // as we do not initialize the following intermediate data,
3869 // we hopefully will not allocate memory on the stack each time this function is called
3870 DataType<uint8_t, 1u>::Type pixels[16];
3871
3872 // we will store the pixel information in the following pattern:
3873 // F E D C B A 9 8 7 6 5 4 3 2 1 0
3874 // BR3 BL3 TR3 TL3 BR2 BL2 TR2 TL2 BR1 BL1 TR1 TL1 BR0 BL0 TR0 TL0
3875
3876 // we gather the individual source pixel values from the source image,
3877 // based on the calculated pixel locations
3878 for (unsigned int i = 0u; i < 4u; ++i)
3879 {
3880 if (validPixels[i])
3881 {
3882 pixels[i * 4u + 0u] = *((const DataType<uint8_t, 1u>::Type*)(source + offsetsTopLeftElements[i]));
3883 pixels[i * 4u + 1u] = *((const DataType<uint8_t, 1u>::Type*)(source + offsetsTopRightElements[i]));
3884 pixels[i * 4u + 2u] = *((const DataType<uint8_t, 1u>::Type*)(source + offsetsBottomLeftElements[i]));
3885 pixels[i * 4u + 3u] = *((const DataType<uint8_t, 1u>::Type*)(source + offsetsBottomRightElements[i]));
3886 }
3887 else
3888 {
3889 pixels[i * 4u + 0u] = borderColor;
3890 pixels[i * 4u + 1u] = borderColor;
3891 pixels[i * 4u + 2u] = borderColor;
3892 pixels[i * 4u + 3u] = borderColor;
3893 }
3894 }
3895
3896 static_assert(sizeof(uint8x16_t) == sizeof(pixels), "Invalid data type!");
3897
3898 const uint8x16_t m128_pixels = vld1q_u8((const uint8_t*)pixels);
3899
3900
3901 // factorLeft = 128 - factorRight
3902 // factorTop = 128 - factorBottom
3903
3904 const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
3905 const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
3906
3907 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3908 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3909
3910 const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
3911 const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
3912 const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
3913 const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
3914
3915 // pixels stores the four interpolation grascale pixel values (top left, top right, bottom left, bottom right) for 4 (independent) pixels:
3916 // F E D C B A 9 8 7 6 5 4 3 2 1 0
3917 // BR BL TR TL BR BL TR TL BR BL TR TL BR BL TR TL
3918
3919 // factorsTopLeft stores the 32 bit interpolation values for 4 pixels:
3920 // FEDC BA98 7654 3210
3921 // 3 2 1 0 (32 bit interpolation values, fitting into 16 bit)
3922
3923
3924 // we will simply extract each channel from the source pixels,
3925 // each extracted channel will be multiplied by the corresponding interpolation factor
3926 // and all interpolation results will be accumulated afterwards
3927
3928 const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
3929
3930 const uint32x4_t m128_muliplicationA = vmulq_u32(vandq_u32(vreinterpretq_u32_u8(m128_pixels), m128_maskFirstByte), m128_factorsTopLeft);
3931 const uint32x4_t m128_muliplicationB = vmulq_u32(vandq_u32(vshrq_n_u32(vreinterpretq_u32_u8(m128_pixels), 8), m128_maskFirstByte), m128_factorsTopRight);
3932 const uint32x4_t m128_muliplicationC = vmulq_u32(vandq_u32(vshrq_n_u32(vreinterpretq_u32_u8(m128_pixels), 16), m128_maskFirstByte), m128_factorsBottomLeft);
3933 const uint32x4_t m128_muliplicationD = vmulq_u32(vandq_u32(vshrq_n_u32(vreinterpretq_u32_u8(m128_pixels), 24), m128_maskFirstByte), m128_factorsBottomRight);
3934
3935 const uint32x4_t m128_multiplication = vaddq_u32(vaddq_u32(m128_muliplicationA, m128_muliplicationB), vaddq_u32(m128_muliplicationC, m128_muliplicationD));
3936
3937 // we add 8192 and shift by 14 bits
3938
3939 const uint8x16_t m128_interpolation = vreinterpretq_u8_u32(vshrq_n_u32(vaddq_u32(m128_multiplication, vdupq_n_u32(8192u)), 14));
3940
3941 // finally we have the following result:
3942 // ---C ---8 ---4 ---0
3943 // and we need to extract the four pixel values:
3944 //
3945 // NOTE: Because of a possible bug in Clang affecting ARMv7, vget_lane_u32()
3946 // seems to assume 32-bit memory alignment for output location, which cannot
3947 // be guaranteed. This results in bus errors and crashes the application.
3948 // ARM64 is not affected.
3949#if defined(__aarch64__)
3950
3951 const uint8x8_t m64_mask0 = {0, 4, 1, 1, 1, 1, 1, 1};
3952 const uint8x8_t m64_mask1 = {1, 1, 0, 4, 1, 1, 1, 1};
3953
3954 const uint8x8_t m64_interpolation01 = vtbl1_u8(vget_low_u8(m128_interpolation), m64_mask0);
3955 const uint8x8_t m64_interpolation23 = vtbl1_u8(vget_high_u8(m128_interpolation), m64_mask1);
3956
3957 const uint8x8_t m64_interpolation0123 = vorr_u8(m64_interpolation01, m64_interpolation23);
3958
3959 const uint32_t result = vget_lane_u32(vreinterpret_u32_u8(m64_interpolation0123), 0);
3960 memcpy(targetPositionPixels, &result, sizeof(uint32_t));
3961
3962#else
3963
3964 *((uint8_t*)targetPositionPixels + 0) = vgetq_lane_u8(m128_interpolation, 0);
3965 *((uint8_t*)targetPositionPixels + 1) = vgetq_lane_u8(m128_interpolation, 4);
3966 *((uint8_t*)targetPositionPixels + 2) = vgetq_lane_u8(m128_interpolation, 8);
3967 *((uint8_t*)targetPositionPixels + 3) = vgetq_lane_u8(m128_interpolation, 12);
3968
3969#endif
3970}
3971
3972OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate8Pixels1Channel8BitNEON(const uint8x8_t& topLeft_u_8x8, const uint8x8_t& topRight_u_8x8, const uint8x8_t& bottomLeft_u_8x8, const uint8x8_t& bottomRight_u_8x8, const uint8x16_t& factorsRight_factorsBottom_128_u_8x16, uint8_t* targetPositionPixels)
3973{
3974 const uint8x16_t factorsLeft_factorsTop_128_u_8x16 = vsubq_u8(vdupq_n_u8(128u), factorsRight_factorsBottom_128_u_8x16); // factorLeft = 128 - factorRight, factorTop = 128 - factorBottomv
3975
3976 const uint8x8_t factorsRight_u_8x8 = vget_low_u8(factorsRight_factorsBottom_128_u_8x16);
3977 const uint16x8_t factorsBottom_u_16x8 = vmovl_u8(vget_high_u8(factorsRight_factorsBottom_128_u_8x16));
3978
3979 const uint8x8_t factorsLeft_u_8x8 = vget_low_u8(factorsLeft_factorsTop_128_u_8x16);
3980 const uint16x8_t factorsTop_u_16x8 = vmovl_u8(vget_high_u8(factorsLeft_factorsTop_128_u_8x16));
3981
3982 const uint16x8_t intermediateTop_u_16x8 = vmlal_u8(vmull_u8(topLeft_u_8x8, factorsLeft_u_8x8), topRight_u_8x8, factorsRight_u_8x8); // intermediateTop = topLeft * factorLeft + topRight * factorRight
3983 const uint16x8_t intermediateBottom_u_16x8 = vmlal_u8(vmull_u8(bottomLeft_u_8x8, factorsLeft_u_8x8), bottomRight_u_8x8, factorsRight_u_8x8); // intermediateBottom = bottomLeft * factorLeft + bottomRight * factorRight
3984
3985 const uint32x4_t resultA_32x4 = vmlal_u16(vmull_u16(vget_low_u16(intermediateTop_u_16x8), vget_low_u16(factorsTop_u_16x8)), vget_low_u16(intermediateBottom_u_16x8), vget_low_u16(factorsBottom_u_16x8)); // result = intermediateTop * factorTop + intermediateBottom + factorBottom
3986 const uint32x4_t resultB_32x4 = vmlal_u16(vmull_u16(vget_high_u16(intermediateTop_u_16x8), vget_high_u16(factorsTop_u_16x8)), vget_high_u16(intermediateBottom_u_16x8), vget_high_u16(factorsBottom_u_16x8));
3987
3988 const uint16x8_t result_16x8 = vcombine_u16(vrshrn_n_u32(resultA_32x4, 14), vrshrn_n_u32(resultB_32x4, 14)); // round(result / 16384.0)
3989
3990 const uint8x8_t result_8x8 = vmovn_u16(result_16x8);
3991
3992 vst1_u8(targetPositionPixels, result_8x8);
3993}
3994
3995template <>
3996OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<2u>(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const DataType<uint8_t, 2u>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, 2u>::Type* targetPositionPixels)
3997{
3998 ocean_assert(source != nullptr);
3999 ocean_assert(targetPositionPixels != nullptr);
4000
4001 using PixelType = typename DataType<uint8_t, 2u>::Type;
4002
4003 // as we do not initialize the following intermediate data,
4004 // we hopefully will not allocate memory on the stack each time this function is called
4005 PixelType topPixels[8];
4006 PixelType bottomPixels[8];
4007
4008 // we will store the pixel information in the following pattern (here for YA):
4009 // FE DC BA 98 76 54 32 10
4010 // YA YA YA YA YA YA YA YA
4011 // TR TL TR TL TR TL TR TL
4012
4013 // we gather the individual source pixel values from the source image,
4014 // based on the calculated pixel locations
4015 for (unsigned int i = 0u; i < 4u; ++i)
4016 {
4017 if (validPixels[i])
4018 {
4019 *(topPixels + i * 2u + 0u) = *((const PixelType*)(source + offsetsTopLeftElements[i]));
4020 *(topPixels + i * 2u + 1u) = *((const PixelType*)(source + offsetsTopRightElements[i]));
4021 *(bottomPixels + i * 2u + 0u) = *((const PixelType*)(source + offsetsBottomLeftElements[i]));
4022 *(bottomPixels + i * 2u + 1u) = *((const PixelType*)(source + offsetsBottomRightElements[i]));
4023 }
4024 else
4025 {
4026 *(topPixels + i * 2u + 0u) = borderColor;
4027 *(topPixels + i * 2u + 1u) = borderColor;
4028 *(bottomPixels + i * 2u + 0u) = borderColor;
4029 *(bottomPixels + i * 2u + 1u) = borderColor;
4030 }
4031 }
4032
4033 static_assert(sizeof(uint32x4_t) == sizeof(topPixels), "Invalid data type!");
4034
4035 const uint32x4_t m128_topPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)topPixels));
4036 const uint32x4_t m128_bottomPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)bottomPixels));
4037
4038
4039 // factorLeft = 128 - factorRight
4040 // factorTop = 128 - factorBottom
4041
4042 const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4043 const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
4044
4045 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
4046 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
4047
4048 const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4049 const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4050 const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4051 const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
4052
4053
4054 const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
4055
4056 uint32x4_t m128_muliplicationChannel0 = vmulq_u32(vandq_u32(m128_topPixels, m128_maskFirstByte), m128_factorsTopLeft);
4057 uint32x4_t m128_muliplicationChannel1 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topPixels, 8), m128_maskFirstByte), m128_factorsTopLeft);
4058
4059 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topPixels, 16), m128_maskFirstByte), m128_factorsTopRight));
4060 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topPixels, 24), m128_maskFirstByte), m128_factorsTopRight));
4061
4062 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomPixels, m128_maskFirstByte), m128_factorsBottomLeft));
4063 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomPixels, 8), m128_maskFirstByte), m128_factorsBottomLeft));
4064
4065 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomPixels, 16), m128_maskFirstByte), m128_factorsBottomRight));
4066 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomPixels, 24), m128_maskFirstByte), m128_factorsBottomRight));
4067
4068
4069 // we add 8192 and shift by 14 bits
4070
4071 const uint32x4_t m128_interpolation0 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel0, vdupq_n_u32(8192u)), 14);
4072 const uint32x4_t m128_interpolation1 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel1, vdupq_n_u32(8192u)), 14);
4073
4074 // finaly we blend the interpolation results together to get the following pattern:
4075 // FE DC BA 98 76 54 32 10
4076 // 00 YA 00 YA 00 YA 00 YA
4077
4078 const uint32x4_t m128_interpolation = vorrq_u32(m128_interpolation0, vshlq_n_u32(m128_interpolation1, 8));
4079
4080 // we shuffle the 128 bit register to a 64 bit register:
4081
4082 const uint8x8_t m64_mask0 = {0, 1, 4, 5, 2, 2, 2, 2};
4083 const uint8x8_t m64_mask1 = {2, 2, 2, 2, 0, 1, 4, 5};
4084
4085 const uint8x8_t m64_interpolation_low = vtbl1_u8(vget_low_u8(vreinterpretq_u8_u32(m128_interpolation)), m64_mask0);
4086 const uint8x8_t m64_interpolation_high = vtbl1_u8(vget_high_u8(vreinterpretq_u8_u32(m128_interpolation)), m64_mask1);
4087
4088 const uint8x8_t m64_interpolation = vorr_u8(m64_interpolation_low, m64_interpolation_high);
4089
4090 // no we can store the following pattern as one block:
4091
4092 // 76 54 32 10
4093 // YA YA YA YA
4094
4095 vst1_u8((uint8_t*)targetPositionPixels, m64_interpolation);
4096}
4097
4098template <>
4099OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<3u>(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const DataType<uint8_t, 3u>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, 3u>::Type* targetPositionPixels)
4100{
4101 ocean_assert(source != nullptr);
4102 ocean_assert(targetPositionPixels != nullptr);
4103
4104 // as we do not initialize the following intermediate data,
4105 // we hopefully will not allocate memory on the stack each time this function is called
4106 uint32_t topLeftPixels[4];
4107 uint32_t topRightPixels[4];
4108 uint32_t bottomLeftPixels[4];
4109 uint32_t bottomRightPixels[4];
4110
4111 // we will store the pixel information in the following pattern, note the padding byte after each pixel (here for RGB):
4112 // FEDCBA9876543210
4113 // BGR BGR BGR BGR
4114
4115 // we gather the individual source pixel values from the source image,
4116 // based on the calculated pixel locations
4117 for (unsigned int i = 0u; i < 4u; ++i)
4118 {
4119 if (validPixels[i])
4120 {
4121 memcpy(topLeftPixels + i, source + offsetsTopLeftElements[i], sizeof(uint8_t) * 3);
4122 memcpy(topRightPixels + i, source + offsetsTopRightElements[i], sizeof(uint8_t) * 3);
4123 memcpy(bottomLeftPixels + i, source + offsetsBottomLeftElements[i], sizeof(uint8_t) * 3);
4124 memcpy(bottomRightPixels + i, source + offsetsBottomRightElements[i], sizeof(uint8_t) * 3);
4125 }
4126 else
4127 {
4128 memcpy(topLeftPixels + i, &borderColor, sizeof(uint8_t) * 3);
4129 memcpy(topRightPixels + i, &borderColor, sizeof(uint8_t) * 3);
4130 memcpy(bottomLeftPixels + i, &borderColor, sizeof(uint8_t) * 3);
4131 memcpy(bottomRightPixels + i, &borderColor, sizeof(uint8_t) * 3);
4132 }
4133 }
4134
4135 static_assert(sizeof(uint32x4_t) == sizeof(topLeftPixels), "Invalid data type!");
4136
4137 const uint32x4_t m128_topLeftPixels = vld1q_u32(topLeftPixels);
4138 const uint32x4_t m128_topRightPixels = vld1q_u32(topRightPixels);
4139 const uint32x4_t m128_bottomLeftPixels = vld1q_u32(bottomLeftPixels);
4140 const uint32x4_t m128_bottomRightPixels = vld1q_u32(bottomRightPixels);
4141
4142
4143 // factorLeft = 128 - factorRight
4144 // factorTop = 128 - factorBottom
4145
4146 const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4147 const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
4148
4149 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
4150 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
4151
4152 const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4153 const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4154 const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4155 const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
4156
4157
4158 const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
4159
4160 uint32x4_t m128_muliplicationChannel0 = vmulq_u32(vandq_u32(m128_topLeftPixels, m128_maskFirstByte), m128_factorsTopLeft);
4161 uint32x4_t m128_muliplicationChannel1 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 8), m128_maskFirstByte), m128_factorsTopLeft);
4162 uint32x4_t m128_muliplicationChannel2 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 16), m128_maskFirstByte), m128_factorsTopLeft);
4163
4164 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_topRightPixels, m128_maskFirstByte), m128_factorsTopRight));
4165 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 8), m128_maskFirstByte), m128_factorsTopRight));
4166 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 16), m128_maskFirstByte), m128_factorsTopRight));
4167
4168 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomLeftPixels, m128_maskFirstByte), m128_factorsBottomLeft));
4169 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 8), m128_maskFirstByte), m128_factorsBottomLeft));
4170 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 16), m128_maskFirstByte), m128_factorsBottomLeft));
4171
4172 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomRightPixels, m128_maskFirstByte), m128_factorsBottomRight));
4173 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 8), m128_maskFirstByte), m128_factorsBottomRight));
4174 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 16), m128_maskFirstByte), m128_factorsBottomRight));
4175
4176
4177 // we add 8192 and shift by 14 bits
4178
4179 const uint32x4_t m128_interpolation0 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel0, vdupq_n_u32(8192u)), 14);
4180 const uint32x4_t m128_interpolation1 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel1, vdupq_n_u32(8192u)), 14);
4181 const uint32x4_t m128_interpolation2 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel2, vdupq_n_u32(8192u)), 14);
4182
4183 // finaly we blend the interpolation results together
4184
4185 const uint32x4_t m128_interpolation = vorrq_u32(vorrq_u32(m128_interpolation0, vshlq_n_u32(m128_interpolation1, 8)), vshlq_n_u32(m128_interpolation2, 16));
4186
4187 // we have to extract the get rid of the padding byte:
4188 // FEDCBA9876543210
4189 // BGR BGR BGR BGR
4190
4191 uint32_t intermediateBuffer[4];
4192 vst1q_u32(intermediateBuffer, m128_interpolation);
4193
4194 for (unsigned int i = 0u; i < 4u; ++i)
4195 {
4196 memcpy(targetPositionPixels + i, intermediateBuffer + i, sizeof(uint8_t) * 3);
4197 }
4198}
4199
4200template <>
4201OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<4u>(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const DataType<uint8_t, 4u>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, 4u>::Type* targetPositionPixels)
4202{
4203 ocean_assert(source != nullptr);
4204 ocean_assert(targetPositionPixels != nullptr);
4205
4206 using PixelType = typename DataType<uint8_t, 4u>::Type;
4207
4208 // as we do not initialize the following intermediate data,
4209 // we hopefully will not allocate memory on the stack each time this function is called
4210 PixelType topLeftPixels[4];
4211 PixelType topRightPixels[4];
4212 PixelType bottomLeftPixels[4];
4213 PixelType bottomRightPixels[4];
4214
4215 // we will store the pixel information in the following pattern (here for RGBA):
4216 // FEDC BA98 7654 3210
4217 // ABGR ABGR ABGR ABGR
4218
4219 // we gather the individual source pixel values from the source image,
4220 // based on the calculated pixel locations
4221 for (unsigned int i = 0u; i < 4u; ++i)
4222 {
4223 if (validPixels[i])
4224 {
4225 *(topLeftPixels + i) = *((const PixelType*)(source + offsetsTopLeftElements[i]));
4226 *(topRightPixels + i) = *((const PixelType*)(source + offsetsTopRightElements[i]));
4227 *(bottomLeftPixels + i) = *((const PixelType*)(source + offsetsBottomLeftElements[i]));
4228 *(bottomRightPixels + i) = *((const PixelType*)(source + offsetsBottomRightElements[i]));
4229 }
4230 else
4231 {
4232 *(topLeftPixels + i) = borderColor;
4233 *(topRightPixels + i) = borderColor;
4234 *(bottomLeftPixels + i) = borderColor;
4235 *(bottomRightPixels + i) = borderColor;
4236 }
4237 }
4238
4239 static_assert(sizeof(uint32x4_t) == sizeof(topLeftPixels), "Invalid data type!");
4240
4241 const uint32x4_t m128_topLeftPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)topLeftPixels));
4242 const uint32x4_t m128_topRightPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)topRightPixels));
4243 const uint32x4_t m128_bottomLeftPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)bottomLeftPixels));
4244 const uint32x4_t m128_bottomRightPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)bottomRightPixels));
4245
4246
4247 // factorLeft = 128 - factorRight
4248 // factorTop = 128 - factorBottom
4249
4250 const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4251 const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
4252
4253 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
4254 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
4255
4256 const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4257 const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4258 const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4259 const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
4260
4261
4262 const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
4263
4264 uint32x4_t m128_muliplicationChannel0 = vmulq_u32(vandq_u32(m128_topLeftPixels, m128_maskFirstByte), m128_factorsTopLeft);
4265 uint32x4_t m128_muliplicationChannel1 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 8), m128_maskFirstByte), m128_factorsTopLeft);
4266 uint32x4_t m128_muliplicationChannel2 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 16), m128_maskFirstByte), m128_factorsTopLeft);
4267 uint32x4_t m128_muliplicationChannel3 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 24), m128_maskFirstByte), m128_factorsTopLeft);
4268
4269 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_topRightPixels, m128_maskFirstByte), m128_factorsTopRight));
4270 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 8), m128_maskFirstByte), m128_factorsTopRight));
4271 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 16), m128_maskFirstByte), m128_factorsTopRight));
4272 m128_muliplicationChannel3 = vaddq_u32(m128_muliplicationChannel3, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 24), m128_maskFirstByte), m128_factorsTopRight));
4273
4274 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomLeftPixels, m128_maskFirstByte), m128_factorsBottomLeft));
4275 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 8), m128_maskFirstByte), m128_factorsBottomLeft));
4276 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 16), m128_maskFirstByte), m128_factorsBottomLeft));
4277 m128_muliplicationChannel3 = vaddq_u32(m128_muliplicationChannel3, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 24), m128_maskFirstByte), m128_factorsBottomLeft));
4278
4279 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomRightPixels, m128_maskFirstByte), m128_factorsBottomRight));
4280 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 8), m128_maskFirstByte), m128_factorsBottomRight));
4281 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 16), m128_maskFirstByte), m128_factorsBottomRight));
4282 m128_muliplicationChannel3 = vaddq_u32(m128_muliplicationChannel3, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 24), m128_maskFirstByte), m128_factorsBottomRight));
4283
4284
4285 // we add 8192 and shift by 14 bits
4286
4287 const uint32x4_t m128_interpolation0 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel0, vdupq_n_u32(8192u)), 14);
4288 const uint32x4_t m128_interpolation1 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel1, vdupq_n_u32(8192u)), 14);
4289 const uint32x4_t m128_interpolation2 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel2, vdupq_n_u32(8192u)), 14);
4290 const uint32x4_t m128_interpolation3 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel3, vdupq_n_u32(8192u)), 14);
4291
4292 // finaly we blend the interpolation results together
4293
4294 const uint32x4_t m128_interpolation = vorrq_u32(vorrq_u32(m128_interpolation0, vshlq_n_u32(m128_interpolation1, 8)), vorrq_u32(vshlq_n_u32(m128_interpolation2, 16), vshlq_n_u32(m128_interpolation3, 24)));
4295
4296 vst1q_u8((uint8_t*)targetPositionPixels, vreinterpretq_u8_u32(m128_interpolation));
4297}
4298
4299template <unsigned int tChannels>
4300OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const typename DataType<uint8_t, tChannels>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, tChannels>::Type* targetPositionPixels)
4301{
4302 ocean_assert(source != nullptr);
4303 ocean_assert(targetPositionPixels != nullptr);
4304
4305 // as we do not initialize the following intermediate data,
4306 // we hopefully will not allocate memory on the stack each time this function is called
4307 unsigned int factorsTopLeft[4];
4308 unsigned int factorsTopRight[4];
4309 unsigned int factorsBottomLeft[4];
4310 unsigned int factorsBottomRight[4];
4311
4312
4313 // factorLeft = 128 - factorRight
4314 // factorTop = 128 - factorBottom
4315
4316 const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4317 const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
4318
4319 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
4320 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
4321
4322 const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4323 const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4324 const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4325 const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
4326
4327
4328 // we store the interpolation factors
4329 vst1q_u32(factorsTopLeft, m128_factorsTopLeft);
4330 vst1q_u32(factorsTopRight, m128_factorsTopRight);
4331 vst1q_u32(factorsBottomLeft, m128_factorsBottomLeft);
4332 vst1q_u32(factorsBottomRight, m128_factorsBottomRight);
4333
4334 for (unsigned int i = 0u; i < 4u; ++i)
4335 {
4336 if (validPixels[i])
4337 {
4338 const uint8_t* topLeft = source + offsetsTopLeftElements[i];
4339 const uint8_t* topRight = source + offsetsTopRightElements[i];
4340
4341 const uint8_t* bottomLeft = source + offsetsBottomLeftElements[i];
4342 const uint8_t* bottomRight = source + offsetsBottomRightElements[i];
4343
4344 const unsigned int& factorTopLeft = factorsTopLeft[i];
4345 const unsigned int& factorTopRight = factorsTopRight[i];
4346 const unsigned int& factorBottomLeft = factorsBottomLeft[i];
4347 const unsigned int& factorBottomRight = factorsBottomRight[i];
4348
4349 for (unsigned int n = 0u; n < tChannels; ++n)
4350 {
4351 ((uint8_t*)targetPositionPixels)[n] = (topLeft[n] * factorTopLeft + topRight[n] * factorTopRight + bottomLeft[n] * factorBottomLeft + bottomRight[n] * factorBottomRight + 8192u) >> 14u;
4352 }
4353 }
4354 else
4355 {
4356 *targetPositionPixels = borderColor;
4357 }
4358
4359 targetPositionPixels++;
4360 }
4361}
4362
4363#endif // OCEAN_HARDWARE_NEON_VERSION
4364
4365template <unsigned int tChannels>
4366inline void FrameInterpolatorBilinear::homographies8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* homographies, const uint8_t* borderColor, uint8_t* output, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
4367{
4368 static_assert(tChannels >= 1u, "Invalid channel number!");
4369
4370 ocean_assert(input && output);
4371 ocean_assert(inputWidth > 0u && inputHeight > 0u);
4372 ocean_assert(outputWidth > 0u && outputHeight > 0u);
4373
4374 ocean_assert(outputQuadrantCenterX >= 0 && outputQuadrantCenterX < Scalar(outputWidth));
4375 ocean_assert(outputQuadrantCenterY >= 0 && outputQuadrantCenterY < Scalar(outputHeight));
4376 ocean_assert(homographies);
4377
4378 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
4379
4380 const Scalar scalarInputWidth_1 = Scalar(inputWidth - 1u);
4381 const Scalar scalarInputHeight_1 = Scalar(inputHeight - 1u);
4382
4383 constexpr uint8_t zeroColor[tChannels] = {uint8_t(0)};
4384 const uint8_t* const bColor = borderColor ? borderColor : zeroColor;
4385
4386 uint8_t* outputData = output + firstOutputRow * outputStrideElements;
4387
4388 const Scalar left = Scalar(outputQuadrantCenterX) * Scalar(0.5);
4389 const Scalar right = (Scalar(outputWidth) + Scalar(outputQuadrantCenterX)) * Scalar(0.5);
4390
4391 const Scalar top = Scalar(outputQuadrantCenterY) * Scalar(0.5);
4392 const Scalar bottom = (Scalar(outputHeight) + Scalar(outputQuadrantCenterY)) * Scalar(0.5);
4393
4394 ocean_assert(right - left > Numeric::eps());
4395 ocean_assert(bottom - top > Numeric::eps());
4396
4397 const Scalar invWidth = Scalar(1) / Scalar(right - left);
4398 const Scalar invHeight = Scalar(1) / Scalar(bottom - top);
4399
4400 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
4401 {
4402 for (unsigned int x = 0; x < outputWidth; ++x)
4403 {
4404 Vector2 outputPosition = Vector2(Scalar(int(x)), Scalar(int(y)));
4405
4406 const Scalar _tx = minmax<Scalar>(0, (outputPosition.x() - left) * invWidth, 1);
4407 const Scalar _ty = minmax<Scalar>(0, (outputPosition.y() - top) * invHeight, 1);
4408
4409 outputPosition += Vector2(Scalar(outputOriginX), Scalar(outputOriginY));
4410
4411 const Scalar tx = 1 - _tx;
4412 const Scalar ty = 1 - _ty;
4413
4414 const Vector2 inputPositionTopLeft(homographies[0] * outputPosition);
4415 const Vector2 inputPositionTopRight(homographies[1] * outputPosition);
4416 const Vector2 inputPositionBottomLeft(homographies[2] * outputPosition);
4417 const Vector2 inputPositionBottomRight(homographies[3] * outputPosition);
4418
4419 const Scalar tTopLeft = tx * ty;
4420 const Scalar tTopRight = _tx * ty;
4421 const Scalar tBottomLeft = tx * _ty;
4422 const Scalar tBottomRight = _tx * _ty;
4423
4424 const Vector2 inputPosition = inputPositionTopLeft * tTopLeft + inputPositionTopRight * tTopRight
4425 + inputPositionBottomLeft * tBottomLeft + inputPositionBottomRight * tBottomRight;
4426
4427 if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4428 {
4429 for (unsigned int c = 0u; c < tChannels; ++c)
4430 {
4431 outputData[c] = bColor[c];
4432 }
4433 }
4434 else
4435 {
4436 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, outputData);
4437 }
4438
4439 outputData += tChannels;
4440 }
4441
4442 outputData += outputPaddingElements;
4443 }
4444}
4445
4446template <unsigned int tChannels>
4447void FrameInterpolatorBilinear::homographyMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, unsigned int firstOutputRow, const unsigned int numberOutputRows)
4448{
4449 static_assert(tChannels >= 1u, "Invalid channel number!");
4450
4451 ocean_assert(input != nullptr && output != nullptr);
4452 ocean_assert(inputWidth > 0u && inputHeight > 0u);
4453 ocean_assert(outputWidth > 0u && outputHeight > 0u);
4454 ocean_assert(input_H_output != nullptr);
4455
4456 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
4457
4458 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
4459 const unsigned int outputMaskStrideElements = outputWidth + outputMaskPaddingElements;
4460
4461 const Scalar scalarInputWidth_1 = Scalar(inputWidth - 1u);
4462 const Scalar scalarInputHeight_1 = Scalar(inputHeight - 1u);
4463
4464 using PixelType = typename DataType<uint8_t, tChannels>::Type;
4465
4466 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
4467 {
4468 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
4469 uint8_t* outputMaskData = outputMask + y * outputMaskStrideElements;
4470
4471 /*
4472 * We can slightly optimize the 3x3 matrix multiplication:
4473 *
4474 * | X0 Y0 Z0 | | x |
4475 * | X1 Y1 Z1 | * | y |
4476 * | X2 Y2 Z2 | | 1 |
4477 *
4478 * | x' | | X0 * x | | Y0 * y + Z0 |
4479 * | y' | = | X1 * x | + | Y1 * y + Z1 |
4480 * | z' | | X2 * x | | Y2 * y + Z2 |
4481 *
4482 * As y is constant within the inner loop, we can pre-calculate the following terms:
4483 *
4484 * | x' | | (X0 * x + constValue0) / (X2 * x + constValue2) |
4485 * | y' | = | (X1 * x + constValue1) / (X2 * x + constValue2) |
4486 *
4487 * | p | = | (X * x + c) / (X2 * x + constValue2) |
4488 */
4489
4490 const Vector2 X(input_H_output->data() + 0);
4491 const Vector2 c(Vector2(input_H_output->data() + 3) * Scalar(y) + Vector2(input_H_output->data() + 6));
4492
4493 const Scalar X2 = (*input_H_output)(2, 0);
4494 const Scalar constValue2 = (*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2);
4495
4496 for (unsigned int x = 0; x < outputWidth; ++x)
4497 {
4498 const Vector2 inputPosition((X * Scalar(x) + c) / (X2 * Scalar(x) + constValue2));
4499
4500#ifdef OCEAN_DEBUG
4501 const Vector2 debugInputPosition(*input_H_output * Vector2(Scalar(x), Scalar(y)));
4502 ocean_assert(inputPosition.isEqual(debugInputPosition, Scalar(0.01)));
4503#endif
4504
4505 if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4506 {
4507 *outputMaskData = 0xFF - maskValue;
4508 }
4509 else
4510 {
4511 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputData));
4512 *outputMaskData = maskValue;
4513 }
4514
4515 outputData++;
4516 outputMaskData++;
4517 }
4518 }
4519}
4520
4521template <unsigned int tChannels>
4522inline void FrameInterpolatorBilinear::homographiesMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* homographies, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
4523{
4524 static_assert(tChannels >= 1u, "Invalid channel number!");
4525
4526 ocean_assert(input && output);
4527 ocean_assert(inputWidth > 0u && inputHeight > 0u);
4528 ocean_assert(outputWidth > 0u && outputHeight > 0u);
4529
4530 ocean_assert(outputQuadrantCenterX >= 0 && outputQuadrantCenterX < Scalar(outputWidth));
4531 ocean_assert(outputQuadrantCenterY >= 0 && outputQuadrantCenterY < Scalar(outputHeight));
4532 ocean_assert(homographies);
4533
4534 const unsigned int outputStrideElements = tChannels * outputWidth + outputPaddingElements;
4535 const unsigned int outputMaskStrideElements = outputWidth + outputMaskPaddingElements;
4536
4537 const Scalar scalarInputWidth_1 = Scalar(inputWidth - 1u);
4538 const Scalar scalarInputHeight_1 = Scalar(inputHeight - 1u);
4539
4540 uint8_t* outputData = output + firstOutputRow * outputStrideElements;
4541 outputMask += firstOutputRow * outputMaskStrideElements;
4542
4543 const Scalar left = Scalar(outputQuadrantCenterX) * Scalar(0.5);
4544 const Scalar right = (Scalar(outputWidth) + Scalar(outputQuadrantCenterX)) * Scalar(0.5);
4545
4546 const Scalar top = Scalar(outputQuadrantCenterY) * Scalar(0.5);
4547 const Scalar bottom = (Scalar(outputHeight) + Scalar(outputQuadrantCenterY)) * Scalar(0.5);
4548
4549 ocean_assert(right - left > Numeric::eps());
4550 ocean_assert(bottom - top > Numeric::eps());
4551
4552 const Scalar invWidth = Scalar(1) / Scalar(right - left);
4553 const Scalar invHeight = Scalar(1) / Scalar(bottom - top);
4554
4555 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
4556 {
4557 for (unsigned int x = 0u; x < outputWidth; ++x)
4558 {
4559 Vector2 outputPosition = Vector2(Scalar(int(x)), Scalar(int(y)));
4560
4561 const Scalar _tx = minmax<Scalar>(0, (outputPosition.x() - left) * invWidth, 1);
4562 const Scalar _ty = minmax<Scalar>(0, (outputPosition.y() - top) * invHeight, 1);
4563
4564 outputPosition += Vector2(Scalar(outputOriginX), Scalar(outputOriginY));
4565
4566 const Scalar tx = 1 - _tx;
4567 const Scalar ty = 1 - _ty;
4568
4569 const Vector2 inputPositionTopLeft(homographies[0] * outputPosition);
4570 const Vector2 inputPositionTopRight(homographies[1] * outputPosition);
4571 const Vector2 inputPositionBottomLeft(homographies[2] * outputPosition);
4572 const Vector2 inputPositionBottomRight(homographies[3] * outputPosition);
4573
4574 const Scalar tTopLeft = tx * ty;
4575 const Scalar tTopRight = _tx * ty;
4576 const Scalar tBottomLeft = tx * _ty;
4577 const Scalar tBottomRight = _tx * _ty;
4578
4579 const Vector2 inputPosition = inputPositionTopLeft * tTopLeft + inputPositionTopRight * tTopRight
4580 + inputPositionBottomLeft * tBottomLeft + inputPositionBottomRight * tBottomRight;
4581
4582 if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4583 {
4584 *outputMask = 0xFFu - maskValue;
4585 }
4586 else
4587 {
4588 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, outputData);
4589 *outputMask = maskValue;
4590 }
4591
4592 outputData += tChannels;
4593 outputMask++;
4594 }
4595
4596 outputData += outputPaddingElements;
4597 outputMask += outputMaskPaddingElements;
4598 }
4599}
4600
4601template <unsigned int tChannels>
4602void FrameInterpolatorBilinear::homographyWithCamera8BitPerChannelSubset(const PinholeCamera* inputCamera, const PinholeCamera* outputCamera, const PinholeCamera::DistortionLookup* outputCameraDistortionLookup, const uint8_t* input, const SquareMatrix3* normalizedHomography, const bool useDistortionParameters, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4603{
4604 static_assert(tChannels >= 1u, "Invalid channel number!");
4605
4606 ocean_assert(inputCamera && outputCamera && normalizedHomography);
4607 ocean_assert(input && output);
4608
4609 ocean_assert(firstRow + numberRows <= outputCamera->height());
4610
4611 const unsigned int outputStrideElements = tChannels * outputCamera->width() + outputPaddingElements;
4612
4613 const Scalar scalarInputWidth_1 = Scalar(inputCamera->width() - 1u);
4614 const Scalar scalarInputHeight_1 = Scalar(inputCamera->height() - 1u);
4615
4616 const SquareMatrix3 combinedMatrix(*normalizedHomography * outputCamera->invertedIntrinsic());
4617
4618 using PixelType = typename DataType<uint8_t, tChannels>::Type;
4619
4620 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
4621 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
4622
4623 uint8_t* outputData = output + firstRow * outputStrideElements;
4624
4625 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4626 {
4627 for (unsigned int x = 0; x < outputCamera->width(); ++x)
4628 {
4629 const Vector2 inputPosition(inputCamera->normalizedImagePoint2imagePoint<true>(combinedMatrix * outputCameraDistortionLookup->undistortedImagePoint(Vector2(Scalar(x), Scalar(y))), useDistortionParameters));
4630
4631 if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4632 {
4633 *((PixelType*)outputData) = *bColor;
4634 }
4635 else
4636 {
4637 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputCamera->width(), inputCamera->height(), inputPaddingElements, inputPosition, outputData);
4638 }
4639
4640 outputData += tChannels;
4641 }
4642
4643 outputData += outputPaddingElements;
4644 }
4645}
4646
4647template <unsigned int tChannels>
4648void FrameInterpolatorBilinear::homographyWithCameraMask8BitPerChannelSubset(const PinholeCamera* inputCamera, const PinholeCamera* outputCamera, const PinholeCamera::DistortionLookup* outputCameraDistortionLookup, const uint8_t* input, const unsigned int inputPaddingElements, const SquareMatrix3* normalizedHomography, uint8_t* output, uint8_t* outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows)
4649{
4650 static_assert(tChannels >= 1u, "Invalid channel number!");
4651
4652 ocean_assert(inputCamera != nullptr && outputCamera != nullptr && normalizedHomography != nullptr);
4653 ocean_assert(input != nullptr && output != nullptr);
4654
4655 ocean_assert(firstRow + numberRows <= outputCamera->height());
4656
4657 const unsigned int outputStrideElements = outputCamera->width() * tChannels + outputPaddingElements;
4658 const unsigned int outputMaskStrideElements = outputCamera->width() + outputMaskPaddingElements;
4659
4660 const Scalar scalarInputWidth_1 = Scalar(inputCamera->width() - 1u);
4661 const Scalar scalarInputHeight_1 = Scalar(inputCamera->height() - 1u);
4662
4663 const SquareMatrix3 combinedMatrix(*normalizedHomography * outputCamera->invertedIntrinsic());
4664
4665 uint8_t* outputData = output + firstRow * outputStrideElements;
4666 outputMask += firstRow * outputMaskStrideElements;
4667
4668 constexpr bool useDistortionParameters = true;
4669
4670 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4671 {
4672 for (unsigned int x = 0; x < outputCamera->width(); ++x)
4673 {
4674 const Vector2 inputPosition(inputCamera->normalizedImagePoint2imagePoint<true>(combinedMatrix * outputCameraDistortionLookup->undistortedImagePoint(Vector2(Scalar(x), Scalar(y))), useDistortionParameters));
4675
4676 if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4677 {
4678 *outputMask = 0xFF - maskValue;
4679 }
4680 else
4681 {
4682 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputCamera->width(), inputCamera->height(), inputPaddingElements, inputPosition, outputData);
4683 *outputMask = maskValue;
4684 }
4685
4686 outputData += tChannels;
4687 ++outputMask;
4688 }
4689
4690 outputData += outputPaddingElements;
4691 outputMask += outputMaskPaddingElements;
4692 }
4693}
4694
4695template <unsigned int tChannels>
4696void FrameInterpolatorBilinear::lookup8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4697{
4698 static_assert(tChannels >= 1u, "Invalid channel number!");
4699
4700 ocean_assert(input_LT_output != nullptr);
4701 ocean_assert(input != nullptr && output != nullptr);
4702
4703 ocean_assert(inputWidth != 0u && inputHeight != 0u);
4704 ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
4705
4706 using PixelType = typename DataType<uint8_t, tChannels>::Type;
4707
4708 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
4709 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
4710
4711 const unsigned int columns = (unsigned int)(input_LT_output->sizeX());
4712
4713 const unsigned int outputStrideElements = tChannels * columns + outputPaddingElements;
4714
4715 static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
4716
4717 const Scalar inputWidth1 = Scalar(inputWidth - 1u);
4718 const Scalar inputHeight1 = Scalar(inputHeight - 1u);
4719
4720 Memory rowLookupMemory = Memory::create<Vector2>(columns);
4721 Vector2* const rowLookupData = rowLookupMemory.data<Vector2>();
4722
4723 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4724 {
4725 input_LT_output->bilinearValues(y, rowLookupData);
4726
4727 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
4728
4729 for (unsigned int x = 0u; x < columns; ++x)
4730 {
4731 const Vector2& lookupValue = rowLookupData[x];
4732
4733 const Vector2 inputPosition = offset ? Vector2(Scalar(x) + lookupValue.x(), Scalar(y) + lookupValue.y()) : lookupValue;
4734
4735 if (inputPosition.x() >= Scalar(0) && inputPosition.y() >= Scalar(0) && inputPosition.x() <= inputWidth1 && inputPosition.y() <= inputHeight1)
4736 {
4737 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputData));
4738 }
4739 else
4740 {
4741 *outputData = *bColor;
4742 }
4743
4744 outputData++;
4745 }
4746 }
4747}
4748
4749template <typename T, unsigned int tChannels>
4750void FrameInterpolatorBilinear::lookupSubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const T* borderColor, T* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4751{
4752 static_assert(tChannels >= 1u, "Invalid channel number!");
4753
4754 ocean_assert((!std::is_same<uint8_t, T>::value));
4755
4756 ocean_assert(input_LT_output != nullptr);
4757 ocean_assert(input != nullptr && output != nullptr);
4758
4759 ocean_assert(inputWidth != 0u && inputHeight != 0u);
4760 ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
4761
4762 using PixelType = typename DataType<T, tChannels>::Type;
4763
4764 const T zeroColor[tChannels] = {T(0)};
4765 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
4766
4767 const unsigned int columns = (unsigned int)(input_LT_output->sizeX());
4768
4769 const unsigned int outputStrideElements = tChannels * columns + outputPaddingElements;
4770
4771 static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
4772
4773 const Scalar inputWidth1 = Scalar(inputWidth - 1u);
4774 const Scalar inputHeight1 = Scalar(inputHeight - 1u);
4775
4776 Memory rowLookupMemory = Memory::create<Vector2>(columns);
4777 Vector2* const rowLookupData = rowLookupMemory.data<Vector2>();
4778
4779 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4780 {
4781 input_LT_output->bilinearValues(y, rowLookupData);
4782
4783 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
4784
4785 for (unsigned int x = 0u; x < columns; ++x)
4786 {
4787 const Vector2& lookupValue = rowLookupData[x];
4788
4789 const Vector2 inputPosition = offset ? Vector2(Scalar(x) + lookupValue.x(), Scalar(y) + lookupValue.y()) : lookupValue;
4790
4791 if (inputPosition.x() >= Scalar(0) && inputPosition.y() >= Scalar(0) && inputPosition.x() <= inputWidth1 && inputPosition.y() <= inputHeight1)
4792 {
4793 interpolatePixel<T, T, tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (T*)(outputData));
4794 }
4795 else
4796 {
4797 *outputData = *bColor;
4798 }
4799
4800 outputData++;
4801 }
4802 }
4803}
4804
4805#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
4806
4807template <>
4808inline void FrameInterpolatorBilinear::lookup8BitPerChannelSubsetNEON<1u>(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4809{
4810 ocean_assert(input_LT_output != nullptr);
4811 ocean_assert(input != nullptr && output != nullptr);
4812
4813 ocean_assert(inputWidth != 0u && inputHeight != 0u);
4814 ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
4815
4816 using PixelType = uint8_t;
4817
4818 const uint8x16_t constantBorderColor_u_8x16 = vdupq_n_u8(borderColor ? *borderColor : 0u);
4819
4820 const unsigned int outputWidth = (unsigned int)(input_LT_output->sizeX());
4821 ocean_assert(outputWidth >= 8u);
4822
4823 static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
4824
4825 const unsigned int inputStrideElements = inputWidth + inputPaddingElements;
4826 const unsigned int outputStrideElements = outputWidth + outputPaddingElements;
4827
4828 Memory rowLookupMemory = Memory::create<VectorF2>(outputWidth);
4829 VectorF2* const rowLookupData = rowLookupMemory.data<VectorF2>();
4830
4831 const float32x4_t constantZero_f_32x4 = vdupq_n_f32(0.0f); // [0.0f, 0.0f, 0.0f, 0.0f]
4832 const float32x4_t constantEight_f_32x4 = vdupq_n_f32(8.0f); // [4.0f, 4.0f, 4.0f, 4.0f]
4833
4834 // [0.0f, 1.0f, 2.0f, 3.0f, ...]
4835 const float f_01234567[8] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
4836 const float32x4_t conststant0123_f_32x4 = vld1q_f32(f_01234567 + 0);
4837 const float32x4_t conststant4567_f_32x4 = vld1q_f32(f_01234567 + 4);
4838
4839 const float32x4_t constant128_f_32x4 = vdupq_n_f32(128.0f);
4840
4841 const uint32x4_t constantOne_u_32x4 = vdupq_n_u32(1u);
4842
4843 const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(1u);
4844
4845 const float32x4_t constantInputWidth1_f_32x4 = vdupq_n_f32(float(inputWidth - 1u));
4846 const float32x4_t constantInputHeight1_f_32x4 = vdupq_n_f32(float(inputHeight - 1u));
4847
4848 const uint32x4_t constantInputStrideElements_u_32x4 = vdupq_n_u32(inputStrideElements);
4849 const uint32x4_t constantInputHeight1_u_32x4 = vdupq_n_u32(inputHeight - 1u);
4850
4851 unsigned int validPixels[8];
4852
4853 unsigned int topLeftOffsetsElements[8];
4854 unsigned int bottomLeftOffsetsElements[8];
4855
4856 uint8_t pixels[32];
4857
4858 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4859 {
4860 PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
4861
4862 input_LT_output->bilinearValues<VectorF2>(y, rowLookupData);
4863
4864 float32x4_t additionalInputOffsetX0123_f_32x4 = conststant0123_f_32x4;
4865 float32x4_t additionalInputOffsetX4567_f_32x4 = conststant4567_f_32x4;
4866
4867 const float32x4_t additionalInputOffsetY_f_32x4 = vdupq_n_f32(float(y));
4868
4869 for (unsigned int x = 0u; x < outputWidth; x += 8u)
4870 {
4871 if (x + 8u > outputWidth)
4872 {
4873 // the last iteration will not fit into the output frame,
4874 // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
4875
4876 ocean_assert(x >= 8u && outputWidth > 8u);
4877 const unsigned int newX = outputWidth - 8u;
4878
4879 ocean_assert(x > newX);
4880 const unsigned int xOffset = x - newX;
4881
4882 outputPixelData -= xOffset;
4883
4884 if (offset)
4885 {
4886 additionalInputOffsetX0123_f_32x4 = vsubq_f32(additionalInputOffsetX0123_f_32x4, vdupq_n_f32(float(xOffset)));
4887 additionalInputOffsetX4567_f_32x4 = vsubq_f32(additionalInputOffsetX4567_f_32x4, vdupq_n_f32(float(xOffset)));
4888 }
4889
4890 x = newX;
4891
4892 // the for loop will stop after this iteration
4893 ocean_assert(!(x + 8u < outputWidth));
4894 }
4895
4896 const float32x4x2_t inputPositions0123_f_32x4x2 = vld2q_f32((const float*)(rowLookupData + x + 0u));
4897 const float32x4x2_t inputPositions4567_f_32x4x2 = vld2q_f32((const float*)(rowLookupData + x + 4u));
4898
4899 float32x4_t inputPositionsX0123_f_32x4 = inputPositions0123_f_32x4x2.val[0];
4900 float32x4_t inputPositionsY0123_f_32x4 = inputPositions0123_f_32x4x2.val[1];
4901
4902 float32x4_t inputPositionsX4567_f_32x4 = inputPositions4567_f_32x4x2.val[0];
4903 float32x4_t inputPositionsY4567_f_32x4 = inputPositions4567_f_32x4x2.val[1];
4904
4905 if (offset)
4906 {
4907 inputPositionsX0123_f_32x4 = vaddq_f32(inputPositionsX0123_f_32x4, additionalInputOffsetX0123_f_32x4);
4908 inputPositionsY0123_f_32x4 = vaddq_f32(inputPositionsY0123_f_32x4, additionalInputOffsetY_f_32x4);
4909
4910 inputPositionsX4567_f_32x4 = vaddq_f32(inputPositionsX4567_f_32x4, additionalInputOffsetX4567_f_32x4);
4911 inputPositionsY4567_f_32x4 = vaddq_f32(inputPositionsY4567_f_32x4, additionalInputOffsetY_f_32x4);
4912
4913 additionalInputOffsetX0123_f_32x4 = vaddq_f32(additionalInputOffsetX0123_f_32x4, constantEight_f_32x4);
4914 additionalInputOffsetX4567_f_32x4 = vaddq_f32(additionalInputOffsetX4567_f_32x4, constantEight_f_32x4);
4915 }
4916
4917 // now we check whether we are inside the input frame
4918 const uint32x4_t validPixelsX0123_u_32x4 = vandq_u32(vcltq_f32(inputPositionsX0123_f_32x4, constantInputWidth1_f_32x4), vcgeq_f32(inputPositionsX0123_f_32x4, constantZero_f_32x4)); // inputPosition.x() >= 0 && inputPosition.x() < (inputWidth - 1) ? 0xFFFFFF : 0x000000
4919 const uint32x4_t validPixelsX4567_u_32x4 = vandq_u32(vcltq_f32(inputPositionsX4567_f_32x4, constantInputWidth1_f_32x4), vcgeq_f32(inputPositionsX4567_f_32x4, constantZero_f_32x4));
4920
4921 const uint32x4_t validPixelsY0123_u_32x4 = vandq_u32(vcltq_f32(inputPositionsY0123_f_32x4, constantInputHeight1_f_32x4), vcgeq_f32(inputPositionsY0123_f_32x4, constantZero_f_32x4)); // inputPosition.y() >= 0 && inputPosition.y() < (inputHeight - 1) ? 0xFFFFFF : 0x000000
4922 const uint32x4_t validPixelsY4567_u_32x4 = vandq_u32(vcltq_f32(inputPositionsY4567_f_32x4, constantInputHeight1_f_32x4), vcgeq_f32(inputPositionsY4567_f_32x4, constantZero_f_32x4));
4923
4924 const uint32x4_t validPixels0123_u_32x4 = vandq_u32(validPixelsX0123_u_32x4, validPixelsY0123_u_32x4); // is_inside_input_frame(inputPosition) ? 0xFFFFFF : 0x000000
4925 const uint32x4_t validPixels4567_u_32x4 = vandq_u32(validPixelsX4567_u_32x4, validPixelsY4567_u_32x4);
4926
4927 vst1q_u32(validPixels + 0, validPixels0123_u_32x4);
4928 vst1q_u32(validPixels + 4, validPixels4567_u_32x4);
4929
4930
4931 const uint32x4_t inputPositionsLeft0123_u_32x4 = vcvtq_u32_f32(inputPositionsX0123_f_32x4);
4932 const uint32x4_t inputPositionsLeft4567_u_32x4 = vcvtq_u32_f32(inputPositionsX4567_f_32x4);
4933
4934 const uint32x4_t inputPositionsTop0123_u_32x4 = vcvtq_u32_f32(inputPositionsY0123_f_32x4);
4935 const uint32x4_t inputPositionsTop4567_u_32x4 = vcvtq_u32_f32(inputPositionsY4567_f_32x4);
4936
4937 const uint32x4_t inputPositionsBottom0123_u_32x4 = vminq_u32(vaddq_u32(inputPositionsTop0123_u_32x4, constantOne_u_32x4), constantInputHeight1_u_32x4);
4938 const uint32x4_t inputPositionsBottom4567_u_32x4 = vminq_u32(vaddq_u32(inputPositionsTop4567_u_32x4, constantOne_u_32x4), constantInputHeight1_u_32x4);
4939
4940
4941 const uint32x4_t topLeftOffsetsElements0123_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft0123_u_32x4, constantChannels_u_32x4), inputPositionsTop0123_u_32x4, constantInputStrideElements_u_32x4); // topLeftOffset = top * strideElements + left * channels
4942 vst1q_u32(topLeftOffsetsElements + 0, topLeftOffsetsElements0123_u_32x4);
4943 const uint32x4_t topLeftOffsetsElements4567_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft4567_u_32x4, constantChannels_u_32x4), inputPositionsTop4567_u_32x4, constantInputStrideElements_u_32x4);
4944 vst1q_u32(topLeftOffsetsElements + 4, topLeftOffsetsElements4567_u_32x4);
4945
4946 const uint32x4_t bottomLeftOffsetsElements0123_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft0123_u_32x4, constantChannels_u_32x4), inputPositionsBottom0123_u_32x4, constantInputStrideElements_u_32x4);
4947 vst1q_u32(bottomLeftOffsetsElements + 0, bottomLeftOffsetsElements0123_u_32x4);
4948 const uint32x4_t bottomLeftOffsetsElements4567_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft4567_u_32x4, constantChannels_u_32x4), inputPositionsBottom4567_u_32x4, constantInputStrideElements_u_32x4);
4949 vst1q_u32(bottomLeftOffsetsElements + 4, bottomLeftOffsetsElements4567_u_32x4);
4950
4951
4952 // we determine the fractional portions of the x' and y' and [0.0, 1.0] -> [0, 128]
4953 float32x4_t tx0123_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsX0123_f_32x4, vcvtq_f32_u32(inputPositionsLeft0123_u_32x4)), constant128_f_32x4);
4954 float32x4_t tx4567_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsX4567_f_32x4, vcvtq_f32_u32(inputPositionsLeft4567_u_32x4)), constant128_f_32x4);
4955
4956 float32x4_t ty0123_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsY0123_f_32x4, vcvtq_f32_u32(inputPositionsTop0123_u_32x4)), constant128_f_32x4);
4957 float32x4_t ty4567_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsY4567_f_32x4, vcvtq_f32_u32(inputPositionsTop4567_u_32x4)), constant128_f_32x4);
4958
4959 const uint32x4_t tx0123_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(tx0123_f_32x4, vdupq_n_f32(0.5)));
4960 const uint32x4_t tx4567_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(tx4567_f_32x4, vdupq_n_f32(0.5)));
4961
4962 const uint32x4_t ty0123_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(ty0123_f_32x4, vdupq_n_f32(0.5)));
4963 const uint32x4_t ty4567_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(ty4567_f_32x4, vdupq_n_f32(0.5)));
4964
4965 const uint16x8_t tx01234567_128_u_16x8 = vcombine_u16(vmovn_u32(tx0123_128_u_32x4), vmovn_u32(tx4567_128_u_32x4));
4966 const uint16x8_t ty01234567_128_u_16x8 = vcombine_u16(vmovn_u32(ty0123_128_u_32x4), vmovn_u32(ty4567_128_u_32x4));
4967
4968 const uint8x16_t tx_ty_128_u_8x16 = vcombine_u8(vmovn_u16(tx01234567_128_u_16x8), vmovn_u16(ty01234567_128_u_16x8));
4969
4970
4971 vst1q_u8(pixels + 0, constantBorderColor_u_8x16); // initialize with border color
4972 vst1q_u8(pixels + 16, constantBorderColor_u_8x16);
4973
4974 struct LeftRightPixel
4975 {
4976 uint8_t left;
4977 uint8_t right;
4978 };
4979
4980 static_assert(sizeof(LeftRightPixel) == 2, "Invalid data type!");
4981
4982 // we gather the individual source pixel values from the source image,
4983 // based on the calculated pixel locations
4984 for (unsigned int i = 0u; i < 8u; ++i)
4985 {
4986 if (validPixels[i])
4987 {
4988 ocean_assert((topLeftOffsetsElements[i] % inputStrideElements) < inputWidth - 1u); // we need to have one additional pixel to the right (as we copy two pixels at once)
4989 ocean_assert((bottomLeftOffsetsElements[i] % inputStrideElements) < inputWidth - 1u);
4990
4991 ((LeftRightPixel*)pixels)[0u + i] = *(LeftRightPixel*)(input + topLeftOffsetsElements[i]);
4992 ((LeftRightPixel*)pixels)[8u + i] = *(LeftRightPixel*)(input + bottomLeftOffsetsElements[i]);
4993 }
4994 }
4995
4996 const uint8x8x2_t topLeft_topRight_u_8x8x2 = vld2_u8(pixels);
4997 const uint8x8x2_t bottomLeft_bottomRight_u_8x8x2 = vld2_u8(pixels + 16);
4998
4999 interpolate8Pixels1Channel8BitNEON(topLeft_topRight_u_8x8x2.val[0], topLeft_topRight_u_8x8x2.val[1], bottomLeft_bottomRight_u_8x8x2.val[0], bottomLeft_bottomRight_u_8x8x2.val[1], tx_ty_128_u_8x16, outputPixelData);
5000
5001 outputPixelData += 8;
5002 }
5003 }
5004}
5005
5006template <unsigned int tChannels>
5007void FrameInterpolatorBilinear::lookup8BitPerChannelSubsetNEON(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5008{
5009 ocean_assert(input_LT_output != nullptr);
5010 ocean_assert(input != nullptr && output != nullptr);
5011
5012 ocean_assert(inputWidth != 0u && inputHeight != 0u);
5013 ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
5014
5015 using PixelType = typename DataType<uint8_t, tChannels>::Type;
5016
5017 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
5018 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
5019
5020 const unsigned int outputWidth = (unsigned int)(input_LT_output->sizeX());
5021 ocean_assert(outputWidth >= 4u);
5022
5023 static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
5024
5025 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
5026 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
5027
5028 Memory rowLookupMemory = Memory::create<VectorF2>(outputWidth);
5029 VectorF2* const rowLookupData = rowLookupMemory.data<VectorF2>();
5030
5031 const float32x4_t constantZero_f_32x4 = vdupq_n_f32(0.0f); // [0.0f, 0.0f, 0.0f, 0.0f]
5032 const float32x4_t constantFour_f_32x4 = vdupq_n_f32(4.0f); // [4.0f, 4.0f, 4.0f, 4.0f]
5033
5034 // [0.0f, 1.0f, 2.0f, 3.0f]
5035 const float f_0123[4] = {0.0f, 1.0f, 2.0f, 3.0f};
5036 float32x4_t conststant0123_f_32x4 = vld1q_f32(f_0123);
5037
5038 const uint32x4_t constantOne_u_32x4 = vdupq_n_u32(1u);
5039
5040 const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(tChannels);
5041
5042 const float32x4_t constantInputWidth1_f_32x4 = vdupq_n_f32(float(inputWidth - 1u));
5043 const float32x4_t constantInputHeight1_f_32x4 = vdupq_n_f32(float(inputHeight - 1u));
5044
5045 const uint32x4_t constantInputStrideElements_u_32x4 = vdupq_n_u32(inputStrideElements);
5046 const uint32x4_t constantInputWidth1_u_32x4 = vdupq_n_u32(inputWidth - 1u);
5047 const uint32x4_t constantInputHeight1_u_32x4 = vdupq_n_u32(inputHeight - 1u);
5048
5049 unsigned int validPixels[4];
5050
5051 unsigned int topLeftOffsetsElements[4];
5052 unsigned int topRightOffsetsElements[4];
5053 unsigned int bottomLeftOffsetsElements[4];
5054 unsigned int bottomRightOffsetsElements[4];
5055
5056 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
5057 {
5058 PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
5059
5060 input_LT_output->bilinearValues<VectorF2>(y, rowLookupData);
5061
5062 float32x4_t additionalInputOffsetX_f_32x4 = conststant0123_f_32x4;
5063 const float32x4_t additionalInputOffsetY_f_32x4 = vdupq_n_f32(float(y));
5064
5065 for (unsigned int x = 0u; x < outputWidth; x += 4u)
5066 {
5067 if (x + 4u > outputWidth)
5068 {
5069 // the last iteration will not fit into the output frame,
5070 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
5071
5072 ocean_assert(x >= 4u && outputWidth > 4u);
5073 const unsigned int newX = outputWidth - 4u;
5074
5075 ocean_assert(x > newX);
5076 const unsigned int xOffset = x - newX;
5077
5078 outputPixelData -= xOffset;
5079
5080 if (offset)
5081 {
5082 additionalInputOffsetX_f_32x4 = vsubq_f32(additionalInputOffsetX_f_32x4, vdupq_n_f32(float(xOffset)));
5083 }
5084
5085 x = newX;
5086
5087 // the for loop will stop after this iteration
5088 ocean_assert(!(x + 4u < outputWidth));
5089 }
5090
5091 const float32x4x2_t inputPositions_f_32x4x2 = vld2q_f32((const float*)(rowLookupData + x));
5092
5093 float32x4_t inputPositionsX_f_32x4 = inputPositions_f_32x4x2.val[0];
5094 float32x4_t inputPositionsY_f_32x4 = inputPositions_f_32x4x2.val[1];
5095
5096 if (offset)
5097 {
5098 inputPositionsX_f_32x4 = vaddq_f32(inputPositionsX_f_32x4, additionalInputOffsetX_f_32x4);
5099 inputPositionsY_f_32x4 = vaddq_f32(inputPositionsY_f_32x4, additionalInputOffsetY_f_32x4);
5100
5101 additionalInputOffsetX_f_32x4 = vaddq_f32(additionalInputOffsetX_f_32x4, constantFour_f_32x4);
5102 }
5103
5104 // now we check whether we are inside the input frame
5105 const uint32x4_t validPixelsX_u_32x4 = vandq_u32(vcleq_f32(inputPositionsX_f_32x4, constantInputWidth1_f_32x4), vcgeq_f32(inputPositionsX_f_32x4, constantZero_f_32x4)); // inputPosition.x() >= 0 && inputPosition.x() <= (inputWidth - 1) ? 0xFFFFFF : 0x000000
5106 const uint32x4_t validPixelsY_u_32x4 = vandq_u32(vcleq_f32(inputPositionsY_f_32x4, constantInputHeight1_f_32x4), vcgeq_f32(inputPositionsY_f_32x4, constantZero_f_32x4)); // inputPosition.y() >= 0 && inputPosition.y() <= (inputHeight - 1) ? 0xFFFFFF : 0x000000
5107
5108 const uint32x4_t validPixels_u_32x4 = vandq_u32(validPixelsX_u_32x4, validPixelsY_u_32x4); // is_inside_input_frame(inputPosition) ? 0xFFFFFF : 0x000000
5109
5110 vst1q_u32(validPixels, validPixels_u_32x4);
5111
5112 const uint32x4_t inputPositionsLeft_u_32x4 = vcvtq_u32_f32(inputPositionsX_f_32x4);
5113 const uint32x4_t inputPositionsTop_u_32x4 = vcvtq_u32_f32(inputPositionsY_f_32x4);
5114
5115 const uint32x4_t inputPositionsRight_u_32x4 = vminq_u32(vaddq_u32(inputPositionsLeft_u_32x4, constantOne_u_32x4), constantInputWidth1_u_32x4);
5116 const uint32x4_t inputPositionsBottom_u_32x4 = vminq_u32(vaddq_u32(inputPositionsTop_u_32x4, constantOne_u_32x4), constantInputHeight1_u_32x4);
5117
5118 const uint32x4_t topLeftOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft_u_32x4, constantChannels_u_32x4), inputPositionsTop_u_32x4, constantInputStrideElements_u_32x4); // topLeftOffset = top * strideElements + left * channels
5119 const uint32x4_t topRightOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsRight_u_32x4, constantChannels_u_32x4), inputPositionsTop_u_32x4, constantInputStrideElements_u_32x4);
5120 const uint32x4_t bottomLeftOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft_u_32x4, constantChannels_u_32x4), inputPositionsBottom_u_32x4, constantInputStrideElements_u_32x4);
5121 const uint32x4_t bottomRightOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsRight_u_32x4, constantChannels_u_32x4), inputPositionsBottom_u_32x4, constantInputStrideElements_u_32x4);
5122
5123 vst1q_u32(topLeftOffsetsElements, topLeftOffsetsElements_u_32x4);
5124 vst1q_u32(topRightOffsetsElements, topRightOffsetsElements_u_32x4);
5125 vst1q_u32(bottomLeftOffsetsElements, bottomLeftOffsetsElements_u_32x4);
5126 vst1q_u32(bottomRightOffsetsElements, bottomRightOffsetsElements_u_32x4);
5127
5128 // we determine the fractional portions of the x' and y':
5129 float32x4_t tx_f_32x4 = vsubq_f32(inputPositionsX_f_32x4, vcvtq_f32_u32(inputPositionsLeft_u_32x4));
5130 float32x4_t ty_f_32x4 = vsubq_f32(inputPositionsY_f_32x4, vcvtq_f32_u32(inputPositionsTop_u_32x4));
5131
5132 // we use integer interpolation [0.0, 1.0] -> [0, 128]
5133 tx_f_32x4 = vmulq_f32(tx_f_32x4, vdupq_n_f32(128.0f));
5134 ty_f_32x4 = vmulq_f32(ty_f_32x4, vdupq_n_f32(128.0f));
5135
5136 const uint32x4_t tx_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(tx_f_32x4, vdupq_n_f32(0.5)));
5137 const uint32x4_t ty_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(ty_f_32x4, vdupq_n_f32(0.5)));
5138
5139 interpolate4Pixels8BitPerChannelNEON<tChannels>(input, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, tx_128_u_32x4, ty_128_u_32x4, outputPixelData);
5140
5141 outputPixelData += 4;
5142 }
5143 }
5144}
5145
5146#endif // defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5147
5148template <unsigned int tChannels>
5149void FrameInterpolatorBilinear::lookupMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5150{
5151 ocean_assert(input_LT_output != nullptr);
5152 ocean_assert(input != nullptr && output != nullptr);
5153
5154 ocean_assert(inputWidth != 0u && inputHeight != 0u);
5155 ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
5156
5157 using PixelType = typename DataType<uint8_t, tChannels>::Type;
5158
5159 const unsigned int columns = (unsigned int)(input_LT_output->sizeX());
5160
5161 const unsigned int outputStrideElements = tChannels * columns + outputPaddingElements;
5162 const unsigned int outputMaskStrideElements = columns + outputMaskPaddingElements;
5163
5164 static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
5165
5166 const Scalar inputWidth1 = Scalar(inputWidth - 1u);
5167 const Scalar inputHeight1 = Scalar(inputHeight - 1u);
5168
5169 Memory rowLookupMemory = Memory::create<Vector2>(columns);
5170 Vector2* const rowLookupData = rowLookupMemory.data<Vector2>();
5171
5172 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
5173 {
5174 input_LT_output->bilinearValues(y, rowLookupData);
5175
5176 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
5177 uint8_t* outputMaskData = outputMask + y * outputMaskStrideElements;
5178
5179 for (unsigned int x = 0u; x < columns; ++x)
5180 {
5181 const Vector2& lookupValue = rowLookupData[x];
5182
5183 const Vector2 inputPosition = offset ? Vector2(Scalar(x) + lookupValue.x(), Scalar(y) + lookupValue.y()) : lookupValue;
5184
5185 if (inputPosition.x() >= 0 && inputPosition.y() >= 0 && inputPosition.x() <= inputWidth1 && inputPosition.y() <= inputHeight1)
5186 {
5187 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputData));
5188 *outputMaskData = maskValue;
5189 }
5190 else
5191 {
5192 *outputMaskData = 0xFFu - maskValue;
5193 }
5194
5195 outputData++;
5196 outputMaskData++;
5197 }
5198 }
5199}
5200
5201template <unsigned int tChannels>
5202void FrameInterpolatorBilinear::scale8BitPerChannel(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
5203{
5204 ocean_assert(source != nullptr && target != nullptr);
5205 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
5206 ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
5207 ocean_assert(sourceX_s_targetX > 0.0);
5208 ocean_assert(sourceY_s_targetY > 0.0);
5209
5210 if (sourceWidth == targetWidth && sourceHeight == targetHeight)
5211 {
5212 FrameConverter::subFrame<uint8_t>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, 0u, 0u, 0u, 0u, sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements);
5213 return;
5214 }
5215
5216 if (worker && sourceWidth * tChannels >= 16u && targetWidth >= 8u)
5217 {
5218#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5219 if (sourceWidth <= 65535u && sourceHeight <= 65535u && targetWidth <= 65535u && targetHeight <= 65535u)
5220 {
5221 worker->executeFunction(Worker::Function::createStatic(&scale8BitPerChannelSubset7BitPrecisionNEON, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
5222 return;
5223 }
5224#else
5225 worker->executeFunction(Worker::Function::createStatic(&scale8BitPerChannelSubset<tChannels>, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
5226#endif
5227 }
5228 else
5229 {
5230 if (sourceWidth * tChannels >= 16u && targetWidth >= 8u)
5231 {
5232#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5233 if (sourceWidth <= 65535u && sourceHeight <= 65535u && targetWidth <= 65535u && targetHeight <= 65535u)
5234 {
5235 scale8BitPerChannelSubset7BitPrecisionNEON(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
5236 return;
5237 }
5238#endif
5239 }
5240
5241 scale8BitPerChannelSubset<tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
5242 }
5243}
5244
5245template <unsigned int tChannels>
5246void FrameInterpolatorBilinear::scale8BitPerChannelSubset(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
5247{
5248 ocean_assert(source != nullptr && target != nullptr);
5249 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
5250 ocean_assert_and_suppress_unused(targetWidth >= 1u && targetHeight >= 1u, targetHeight);
5251 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5252
5253 const Scalar sourceX_T_targetX = Scalar(sourceX_s_targetX);
5254 const Scalar sourceY_T_targetY = Scalar(sourceY_s_targetY);
5255
5256 /*
5257 * We determine the sub-pixel accurate source location for each target pixel as follows:
5258 *
5259 * Example with a downsampling by factor 4:
5260 * sourceRow with 12 pixels: | 0 1 2 3 4 5 6 7 8 9 A B |
5261 * targetRow with 3 pixels: | 0 1 2 |
5262 *
5263 * Thus, the source row can be separated into three blocks;
5264 * and we want to extract the color information from the center of the blocks:
5265 * sourceRow with 12 pixels: | 0 1 2 3 | 4 5 6 7 | 8 9 A B |
5266 * targetRow with 3 pixels: | 0 | 1 | 2 | (sourceX_s_targetX = 4)
5267 *
5268 * Thus, we add 0.5 to each target coordinate before converting it to a source location;
5269 * and subtract 0.5 again afterwards:
5270 * sourceX = (targetX + 0.5) * sourceX_s_targetX - 0.5
5271 *
5272 * e.g., (0 + 0.5) * 4 - 0.5 = 1.5
5273 * (1 + 0.5) * 4 - 0.5 = 5.5
5274 *
5275 *
5276 * Example with a downsampling by factor 3:
5277 * sourceRow with 9 pixels: | 0 1 2 3 4 5 6 7 8 |
5278 * targetRow with 3 pixels: | 0 1 2 |
5279 *
5280 * sourceRow with 9 pixels: | 0 1 2 | 3 4 5 | 6 7 8 |
5281 * targetRow with 3 pixels: | 0 | 1 | 2 | (sourceX_s_targetX = 3)
5282 *
5283 * e.g., (0 + 0.5) * 3 - 0.5 = 1
5284 * (1 + 0.5) * 3 - 0.5 = 4
5285 *
5286 *
5287 * Example with a downsampling by factor 2:
5288 * sourceRow with 6 pixels: | 0 1 2 3 4 5 |
5289 * targetRow with 3 pixels: | 0 1 2 |
5290 *
5291 * sourceRow with 6 pixels: | 0 1 | 2 3 | 4 5 |
5292 * targetRow with 3 pixels: | 0 | 1 | 2 | (sourceX_s_targetX = 2)
5293 *
5294 * e.g., (0 + 0.5) * 2 - 0.5 = 0.5
5295 * (1 + 0.5) * 2 - 0.5 = 2.5
5296 *
5297 *
5298 * we can simplify the calculation (as we have a constant term):
5299 * sourceX = (sourceX_s_targetX * targetX) + (sourceX_s_targetX * 0.5 - 0.5)
5300 */
5301
5302 const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
5303
5304 const Scalar sourceX_T_targetXOffset = sourceX_T_targetX * Scalar(0.5) - Scalar(0.5);
5305 const Scalar sourceY_T_targetYOffset = sourceY_T_targetY * Scalar(0.5) - Scalar(0.5);
5306
5307 const Scalar sourceWidth_1 = Scalar(sourceWidth - 1u);
5308 const Scalar sourceHeight_1 = Scalar(sourceHeight - 1u);
5309
5310 target += (targetWidth * tChannels + targetPaddingElements) * firstTargetRow;
5311
5312 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5313 {
5314 const Scalar sy = minmax(Scalar(0), sourceY_T_targetYOffset + sourceY_T_targetY * Scalar(y), sourceHeight_1);
5315 ocean_assert(sy >= Scalar(0) && sy < Scalar(sourceHeight));
5316
5317 const unsigned int sTop = (unsigned int)sy;
5318 ocean_assert(sy >= Scalar(sTop));
5319
5320 const Scalar ty = sy - Scalar(sTop);
5321 ocean_assert(ty >= 0 && ty <= 1);
5322
5323 const unsigned int factorBottom = (unsigned int)(ty * Scalar(128) + Scalar(0.5));
5324 const unsigned int factorTop = 128u - factorBottom;
5325
5326 const uint8_t* const sourceTop = source + sourceStrideElements * sTop;
5327 const uint8_t* const sourceBottom = (sTop + 1u < sourceHeight) ? sourceTop + sourceStrideElements : sourceTop;
5328
5329 for (unsigned int x = 0; x < targetWidth; ++x)
5330 {
5331 const Scalar sx = minmax(Scalar(0), sourceX_T_targetXOffset + sourceX_T_targetX * Scalar(x), sourceWidth_1);
5332 ocean_assert(sx >= Scalar(0) && sx < Scalar(sourceWidth));
5333
5334 const unsigned int sLeft = (unsigned int)sx;
5335 ocean_assert(sx >= Scalar(sLeft));
5336
5337 const Scalar tx = sx - Scalar(sLeft);
5338 ocean_assert(tx >= 0 && tx <= 1);
5339
5340 const unsigned int factorRight = (unsigned int)(tx * Scalar(128) + Scalar(0.5));
5341 const unsigned int factorLeft = 128u - factorRight;
5342
5343 const unsigned int sourceRightOffset = sLeft + 1u < sourceWidth ? tChannels : 0u;
5344
5345 const uint8_t* const sourceTopLeft = sourceTop + sLeft * tChannels;
5346 const uint8_t* const sourceBottomLeft = sourceBottom + sLeft * tChannels;
5347
5348 const unsigned int factorTopLeft = factorTop * factorLeft;
5349 const unsigned int factorTopRight = factorTop * factorRight;
5350 const unsigned int factorBottomLeft = factorBottom * factorLeft;
5351 const unsigned int factorBottomRight = factorBottom * factorRight;
5352
5353 for (unsigned int n = 0u; n < tChannels; ++n)
5354 {
5355 target[n] = (uint8_t)((sourceTopLeft[n] * factorTopLeft + sourceTopLeft[sourceRightOffset + n] * factorTopRight
5356 + sourceBottomLeft[n] * factorBottomLeft + sourceBottomLeft[sourceRightOffset + n] * factorBottomRight + 8192u) >> 14u);
5357 }
5358
5359 target += tChannels;
5360 }
5361
5362 target += targetPaddingElements;
5363 }
5364}
5365
5366template <typename T>
5367void FrameInterpolatorBilinear::interpolateRowVertical(const T* sourceRowTop, const T* sourceRowBottom, T* targetRow, const unsigned int elements, const float factorBottom)
5368{
5369 ocean_assert(sourceRowTop != nullptr);
5370 ocean_assert(sourceRowBottom != nullptr);
5371 ocean_assert(targetRow != nullptr);
5372 ocean_assert(elements >= 1u);
5373 ocean_assert(factorBottom >= 0.0f && factorBottom <= 1.0f);
5374
5375 using FloatType = typename FloatTyper<T>::Type;
5376
5377 const FloatType internalFactorBottom = FloatType(factorBottom);
5378 const FloatType internalFactorTop = FloatType(1.0f - factorBottom);
5379
5380 for (unsigned int n = 0u; n < elements; ++n)
5381 {
5382 targetRow[n] = T(FloatType(sourceRowTop[n]) * internalFactorTop + FloatType(sourceRowBottom[n]) * internalFactorBottom);
5383 }
5384}
5385
5386template <typename T, unsigned int tChannels>
5387void FrameInterpolatorBilinear::interpolateRowHorizontal(const T* extendedSourceRow, T* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const float* interpolationFactorsRight)
5388{
5389 static_assert(tChannels != 0u, "Invalid channel number!");
5390
5391 ocean_assert(extendedSourceRow != nullptr);
5392 ocean_assert(targetRow != nullptr);
5393 ocean_assert(targetWidth >= 1u);
5394 ocean_assert(interpolationLocations != nullptr);
5395 ocean_assert(interpolationFactorsRight != nullptr);
5396 ocean_assert(channels == tChannels);
5397
5398 using FloatType = typename FloatTyper<T>::Type;
5399
5400 for (unsigned int x = 0u; x < targetWidth; ++x)
5401 {
5402 const FloatType internalFactorRight = FloatType(interpolationFactorsRight[x]);
5403 ocean_assert(internalFactorRight >= FloatType(0) && internalFactorRight <= FloatType(1));
5404
5405 const FloatType internalFactorLeft = FloatType(1.0f - interpolationFactorsRight[x]);
5406
5407 const unsigned int& leftLocation = interpolationLocations[x];
5408 const unsigned int rightLocation = leftLocation + tChannels; // location is defined in relation to elements, not to pixels
5409
5410 for (unsigned int n = 0u; n < tChannels; ++n)
5411 {
5412 targetRow[x * tChannels + n] = T(FloatType(extendedSourceRow[leftLocation + n]) * internalFactorLeft + FloatType(extendedSourceRow[rightLocation + n]) * internalFactorRight);
5413 }
5414 }
5415}
5416
5417#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5418
5419#ifdef OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION
5420
/**
 * Specialization of scale8BitPerChannelSubset7BitPrecisionNEON() for the template arguments <2u, 8u>.
 * The body handles pixels composed of two 8 bit channels and creates eight target pixels per NEON iteration.
 * The function resizes the target rows [firstTargetRow, firstTargetRow + numberTargetRows) with a bilinear interpolation:
 * source locations are calculated as fixed-point numbers with 16 bit fraction, the interpolation factors are reduced to 7 bit precision (values between 0 and 128).
 * This implementation is compiled only if OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION is defined.
 * @param source The source frame to read from, must be valid
 * @param target The target frame receiving the interpolated pixels, must be valid
 * @param sourceWidth The width of the source frame in pixels, with range [2, 65535]
 * @param sourceHeight The height of the source frame in pixels, with range [1, 65535]
 * @param targetWidth The width of the target frame in pixels, with range [8, 65535]
 * @param targetHeight The height of the target frame in pixels, with range [1, 65535]; used for the assert only
 * @param sourceX_s_targetX The horizontal scale factor converting a target location to a source location, with range (0, infinity)
 * @param sourceY_s_targetY The vertical scale factor converting a target location to a source location, with range (0, infinity)
 * @param sourcePaddingElements Must be 0, padding is not supported
 * @param targetPaddingElements Must be 0, padding is not supported
 * @param firstTargetRow The first target row to be handled
 * @param numberTargetRows The number of target rows to be handled
 */
5421template <>
5422inline void FrameInterpolatorBilinear::scale8BitPerChannelSubset7BitPrecisionNEON<2u, 8u>(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
5423{
5424 ocean_assert(source != nullptr && target != nullptr);
5425 ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
5426 ocean_assert(sourceHeight >= 1u && sourceHeight <= 65535u);
5427 ocean_assert(targetWidth >= 8u && targetWidth <= 65535u);
5428 ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
5429 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5430
5431 ocean_assert(sourcePaddingElements == 0u); // not supported
5432 ocean_assert(targetPaddingElements == 0u);
5433
     // all pointer arithmetic below is done in whole pixels
     // presumably PixelType is a 2-byte type holding both channels of one pixel - TODO confirm against DataType
5434 using PixelType = typename DataType<uint8_t, 2u>::Type;
5435
     // as padding is not supported, a row is composed of exactly targetWidth (or sourceWidth) pixels
5436 PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
5437 const PixelType* const sourcePixelData = (const PixelType*)source;
5438
5439 // our offset values for the eight left pixels in relation to the first pixel of the row
5440 unsigned int leftOffsets[8];
5441
5442 // this function uses fixed point numbers with 16 bit for the calculation of the interpolation positions and factors:
5443 // fixedPointLocation = floatLocation * 2^16
5444 //
5445 // [FEDCBA98, 76543210]
5446 // [pixel , subpixel]
5447 //
5448 // fixedPointLocation = pixel + subpixel / 2^16
5449 //
5450 // Thus, the upper 16 bit represent the location of e.g., the left pixel (for the linear interpolation)
5451 // while the lower 16 bit represent one of both interpolation factors (and 2^16 - subpixel represents the second interpolation factor)
5452
     // the scale factors as fixed-point numbers (rounded to the nearest integer)
5453 const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
5454 const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
5455
     // the constant term (scale * 0.5 - 0.5) of the conversion from target to source locations as fixed-point number
     // the term is negative whenever the scale factor is below 1, therefore it is stored as signed integer
     // NOTE(review): '+ 0.5' followed by the truncating int cast rounds to nearest for non-negative values only - confirm that this is intended for negative offsets
5456 const int targetOffsetX_fixed16 = (int)(double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
5457 const int targetOffsetY_fixed16 = (int)(double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
5458
5459 // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
5460 const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
5461
5462 // we store 4 integers: [targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16]
5463 const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
5464
5465 // we store 4 integers: [sourceWidth - 2, sourceWidth - 2, sourceWidth - 2, sourceWidth - 2]
5466 const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
5467
5468 // we store 4 integers: [0, 0, 0, 0]
5469 const int32x4_t m128_s_zero = vdupq_n_s32(0);
5470
     // the lane offsets which are added to x to receive four successive target coordinates
5471 const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
5472 const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
5473
5474 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5475 {
     // the vertical source location of this target row as fixed-point number, clamped to the range [0, sourceHeight - 1]
5476 const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 + int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
5477
5478 const unsigned int sourceRowTop = sourceY_fixed16 >> 16u; // we must not round here
5479 const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
     // the 16 bit fraction is reduced to 7 bits without rounding, so that factorBottom is in the range [0, 127]
5480 const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
5481
5482 const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
5483 // factorTop = 128 - factorBottom
5484 const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
5485
     // the bottom row is identical to the top row if the top row is the last row of the source frame
5486 const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
5487
5488 const PixelType* const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
5489 const PixelType* const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
5490
5491 for (unsigned int x = 0; x < targetWidth; x += 8u)
5492 {
5493 if (x + 8u > targetWidth)
5494 {
5495 // the last iteration will not fit into the output frame,
5496 // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
5497
5498 ocean_assert(x >= 8u && targetWidth > 8u);
5499 const unsigned int newX = targetWidth - 8u;
5500
5501 ocean_assert(x > newX);
5502 targetPixelData -= x - newX;
5503
5504 x = newX;
5505
5506 // the for loop will stop after this iteration
5507 ocean_assert(!(x + 8u < targetWidth));
5508 }
5509
5510
5511 // we need eight successive x coordinates (stored in two registers with four 32 bit integers each):
5512 // [x + 3, x + 2, x + 1; x + 0]
5513 const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
5514 const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
5515
5516 // we calculate the (fixed-point) source locations for our target locations, negative source locations are clamped to zero
5517 const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
5518 const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
5519
5520 const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
5521 const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
5522
5523 // now we determine the pixel/integer accurate source locations
5524 // m128_u_left = min(floor(m128_f_sourceX), sourceWidth - 2)
     // the left location is clamped to (sourceWidth - 2) so that the right pixel (left + 1) is always located inside the row
     // the interpolation factor is not adjusted for clamped locations, the affected pixels are calculated again after the NEON loop
5525 const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2); // not vrshrq_n_u32 as we must not round here
5526 const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
5527
5528 // we store the offsets we have calculated
5529 vst1q_u32(leftOffsets + 0, m128_u_left_0123);
5530 vst1q_u32(leftOffsets + 4, m128_u_left_4567);
5531
5532
5533
5534 // we load the individual pixels to our four (de-interleaved) 8x8 bit registers (we do this for the top-left and top-right pixels)
5535 // note: loading of each pixel individually is significantly slower than loading two neighboring pixels within one iteration
5536
     // NOTE(review): both registers are passed to vld2_lane_u8() before they have been initialized,
     // all eight lanes are overwritten by the loads below before the registers are used in any calculation - compilers may warn about the uninitialized read
5537 uint8x8x2_t topLeftPixels;
5538 uint8x8x2_t topRightPixels;
5539
5540 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 0), topLeftPixels, 0);
5541 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 1), topRightPixels, 0);
5542
5543 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 0), topLeftPixels, 1);
5544 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 1), topRightPixels, 1);
5545
5546 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 0), topLeftPixels, 2);
5547 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 1), topRightPixels, 2);
5548
5549 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 0), topLeftPixels, 3);
5550 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 1), topRightPixels, 3);
5551
5552 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 0), topLeftPixels, 4);
5553 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 1), topRightPixels, 4);
5554
5555 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 0), topLeftPixels, 5);
5556 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 1), topRightPixels, 5);
5557
5558 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 0), topLeftPixels, 6);
5559 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 1), topRightPixels, 6);
5560
5561 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 0), topLeftPixels, 7);
5562 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 1), topRightPixels, 7);
5563
5564
5565 // we load the individual pixels to our four (de-interleaved) 8x8 bit registers (we do this for the bottom-left and bottom-right pixels)
5566
     // NOTE(review): same uninitialized pass-through as for the top pixels above
5567 uint8x8x2_t bottomLeftPixels;
5568 uint8x8x2_t bottomRightPixels;
5569
5570 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 0), bottomLeftPixels, 0);
5571 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 1), bottomRightPixels, 0);
5572
5573 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 0), bottomLeftPixels, 1);
5574 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 1), bottomRightPixels, 1);
5575
5576 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 0), bottomLeftPixels, 2);
5577 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 1), bottomRightPixels, 2);
5578
5579 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 0), bottomLeftPixels, 3);
5580 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 1), bottomRightPixels, 3);
5581
5582 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 0), bottomLeftPixels, 4);
5583 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 1), bottomRightPixels, 4);
5584
5585 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 0), bottomLeftPixels, 5);
5586 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 1), bottomRightPixels, 5);
5587
5588 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 0), bottomLeftPixels, 6);
5589 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 1), bottomRightPixels, 6);
5590
5591 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 0), bottomLeftPixels, 7);
5592 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 1), bottomRightPixels, 7);
5593
5594
5595
5596 // we determine the multiplication factors for the right pixels - which are already stored in the lower 16 bits
5597 // we need an accuracy of 7 bits (values between 0 and 128):
5598 // 76 54 32 10
5599 // [F3 F2 F1 F0]
     // in contrast to the vertical factor, the horizontal factors are shifted with rounding (vrshrn), so that the value 128 is possible
5600 const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5601 const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5602
5603 // as we have the pixel information de-interleaved, we can store all 8 interpolation factors together into one 8x8 bit register:
5604 const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
5605 const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
5606 const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
5607
5608
5609
5610 // we determine the intermediate interpolation results for the top row (and we narrow down the 16 bit results to 8 bit results)
5611 uint16x8_t m128_muliplicationChannel_0 = vmull_u8(topLeftPixels.val[0], m64_u_factorsLeft);
5612 uint16x8_t m128_muliplicationChannel_1 = vmull_u8(topLeftPixels.val[1], m64_u_factorsLeft);
5613
5614 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, topRightPixels.val[0], m64_u_factorsRight);
5615 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, topRightPixels.val[1], m64_u_factorsRight);
5616
5617 uint8x8_t m64_topRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
5618 uint8x8_t m64_topRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
5619
5620
5621
5622 // we determine the intermediate interpolation results for the bottom row (and we narrow down the 16 bit results to 8 bit results)
5623 m128_muliplicationChannel_0 = vmull_u8(bottomLeftPixels.val[0], m64_u_factorsLeft);
5624 m128_muliplicationChannel_1 = vmull_u8(bottomLeftPixels.val[1], m64_u_factorsLeft);
5625
5626 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, bottomRightPixels.val[0], m64_u_factorsRight);
5627 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, bottomRightPixels.val[1], m64_u_factorsRight);
5628
5629 uint8x8_t m64_bottomRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
5630 uint8x8_t m64_bottomRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
5631
5632
5633
5634 // finally we determine the interpolation result between top and bottom row
5635 m128_muliplicationChannel_0 = vmull_u8(m64_topRowChannel_0, m64_u_factorsTop);
5636 m128_muliplicationChannel_1 = vmull_u8(m64_topRowChannel_1, m64_u_factorsTop);
5637
5638 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, m64_bottomRowChannel_0, m64_u_factorsBottom);
5639 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, m64_bottomRowChannel_1, m64_u_factorsBottom);
5640
5641
5642 // we narrow down the interpolation results and we store them
5643 uint8x8x2_t result;
5644 result.val[0] = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
5645 result.val[1] = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
5646
5647 // we write back the results and interleave them automatically
5648 vst2_u8((uint8_t*)targetPixelData, result);
5649
5650 targetPixelData += 8;
5651 }
5652
5653 // we need to process the last pixel again, as this pixel may have received wrong interpolation factors as we always load two successive pixels into our NEON registers
5654 // **TODO** this is just a temporary solution, check how we can avoid this additional step
5655
     // the largest target x for which the (unclamped) source location does not exceed (sourceWidth - 1),
     // all target pixels from this location until the end of the row are calculated again with scalar code
5656 const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
5657
5658 for (unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
5659 {
     // in contrast to the NEON code above, the source location is clamped to (sourceWidth - 1) before the left pixel and the factor are extracted
5660 const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 + int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
5661
5662 const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
5663 ocean_assert(lastSourcePixelLeft < sourceWidth);
5664 const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
5665
5666 const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
5667
     // the factor is reduced to 7 bits without rounding here (the NEON code above rounds the horizontal factors)
5668 const unsigned int factorRight = factorRight_fixed16 >> 9u;
5669 const unsigned int factorLeft = 128u - factorRight;
5670
     // targetPixelData points to the end of the current target row here, thus (targetWidth - x) pixels back is pixel x of this row
     // horizontal and vertical interpolation are combined first and rounded once afterwards (14 bit shift)
5671 for (unsigned int c = 0u; c < 2u; ++c)
5672 {
5673 ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * (128u - factorBottom)
5674 + (((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
5675 }
5676 }
5677 }
5678}
5679
5680#endif // OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION
5681
5682#ifdef OCEAN_WE_KEEP_ALSO_THIS_SLOW_IMPLEMENTATION_SHOWING_A_SLIGHTLY_DIFFERENT_APPROACH
5683
5684template <>
5685inline void FrameInterpolatorBilinear::scale8BitPerChannelSubset7BitPrecisionNEON<2u, 8u>(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
5686{
5687 ocean_assert(source != nullptr && target != nullptr);
5688 ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
5689 ocean_assert(sourceHeight >= 0u && sourceHeight <= 65535u);
5690 ocean_assert(targetWidth >= 8u && targetWidth <= 65535u)
5691 ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
5692 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5693
5694 ocean_assert(sourcePaddingElements == 0u); // not supported
5695 ocean_assert(targetPaddingElements == 0u);
5696
5697 using PixelType = typename DataType<uint8_t, 2u>::Type;
5698
5699 PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
5700 const PixelType* const sourcePixelData = (const PixelType*)source;
5701
5702 // our offset values for the eight left pixels in relation to the first pixel of the row
5703 unsigned int leftOffsets[8];
5704
5705 // our color values of the eight top and bottom pixels (32 bit = 16 bit left and 16 bit right)
5706 unsigned int topPixels[8];
5707 unsigned int bottomPixels[8];
5708
5709 // this function uses fixed point numbers with 16 bit for the calculation of the interpolation positions and factors:
5710 // fixedPointLocation = floatLocation * 2^16
5711 //
5712 // [FEDCBA98, 76543210]
5713 // [pixel , subpixel]
5714 //
5715 // fixedPointLocation = pixel + subpixel / 2^16
5716 //
5717 // Thus, the upper 16 bit represent the location of e.g., the left pixel (for the linear interpolation)
5718 // while the lower 16 bit represent one of both interpolation factors (and 2^16 - subpixel represents the second interpolation factor)
5719
5720 const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
5721 const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
5722
5723 const int targetOffsetX_fixed16 = (int)(double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
5724 const int targetOffsetY_fixed16 = (int)(double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
5725
5726 // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
5727 const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
5728
5729 // we store 4 integers: [targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16]
5730 const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
5731
5732 // we store 4 integers: [sourceWidth - 2, sourceWidth - 2, sourceWidth - 2, sourceWidth - 2]
5733 const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
5734
5735 // we store 4 integers: [0, 0, 0, 0]
5736 const int32x4_t m128_s_zero = vdupq_n_s32(0);
5737
5738 const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
5739 const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
5740
5741 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5742 {
5743 const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 + int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
5744
5745 const unsigned int sourceRowTop = sourceY_fixed16 >> 16u; // we must not round here
5746 const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
5747 const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
5748
5749 const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
5750 // factorTop = 128 - factorBottom
5751 const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
5752
5753 const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
5754
5755 const PixelType* const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
5756 const PixelType* const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
5757
5758 for (unsigned int x = 0; x < targetWidth; x += 8u)
5759 {
5760 if (x + 8u > targetWidth)
5761 {
5762 // the last iteration will not fit into the output frame,
5763 // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
5764
5765 ocean_assert(x >= 8u && targetWidth > 8u);
5766 const unsigned int newX = targetWidth - 8u;
5767
5768 ocean_assert(x > newX);
5769 targetPixelData -= x - newX;
5770
5771 x = newX;
5772
5773 // the for loop will stop after this iteration
5774 ocean_assert(!(x + 8u < targetWidth));
5775 }
5776
5777
5778 // we need four successive x coordinate floats:
5779 // [x + 3, x + 2, x + 1; x + 0]
5780 const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
5781 const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
5782
5783 // we calculate the four source locations for our four target locations
5784 const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
5785 const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
5786
5787 const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
5788 const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
5789
5790 // now we determine the pixel/integer accurate source locations
5791 // m128_u_left = min(floor(m128_f_sourceX), sourceWidth - 2)
5792 const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2); // not vrshrq_n_u32 as we must not round here
5793 const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
5794
5795 // we store the offsets we have calculated
5796 vst1q_u32(leftOffsets + 0, m128_u_left_0123);
5797 vst1q_u32(leftOffsets + 4, m128_u_left_4567);
5798
5799
5800
5801 // we load the left and the right pixels into an intermediate buffer
5802 // with following pattern (with top-left TL, and top-right TR):
5803 // F E D C B A 9 8 7 6 5 4 3 2 1 0
5804 // [TR3 TR3 TL3 TL3 TR2 TR2 TL2 TL2 TR1 TR1 TL1 TL1 TR0 TR0 TL0 TL0]
5805 // [TR7 TR7 TL7 TL7 TR6 TR6 TL6 TL6 TR5 TR5 TL5 TL5 TR4 TR4 TL4 TL4]
5806
5807 for (unsigned int n = 0u; n < 8u; ++n)
5808 {
5809 topPixels[n] = *(unsigned int*)(sourceTopRowPixelData + leftOffsets[n]);
5810 }
5811
5812 const uint16x8_t m128_topPixels_0123 = vreinterpretq_u16_u32(vld1q_u32(topPixels + 0));
5813 const uint16x8_t m128_topPixels_4567 = vreinterpretq_u16_u32(vld1q_u32(topPixels + 4));
5814
5815 for (unsigned int n = 0u; n < 8u; ++n)
5816 {
5817 bottomPixels[n] = *(unsigned int*)(sourceBottomRowPixelData + leftOffsets[n]);
5818 }
5819
5820 const uint16x8_t m128_bottomPixels_0123 = vreinterpretq_u16_u32(vld1q_u32(bottomPixels + 0));
5821 const uint16x8_t m128_bottomPixels_4567 = vreinterpretq_u16_u32(vld1q_u32(bottomPixels + 4));
5822
5823
5824 // we determine the multiplication factors for the right pixels - which are already stored in the lower 16 bits
5825 // we need an accuracy of 7 bits (values between 0 and 128):
5826 // 76 54 32 10
5827 // [F3 F2 F1 F0]
5828 const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5829 const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5830
5831 // as we will have the pixel information de-interleaved, we can store all 8 interpolation factors together into one 8x8 bit register:
5832 const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
5833 const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
5834
5835 // now we have the interpolation factors for 8 left and 8 right pixels:
5836 // 7 6 5 4 3 2 1 0
5837 // [F7 F6 F5 F4 F3 F2 F1 F0]
5838 const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
5839
5840
5841 // we de-interleave the top pixels to left and right pixels:
5842 // F E D C B A 9 8 7 6 5 4 3 2 1 0
5843 // [TL7 TL7 TL6 TL6 TL5 TL5 TL4 TL4 TL3 TL3 TL2 TL2 TL1 TL1 TL0 TL0]
5844 // [TR7 TR7 TR6 TR6 TR5 TR5 TR4 TR4 TR3 TR3 TR2 TR2 TR1 TR1 TR0 TR0]
5845 const uint16x8x2_t m2_128_topPixelsLeftRight = vuzpq_u16(m128_topPixels_0123, m128_topPixels_4567);
5846
5847 // we de-interleave the pixels again to separate channel 0 and channel 1:
5848 // 7 6 5 4 3 2 1 0
5849 // channel 0: [TL7 TL6 TL5 TL4 TL3 TL2 TL1 TL0]
5850 // channel 1: [TL7 TL6 TL5 TL4 TL3 TL2 TL1 TL0]
5851 const uint8x8x2_t m2_64_topPixelsLeft_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[0])), vget_high_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[0])));
5852 const uint8x8x2_t m2_64_topPixelsRight_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[1])), vget_high_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[1])));
5853
5854 const uint8x8_t& m64_topPixelsLeft_channel_0 = m2_64_topPixelsLeft_channels_01.val[0];
5855 const uint8x8_t& m64_topPixelsLeft_channel_1 = m2_64_topPixelsLeft_channels_01.val[1];
5856
5857 const uint8x8_t& m64_topPixelsRight_channel_0 = m2_64_topPixelsRight_channels_01.val[0];
5858 const uint8x8_t& m64_topPixelsRight_channel_1 = m2_64_topPixelsRight_channels_01.val[1];
5859
5860
5861 // we determine the intermediate interpolation results for the top row (and we narrow down the 16 bit results 8 bit results)
5862 uint16x8_t m128_muliplication_channel_0 = vmull_u8(m64_topPixelsLeft_channel_0, m64_u_factorsLeft);
5863 uint16x8_t m128_muliplication_channel_1 = vmull_u8(m64_topPixelsLeft_channel_1, m64_u_factorsLeft);
5864
5865 m128_muliplication_channel_0 = vmlal_u8(m128_muliplication_channel_0, m64_topPixelsRight_channel_0, m64_u_factorsRight);
5866 m128_muliplication_channel_1 = vmlal_u8(m128_muliplication_channel_1, m64_topPixelsRight_channel_1, m64_u_factorsRight);
5867
5868 const uint8x8_t m64_topRow_channel_0 = vrshrn_n_u16(m128_muliplication_channel_0, 7);
5869 const uint8x8_t m64_topRow_channel_1 = vrshrn_n_u16(m128_muliplication_channel_1, 7);
5870
5871
5872 // we proceed with the bottom pixels (as we did with the top pixels)
5873 const uint16x8x2_t m2_128_bottomPixelsLeftRight = vuzpq_u16(m128_bottomPixels_0123, m128_bottomPixels_4567);
5874
5875 const uint8x8x2_t m2_64_bottomPixelsLeft_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[0])), vget_high_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[0])));
5876 const uint8x8x2_t m2_64_bottomPixelsRight_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[1])), vget_high_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[1])));
5877
5878 const uint8x8_t& m64_bottomPixelsLeft_channel_0 = m2_64_bottomPixelsLeft_channels_01.val[0];
5879 const uint8x8_t& m64_bottomPixelsLeft_channel_1 = m2_64_bottomPixelsLeft_channels_01.val[1];
5880
5881 const uint8x8_t& m64_bottomPixelsRight_channel_0 = m2_64_bottomPixelsRight_channels_01.val[0];
5882 const uint8x8_t& m64_bottomPixelsRight_channel_1 = m2_64_bottomPixelsRight_channels_01.val[1];
5883
5884
5885 // we determine the intermediate interpolation results for the bottom row (and we narrow down the 16 bit results to 8 bit results)
5886 m128_muliplication_channel_0 = vmull_u8(m64_bottomPixelsLeft_channel_0, m64_u_factorsLeft);
5887 m128_muliplication_channel_1 = vmull_u8(m64_bottomPixelsLeft_channel_1, m64_u_factorsLeft);
5888
5889 m128_muliplication_channel_0 = vmlal_u8(m128_muliplication_channel_0, m64_bottomPixelsRight_channel_0, m64_u_factorsRight);
5890 m128_muliplication_channel_1 = vmlal_u8(m128_muliplication_channel_1, m64_bottomPixelsRight_channel_1, m64_u_factorsRight);
5891
5892 const uint8x8_t m64_bottomRow_channel_0 = vrshrn_n_u16(m128_muliplication_channel_0, 7);
5893 const uint8x8_t m64_bottomRow_channel_1 = vrshrn_n_u16(m128_muliplication_channel_1, 7);
5894
5895
5896 // finally we determine the interpolation result between top and bottom row
5897 m128_muliplication_channel_0 = vmull_u8(m64_topRow_channel_0, m64_u_factorsTop);
5898 m128_muliplication_channel_1 = vmull_u8(m64_topRow_channel_1, m64_u_factorsTop);
5899
5900 m128_muliplication_channel_0 = vmlal_u8(m128_muliplication_channel_0, m64_bottomRow_channel_0, m64_u_factorsBottom);
5901 m128_muliplication_channel_1 = vmlal_u8(m128_muliplication_channel_1, m64_bottomRow_channel_1, m64_u_factorsBottom);
5902
5903
5904 // we narrow down the interpolation results and we store them
5905 uint8x8x2_t m2_64_result;
5906 m2_64_result.val[0] = vrshrn_n_u16(m128_muliplication_channel_0, 7);
5907 m2_64_result.val[1] = vrshrn_n_u16(m128_muliplication_channel_1, 7);
5908
5909 // we write back the results and interleave them automatically
5910 vst2_u8((uint8_t*)targetPixelData, m2_64_result);
5911
5912 targetPixelData += 8;
5913 }
5914
5915 // we need to process the last pixel again, as this pixel may have received wrong interpolation factors as we always load two successive pixels into our NEON registers
5916 // **TODO** this is just a temporary solution, check how we can avoid this additional step
5917
5918 const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
5919
5920 for (unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
5921 {
5922 const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 + int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
5923
5924 const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
5925 ocean_assert(lastSourcePixelLeft < sourceWidth);
5926 const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
5927
5928 const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
5929
5930 const unsigned int factorRight = factorRight_fixed16 >> 9u;
5931 const unsigned int factorLeft = 128u - factorRight;
5932
5933 for (unsigned int c = 0u; c < 2u; ++c)
5934 {
5935 ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * (128u - factorBottom)
5936 + (((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
5937 }
5938 }
5939 }
5940}
5941
5942#endif // OCEAN_WE_KEEP_ALSO_THIS_SLOW_IMPLEMENTATION_SHOWING_A_SLIGHTLY_DIFFERENT_APPROACH
5943
5944#ifdef OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION
5945
// Specialization for frames with 3 interleaved 8 bit channels: bilinear scaling with NEON instructions using interpolation factors with 7 bit precision.
// The function handles the target rows [firstTargetRow, firstTargetRow + numberTargetRows) and creates 8 target pixels within each iteration of the inner loop.
// The specialization is compiled only if OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION is defined.
// NOTE(review): the second template argument (8u) presumably denotes the number of pixels handled per iteration - confirm against the declaration of the template.
// @param source The source frame holding rows of 'sourceWidth' pixels, must be valid
// @param target The target frame receiving the interpolated pixels, must be valid
// @param sourceWidth Width of the source frame in pixel, asserted to be within [2, 65535]
// @param sourceHeight Height of the source frame in pixel, asserted to be within [1, 65535]
// @param targetWidth Width of the target frame in pixel, asserted to be within [8, 65535]
// @param targetHeight Height of the target frame in pixel, asserted to be within [1, 65535], used in the assert only
// @param sourceX_s_targetX The horizontal scale factor converting a target location to a source location (sourceX = sourceX_s_targetX * targetX), asserted to be larger than 0
// @param sourceY_s_targetY The vertical scale factor converting a target location to a source location (sourceY = sourceY_s_targetY * targetY), asserted to be larger than 0
// @param sourcePaddingElements Asserted to be 0, padding is not supported; the rows are addressed with a stride of 'sourceWidth' pixels
// @param targetPaddingElements Asserted to be 0; the rows are addressed with a stride of 'targetWidth' pixels
// @param firstTargetRow The first target row to be handled
// @param numberTargetRows The number of target rows to be handled
5946 template <>
5947 inline void FrameInterpolatorBilinear::scale8BitPerChannelSubset7BitPrecisionNEON<3u, 8u>(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
5948 {
5949 ocean_assert(source != nullptr && target != nullptr);
5950 ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
5951 ocean_assert(sourceHeight >= 1u && sourceHeight <= 65535u);
5952 ocean_assert(targetWidth >= 8u && targetWidth <= 65535u);
5953 ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
5954 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5955
5956 ocean_assert(sourcePaddingElements == 0u); // not supported
5957 ocean_assert(targetPaddingElements == 0u);
5958
// one PixelType element covers all three channels of a pixel, so that pointer arithmetic below is done in pixels (not in bytes)
5959 using PixelType = typename DataType<uint8_t, 3u>::Type;
5960
// the target pointer starts at the first row to be handled and is advanced continuously while pixels are written
5961 PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
5962 const PixelType* const sourcePixelData = (const PixelType*)source;
5963
5964 // our offset values for the eight left pixels in relation to the first pixel of the row
5965 unsigned int leftOffsets[8];
5966
5967 // this function uses fixed point numbers with 16 bit for the calculation of the interpolation positions and factors:
5968 // fixedPointLocation = floatLocation * 2^16
5969 //
5970 // [FEDCBA98, 76543210]
5971 // [pixel , subpixel]
5972 //
5973 // floatLocation = pixel + subpixel / 2^16
5974 //
5975 // Thus, the upper 16 bit represent the location of e.g., the left pixel (for the linear interpolation)
5976 // while the lower 16 bit represent one of both interpolation factors (and 2^16 - subpixel represents the second interpolation factor)
5977
// the scale factors as (rounded) fixed point numbers
5978 const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
5979 const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
5980
// the offsets align the pixel centers of source and target:
// sourceX = (targetX + 0.5) * sourceX_s_targetX - 0.5 = (sourceX_s_targetX * 0.5 - 0.5) + sourceX_s_targetX * targetX
// the offsets are negative whenever the scale factor is below 1, therefore they are stored as signed integers
5981 const int targetOffsetX_fixed16 = (int)(double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
5982 const int targetOffsetY_fixed16 = (int)(double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
5983
5984 // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
5985 const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
5986
5987 // we store 4 integers: [targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16]
5988 const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
5989
5990 // we store 4 integers: [sourceWidth - 2, sourceWidth - 2, sourceWidth - 2, sourceWidth - 2]
// this is the largest allowed index of a left pixel, as the right pixel is always loaded from 'left + 1'
5991 const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
5992
5993 // we store 4 integers: [0, 0, 0, 0]
5994 const int32x4_t m128_s_zero = vdupq_n_s32(0);
5995
5996 const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
5997 const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
5998
5999 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6000 {
// the vertical source location of this target row as fixed point number
// NOTE(review): the upper bound is an unsigned expression handed to minmax<int>; for sourceHeight > 32768 the value (sourceHeight - 1) << 16 exceeds the range of a signed 32 bit integer - confirm that such frames are never provided
6001 const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 + int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
6002
6003 const unsigned int sourceRowTop = sourceY_fixed16 >> 16u; // we must not round here
6004 const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
// reducing the 16 bit subpixel value to 7 bit (without rounding), so that factorBottom is within [0, 127]
6005 const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
6006
6007 const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
6008 // factorTop = 128 - factorBottom
6009 const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
6010
// for the very last source row, top row and bottom row are identical
6011 const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
6012
6013 const PixelType* const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
6014 const PixelType* const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
6015
6016 for (unsigned int x = 0; x < targetWidth; x += 8u)
6017 {
6018 if (x + 8u > targetWidth)
6019 {
6020 // the last iteration will not fit into the output frame,
6021 // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
6022
6023 ocean_assert(x >= 8u && targetWidth > 8u);
6024 const unsigned int newX = targetWidth - 8u;
6025
6026 ocean_assert(x > newX);
// the target pointer must be moved back by the same number of pixels
6027 targetPixelData -= x - newX;
6028
6029 x = newX;
6030
6031 // the for loop will stop after this iteration
6032 ocean_assert(!(x + 8u < targetWidth));
6033 }
6034
6035
6036 // we need eight successive (integer) x coordinates, stored in two registers:
6037 // [x + 3, x + 2, x + 1, x + 0] and [x + 7, x + 6, x + 5, x + 4]
6038 const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
6039 const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
6040
6041 // we calculate the four source locations for our four target locations (for each of both registers), negative locations are clamped to zero
6042 const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
6043 const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
6044
6045 const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
6046 const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
6047
6048 // now we determine the pixel/integer accurate source locations
6049 // m128_u_left = min(sourceX_fixed16 >> 16, sourceWidth - 2)
6050 const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2); // not vrshrq_n_u32 as we must not round here
6051 const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
6052
6053 // we store the offsets we have calculated
6054 vst1q_u32(leftOffsets + 0, m128_u_left_0123);
6055 vst1q_u32(leftOffsets + 4, m128_u_left_4567);
6056
6057
6058
6059 // we load the individual pixels to our de-interleaved 8x8 bit registers, one register for each channel (we do this for the top-left and top-right pixels)
6060 // note: loading of each pixel individually is significantly slower than loading two neighboring pixels within one iteration
6061
// NOTE(review): both objects are declared without initializer and handed to vld3_lane_u8(), however all eight lanes are loaded below before the registers are used
6062 uint8x8x3_t topLeftPixels;
6063 uint8x8x3_t topRightPixels;
6064
6065 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 0), topLeftPixels, 0);
6066 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 1), topRightPixels, 0);
6067
6068 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 0), topLeftPixels, 1);
6069 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 1), topRightPixels, 1);
6070
6071 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 0), topLeftPixels, 2);
6072 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 1), topRightPixels, 2);
6073
6074 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 0), topLeftPixels, 3);
6075 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 1), topRightPixels, 3);
6076
6077 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 0), topLeftPixels, 4);
6078 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 1), topRightPixels, 4);
6079
6080 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 0), topLeftPixels, 5);
6081 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 1), topRightPixels, 5);
6082
6083 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 0), topLeftPixels, 6);
6084 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 1), topRightPixels, 6);
6085
6086 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 0), topLeftPixels, 7);
6087 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 1), topRightPixels, 7);
6088
6089
6090 // we load the individual pixels to our de-interleaved 8x8 bit registers, one register for each channel (we do this for the bottom-left and bottom-right pixels)
6091
6092 uint8x8x3_t bottomLeftPixels;
6093 uint8x8x3_t bottomRightPixels;
6094
6095 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 0), bottomLeftPixels, 0);
6096 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 1), bottomRightPixels, 0);
6097
6098 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 0), bottomLeftPixels, 1);
6099 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 1), bottomRightPixels, 1);
6100
6101 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 0), bottomLeftPixels, 2);
6102 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 1), bottomRightPixels, 2);
6103
6104 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 0), bottomLeftPixels, 3);
6105 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 1), bottomRightPixels, 3);
6106
6107 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 0), bottomLeftPixels, 4);
6108 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 1), bottomRightPixels, 4);
6109
6110 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 0), bottomLeftPixels, 5);
6111 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 1), bottomRightPixels, 5);
6112
6113 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 0), bottomLeftPixels, 6);
6114 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 1), bottomRightPixels, 6);
6115
6116 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 0), bottomLeftPixels, 7);
6117 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 1), bottomRightPixels, 7);
6118
6119
6120
6121 // we determine the multiplication factors for the right pixels - which are already stored in the lower 16 bits
6122 // we need an accuracy of 7 bits (values between 0 and 128):
6123 // 76 54 32 10
6124 // [F3 F2 F1 F0]
// the shift by 9 bits is a rounding shift, therefore the value 128 can be reached
6125 const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6126 const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6127
6128 // as we have the pixel information de-interleaved, we can store all 8 interpolation factors together into one 8x8 bit register:
6129 const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
6130 const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
// factorLeft = 128 - factorRight, both factors always sum up to 128 so that the 16 bit products below cannot overflow (255 * 128 < 2^16)
6131 const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
6132
6133
6134
6135 // we determine the intermediate interpolation results for the top row (and we narrow down the 16 bit results to 8 bit results)
6136 uint16x8_t m128_muliplicationChannel_0 = vmull_u8(topLeftPixels.val[0], m64_u_factorsLeft);
6137 uint16x8_t m128_muliplicationChannel_1 = vmull_u8(topLeftPixels.val[1], m64_u_factorsLeft);
6138 uint16x8_t m128_muliplicationChannel_2 = vmull_u8(topLeftPixels.val[2], m64_u_factorsLeft);
6139
6140 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, topRightPixels.val[0], m64_u_factorsRight);
6141 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, topRightPixels.val[1], m64_u_factorsRight);
6142 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, topRightPixels.val[2], m64_u_factorsRight);
6143
// rounding division by 128 (the sum of both factors)
6144 uint8x8_t m64_topRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6145 uint8x8_t m64_topRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6146 uint8x8_t m64_topRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6147
6148
6149
6150 // we determine the intermediate interpolation results for the bottom row (and we narrow down the 16 bit results to 8 bit results)
6151 m128_muliplicationChannel_0 = vmull_u8(bottomLeftPixels.val[0], m64_u_factorsLeft);
6152 m128_muliplicationChannel_1 = vmull_u8(bottomLeftPixels.val[1], m64_u_factorsLeft);
6153 m128_muliplicationChannel_2 = vmull_u8(bottomLeftPixels.val[2], m64_u_factorsLeft);
6154
6155 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, bottomRightPixels.val[0], m64_u_factorsRight);
6156 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, bottomRightPixels.val[1], m64_u_factorsRight);
6157 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, bottomRightPixels.val[2], m64_u_factorsRight);
6158
6159 uint8x8_t m64_bottomRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6160 uint8x8_t m64_bottomRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6161 uint8x8_t m64_bottomRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6162
6163
6164
6165 // finally we determine the interpolation result between top and bottom row
6166 m128_muliplicationChannel_0 = vmull_u8(m64_topRowChannel_0, m64_u_factorsTop);
6167 m128_muliplicationChannel_1 = vmull_u8(m64_topRowChannel_1, m64_u_factorsTop);
6168 m128_muliplicationChannel_2 = vmull_u8(m64_topRowChannel_2, m64_u_factorsTop);
6169
6170 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, m64_bottomRowChannel_0, m64_u_factorsBottom);
6171 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, m64_bottomRowChannel_1, m64_u_factorsBottom);
6172 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, m64_bottomRowChannel_2, m64_u_factorsBottom);
6173
6174
6175 // we narrow down the interpolation results and we store them
6176 uint8x8x3_t result;
6177 result.val[0] = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6178 result.val[1] = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6179 result.val[2] = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6180
6181 // we write back the results and interleave them automatically
6182 vst3_u8((uint8_t*)targetPixelData, result);
6183
6184 targetPixelData += 8;
6185 }
6186
6187 // we need to process the last pixels of the row again, as these pixels may have received wrong interpolation factors:
// the NEON code above always loads two successive pixels and therefore clamps the left pixel to 'sourceWidth - 2' without adjusting the interpolation factor
6188 // **TODO** this is just a temporary solution, check how we can avoid this additional step
6189
// the first target pixel for which the source location may reach the last source column
6190 const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
6191
6192 for (unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
6193 {
6194 const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 + int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
6195
6196 const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
6197 ocean_assert(lastSourcePixelLeft < sourceWidth);
6198 const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
6199
6200 const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
6201
// NOTE(review): this is a truncating shift while the NEON code above applies a rounding shift (vrshrn_n_u32) - confirm that the difference is intended
6202 const unsigned int factorRight = factorRight_fixed16 >> 9u;
6203 const unsigned int factorLeft = 128u - factorRight;
6204
6205 for (unsigned int c = 0u; c < 3u; ++c)
6206 {
// targetPixelData points to the end of the current row at this moment, thus we address pixel x relative to the end of the row
// horizontal and vertical interpolation are combined and normalized at once (rounding division by 128 * 128), while the NEON code above normalizes in two individual steps
6207 ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * (128u - factorBottom)
6208 + (((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
6209 }
6210 }
6211 }
6212 }
6213
6214#endif // OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION
6215
6216#ifdef OCEAN_WE_KEEP_ALSO_THIS_SLOW_IMPLEMENTATION_SHOWING_A_MORE_GENERIC_APPROACH
6217
6218/// \cond DOXYGEN_DO_NOT_DOCUMENT
6219
6220template <>
6221inline void FrameInterpolatorBilinear::resize8BitPerChannelSubset7BitPrecisionNEON<4u, 8u>(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
6222{
6223 ocean_assert(source != nullptr && target != nullptr);
6224 ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
6225 ocean_assert(sourceHeight >= 1u && sourceHeight <= 65535u);
6226 ocean_assert(targetWidth >= 8u && targetWidth <= 65535u);
6227 ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
6228 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
6229
6230 ocean_assert(sourcePaddingElements == 0u); // not supported
6231 ocean_assert(targetPaddingElements == 0u);
6232
6233 using PixelType = typename DataType<uint8_t, 4u>::Type;
6234
6235 PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
6236 const PixelType* const sourcePixelData = (const PixelType*)source;
6237
6238 // our offset values for the eight left pixels in relation to the first pixel of the row
6239 unsigned int leftOffsets[8];
6240
6241 // this function uses fixed point numbers with 16 bit for the calculation of const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
6242 const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
6243
6244 // this function uses fixed point numbers with 16 bit for the calculation of the interpolation positions and factors:
6245 // fixedPointLocation = floatLocation * 2^16
6246 //
6247 // [FEDCBA98, 76543210]
6248 // [pixel , subpixel]
6249 //
6250 // fixedPointLocation = pixel + subpixel / 2^16
6251 //
6252 // Thus, the upper 16 bit represent the location of e.g., the left pixel (for the linear interpolation)
6253 // while the lower 16 bit represent one of both interpolation factors (and 2^16 - subpixel represents the second interpolation factor)
6254
6255 const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
6256 const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
6257
6258 const int targetOffsetX_fixed16 = (int)(double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
6259 const int targetOffsetY_fixed16 = (int)(double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
6260
6261 // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
6262 const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
6263
6264 // we store 4 integers: [targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16]
6265 const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
6266
6267 // we store 4 integers: [sourceWidth - 2, sourceWidth - 2, sourceWidth - 2, sourceWidth - 2]
6268 const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
6269
6270 // we store 4 integers: [0, 0, 0, 0]
6271 const int32x4_t m128_s_zero = vdupq_n_s32(0);
6272
6273 const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
6274 const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
6275
6276 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6277 {
6278 const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 + int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
6279
6280 const unsigned int sourceRowTop = sourceY_fixed16 >> 16u; // we must not round here
6281 const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
6282 const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
6283
6284 const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
6285 // factorTop = 128 - factorBottom
6286 const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
6287
6288 const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
6289
6290 const PixelType* const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
6291 const PixelType* const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
6292
6293 for (unsigned int x = 0; x < targetWidth; x += 8u)
6294 {
6295 if (x + 8u > targetWidth)
6296 {
6297 // the last iteration will not fit into the output frame,
6298 // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
6299
6300 ocean_assert(x >= 8u && targetWidth > 8u);
6301 const unsigned int newX = targetWidth - 8u;
6302
6303 ocean_assert(x > newX);
6304 targetPixelData -= x - newX;
6305
6306 x = newX;
6307
6308 // the for loop will stop after this iteration
6309 ocean_assert(!(x + 8u < targetWidth));
6310 }
6311
6312
6313 // we need four successive x coordinate floats:
6314 // [x + 3, x + 2, x + 1; x + 0]
6315 const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
6316 const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
6317
6318 // we calculate the four source locations for our four target locations
6319 const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
6320 const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
6321
6322 const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
6323 const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
6324
6325 // now we determine the pixel/integer accurate source locations
6326 // m128_u_left = min(floor(m128_f_sourceX), sourceWidth - 2)
6327 const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2); // not vrshrq_n_u32 as we must not round here
6328 const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
6329
6330 // we store the offsets we have calculated
6331 vst1q_u32(leftOffsets + 0, m128_u_left_0123);
6332 vst1q_u32(leftOffsets + 4, m128_u_left_4567);
6333
6334
6335
6336 // we load the individal pixels to our four (de-interleaved) 8x8 bit registers (we do this for the top-left and top-right pixels)
6337 // note: loading of each pixel individually is significantly slower than loading two neighboring pixels within one iteration
6338
6339 uint8x8x4_t topLeftPixels;
6340 uint8x8x4_t topRightPixels;
6341
6342 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 0), topLeftPixels, 0);
6343 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 1), topRightPixels, 0);
6344
6345 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 0), topLeftPixels, 1);
6346 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 1), topRightPixels, 1);
6347
6348 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 0), topLeftPixels, 2);
6349 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 1), topRightPixels, 2);
6350
6351 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 0), topLeftPixels, 3);
6352 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 1), topRightPixels, 3);
6353
6354 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 0), topLeftPixels, 4);
6355 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 1), topRightPixels, 4);
6356
6357 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 0), topLeftPixels, 5);
6358 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 1), topRightPixels, 5);
6359
6360 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 0), topLeftPixels, 6);
6361 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 1), topRightPixels, 6);
6362
6363 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 0), topLeftPixels, 7);
6364 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 1), topRightPixels, 7);
6365
6366
6367 // we load the individal pixels to our four (de-interleaved) 8x8 bit registers (we do this for the bottom-left and bottom-right pixels)
6368
6369 uint8x8x4_t bottomLeftPixels;
6370 uint8x8x4_t bottomRightPixels;
6371
6372 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 0), bottomLeftPixels, 0);
6373 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 1), bottomRightPixels, 0);
6374
6375 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 0), bottomLeftPixels, 1);
6376 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 1), bottomRightPixels, 1);
6377
6378 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 0), bottomLeftPixels, 2);
6379 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 1), bottomRightPixels, 2);
6380
6381 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 0), bottomLeftPixels, 3);
6382 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 1), bottomRightPixels, 3);
6383
6384 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 0), bottomLeftPixels, 4);
6385 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 1), bottomRightPixels, 4);
6386
6387 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 0), bottomLeftPixels, 5);
6388 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 1), bottomRightPixels, 5);
6389
6390 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 0), bottomLeftPixels, 6);
6391 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 1), bottomRightPixels, 6);
6392
6393 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 0), bottomLeftPixels, 7);
6394 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 1), bottomRightPixels, 7);
6395
6396
6397
6398 // we determine the multiplication factors for the right pixels - which are already stored in the lower 16 bits
6399 // we need an accuracy of 7 bits (values between 0 and 128):
6400 // 76 54 32 10
6401 // [F3 F2 F1 F0]
6402 const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6403 const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6404
6405 // as we have the pixel information de-interleaved, we can store all 8 interpolation factors together into one 8x8 bit register:
6406 const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
6407 const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
6408 const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
6409
6410
6411
6412 // we determine the intermediate interpolation results for the top row (and we narrow down the 16 bit results 8 bit results)
6413 uint16x8_t m128_muliplicationChannel_0 = vmull_u8(topLeftPixels.val[0], m64_u_factorsLeft);
6414 uint16x8_t m128_muliplicationChannel_1 = vmull_u8(topLeftPixels.val[1], m64_u_factorsLeft);
6415 uint16x8_t m128_muliplicationChannel_2 = vmull_u8(topLeftPixels.val[2], m64_u_factorsLeft);
6416 uint16x8_t m128_muliplicationChannel_3 = vmull_u8(topLeftPixels.val[3], m64_u_factorsLeft);
6417
6418 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, topRightPixels.val[0], m64_u_factorsRight);
6419 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, topRightPixels.val[1], m64_u_factorsRight);
6420 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, topRightPixels.val[2], m64_u_factorsRight);
6421 m128_muliplicationChannel_3 = vmlal_u8(m128_muliplicationChannel_3, topRightPixels.val[3], m64_u_factorsRight);
6422
6423 uint8x8_t m64_topRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6424 uint8x8_t m64_topRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6425 uint8x8_t m64_topRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6426 uint8x8_t m64_topRowChannel_3 = vrshrn_n_u16(m128_muliplicationChannel_3, 7);
6427
6428
6429
6430 // we determine the intermediate interpolation results for the bottom row (and we narrow down the 16 bit results 8 bit results)
6431 m128_muliplicationChannel_0 = vmull_u8(bottomLeftPixels.val[0], m64_u_factorsLeft);
6432 m128_muliplicationChannel_1 = vmull_u8(bottomLeftPixels.val[1], m64_u_factorsLeft);
6433 m128_muliplicationChannel_2 = vmull_u8(bottomLeftPixels.val[2], m64_u_factorsLeft);
6434 m128_muliplicationChannel_3 = vmull_u8(bottomLeftPixels.val[3], m64_u_factorsLeft);
6435
6436 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, bottomRightPixels.val[0], m64_u_factorsRight);
6437 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, bottomRightPixels.val[1], m64_u_factorsRight);
6438 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, bottomRightPixels.val[2], m64_u_factorsRight);
6439 m128_muliplicationChannel_3 = vmlal_u8(m128_muliplicationChannel_3, bottomRightPixels.val[3], m64_u_factorsRight);
6440
6441 uint8x8_t m64_bottomRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6442 uint8x8_t m64_bottomRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6443 uint8x8_t m64_bottomRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6444 uint8x8_t m64_bottomRowChannel_3 = vrshrn_n_u16(m128_muliplicationChannel_3, 7);
6445
6446
6447
6448 // finnally we determine the interpolation result between top and bottom row
6449 m128_muliplicationChannel_0 = vmull_u8(m64_topRowChannel_0, m64_u_factorsTop);
6450 m128_muliplicationChannel_1 = vmull_u8(m64_topRowChannel_1, m64_u_factorsTop);
6451 m128_muliplicationChannel_2 = vmull_u8(m64_topRowChannel_2, m64_u_factorsTop);
6452 m128_muliplicationChannel_3 = vmull_u8(m64_topRowChannel_3, m64_u_factorsTop);
6453
6454 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, m64_bottomRowChannel_0, m64_u_factorsBottom);
6455 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, m64_bottomRowChannel_1, m64_u_factorsBottom);
6456 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, m64_bottomRowChannel_2, m64_u_factorsBottom);
6457 m128_muliplicationChannel_3 = vmlal_u8(m128_muliplicationChannel_3, m64_bottomRowChannel_3, m64_u_factorsBottom);
6458
6459
6460 // we narrow down the interpolation results and we store them
6461 uint8x8x4_t result;
6462 result.val[0] = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6463 result.val[1] = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6464 result.val[2] = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6465 result.val[3] = vrshrn_n_u16(m128_muliplicationChannel_3, 7);
6466
6467 // we write back the results and interleave them automatically
6468 vst4_u8((uint8_t*)targetPixelData, result);
6469
6470 targetPixelData += 8;
6471 }
6472
6473 // we need to process the last pixel again, as this pixel may have received wrong interpolation factors as we always load two successive pixels into our NEON registers
6474 // **TODO** this is just a temporary solution, check how we can avoid this additional step
6475
6476 const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
6477
6478 for (unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
6479 {
6480 const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 + int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
6481
6482 const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
6483 ocean_assert(lastSourcePixelLeft < sourceWidth);
6484 const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
6485
6486 const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
6487
6488 const unsigned int factorRight = factorRight_fixed16 >> 9u;
6489 const unsigned int factorLeft = 128u - factorRight;
6490
6491 for (unsigned int c = 0u; c < 4u; ++c)
6492 {
6493 ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorTop
6494 + (((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
6495 }
6496 }
6497 }
6498}
6499
6500/// \endcond
6501
6502#endif // OCEAN_WE_KEEP_ALSO_THIS_SLOW_IMPLEMENTATION_SHOWING_A_MORE_GENERIC_APPROACH
6503
6504template <>
6505inline void FrameInterpolatorBilinear::interpolateRowVerticalNEON<float>(const float* sourceRowTop, const float* sourceRowBottom, float* targetRow, const unsigned int elements, const float factorBottom)
6506{
6507 ocean_assert(sourceRowTop != nullptr);
6508 ocean_assert(sourceRowBottom != nullptr);
6509 ocean_assert(targetRow != nullptr);
6510 ocean_assert(elements >= 16u);
6511 ocean_assert(factorBottom >= 0.0f && factorBottom <= 1.0f);
6512
6513 // [1.0f, 1.0f, 1.0f, 1.0f]
6514 const float32x4_t constant_1_f_32x4 = vdupq_n_f32(1.0f);
6515
6516 const float32x4_t factorsBottom_f_32x4 = vdupq_n_f32(factorBottom);
6517 const float32x4_t factorsTop_f_32x4 = vsubq_f32(constant_1_f_32x4, factorsBottom_f_32x4); // factorTop = 1 - factorBottom
6518
6519 for (unsigned int n = 0u; n < elements; n += 16u)
6520 {
6521 if (n + 16u > elements)
6522 {
6523 // the last iteration will not fit into the output frame,
6524 // so we simply shift x left by some elements (at most 15) and we will calculate some elements again
6525
6526 ocean_assert(n >= 16u && elements > 16u);
6527 const unsigned int offset = n - (elements - 16u);
6528 ocean_assert(offset < 16u);
6529
6530 sourceRowTop -= offset;
6531 sourceRowBottom -= offset;
6532 targetRow -= offset;
6533
6534 // the for loop will stop after this iteration
6535 ocean_assert(!(n + 16u < elements));
6536 }
6537
6538 // loading the next four 32 bit values from the top and bottom row
6539 const float32x4_t top_03_32x4 = vld1q_f32(sourceRowTop + 0);
6540 const float32x4_t top_47_32x4 = vld1q_f32(sourceRowTop + 4);
6541 const float32x4_t top_8B_32x4 = vld1q_f32(sourceRowTop + 8);
6542 const float32x4_t top_CF_32x4 = vld1q_f32(sourceRowTop + 12);
6543
6544 const float32x4_t bottom_03_32x4 = vld1q_f32(sourceRowBottom + 0);
6545 const float32x4_t bottom_47_32x4 = vld1q_f32(sourceRowBottom + 4);
6546 const float32x4_t bottom_8B_32x4 = vld1q_f32(sourceRowBottom + 8);
6547 const float32x4_t bottom_CF_32x4 = vld1q_f32(sourceRowBottom + 12);
6548
6549 // interpolatedRow_32x4 = top_32x4 * factorsTop + bottom_32x4 * factorsBottom
6550 float32x4_t interpolatedRow_03_32x4 = vmulq_f32(top_03_32x4, factorsTop_f_32x4);
6551 float32x4_t interpolatedRow_47_32x4 = vmulq_f32(top_47_32x4, factorsTop_f_32x4);
6552 float32x4_t interpolatedRow_8B_32x4 = vmulq_f32(top_8B_32x4, factorsTop_f_32x4);
6553 float32x4_t interpolatedRow_CF_32x4 = vmulq_f32(top_CF_32x4, factorsTop_f_32x4);
6554
6555 interpolatedRow_03_32x4 = vmlaq_f32(interpolatedRow_03_32x4, bottom_03_32x4, factorsBottom_f_32x4);
6556 interpolatedRow_47_32x4 = vmlaq_f32(interpolatedRow_47_32x4, bottom_47_32x4, factorsBottom_f_32x4);
6557 interpolatedRow_8B_32x4 = vmlaq_f32(interpolatedRow_8B_32x4, bottom_8B_32x4, factorsBottom_f_32x4);
6558 interpolatedRow_CF_32x4 = vmlaq_f32(interpolatedRow_CF_32x4, bottom_CF_32x4, factorsBottom_f_32x4);
6559
6560 // writing back the four interpolated 32 bit results
6561 vst1q_f32(targetRow + 0, interpolatedRow_03_32x4);
6562 vst1q_f32(targetRow + 4, interpolatedRow_47_32x4);
6563 vst1q_f32(targetRow + 8, interpolatedRow_8B_32x4);
6564 vst1q_f32(targetRow + 12, interpolatedRow_CF_32x4);
6565
6566 sourceRowTop += 16;
6567 sourceRowBottom += 16;
6568 targetRow += 16;
6569 }
6570}
6571
6572template <>
6573inline void FrameInterpolatorBilinear::interpolateRowHorizontalNEON<float, 1u>(const float* extendedSourceRow, float* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const float* interpolationFactorsRight)
6574{
6575 ocean_assert(extendedSourceRow != nullptr);
6576 ocean_assert(targetRow != nullptr);
6577 ocean_assert(targetWidth >= 8u);
6578 ocean_assert(interpolationLocations != nullptr);
6579 ocean_assert(interpolationFactorsRight != nullptr);
6580
6581 ocean_assert(channels == 1u);
6582
6583 // [1.0f, 1.0f, 1.0f, 1.0f]
6584 const float32x4_t constant_1_f_32x4 = vdupq_n_f32(1.0f);
6585
6586 for (unsigned int x = 0; x < targetWidth; x += 8u)
6587 {
6588 if (x + 8u > targetWidth)
6589 {
6590 // the last iteration will not fit into the output frame,
6591 // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
6592
6593 ocean_assert(x >= 8u && targetWidth > 8u);
6594 const unsigned int newX = targetWidth - 8u;
6595
6596 ocean_assert(x > newX);
6597 const unsigned int offset = x - newX;
6598
6599 targetRow -= offset;
6600 interpolationLocations -= offset;
6601 interpolationFactorsRight -= offset;
6602
6603 x = newX;
6604
6605 // the for loop will stop after this iteration
6606 ocean_assert(!(x + 8u < targetWidth));
6607 }
6608
6609 // we load the left and the right pixels (for four resulting target pixels)
6610
6611 const float32x2_t pixel_0_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[0]);
6612 const float32x2_t pixel_1_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[1]);
6613 const float32x4_t pixel_01_f_32x4 = vcombine_f32(pixel_0_f_32x2, pixel_1_f_32x2);
6614
6615 const float32x2_t pixel_2_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[2]);
6616 const float32x2_t pixel_3_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[3]);
6617 const float32x4_t pixel_23_f_32x4 = vcombine_f32(pixel_2_f_32x2, pixel_3_f_32x2);
6618
6619 const float32x2_t pixel_4_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[4]);
6620 const float32x2_t pixel_5_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[5]);
6621 const float32x4_t pixel_45_f_32x4 = vcombine_f32(pixel_4_f_32x2, pixel_5_f_32x2);
6622
6623 const float32x2_t pixel_6_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[6]);
6624 const float32x2_t pixel_7_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[7]);
6625 const float32x4_t pixel_67_f_32x4 = vcombine_f32(pixel_6_f_32x2, pixel_7_f_32x2);
6626
6627 const float32x4_t factorsRight_0123_f_32x4 = vld1q_f32(interpolationFactorsRight + 0);
6628 const float32x4_t factorsLeft_0123_f_32x4 = vsubq_f32(constant_1_f_32x4, factorsRight_0123_f_32x4);
6629 const float32x4x2_t factorsLeftRight_0123_f_32x4_2 = vzipq_f32(factorsLeft_0123_f_32x4, factorsRight_0123_f_32x4);
6630
6631 const float32x4_t factorsRight_4567_f_32x4 = vld1q_f32(interpolationFactorsRight + 4);
6632 const float32x4_t factorsLeft_4567_f_32x4 = vsubq_f32(constant_1_f_32x4, factorsRight_4567_f_32x4);
6633 const float32x4x2_t factorsLeftRight_4567_f_32x4_2 = vzipq_f32(factorsLeft_4567_f_32x4, factorsRight_4567_f_32x4);
6634
6635 const float32x4_t multiplied_01_f_32x4 = vmulq_f32(pixel_01_f_32x4, factorsLeftRight_0123_f_32x4_2.val[0]);
6636 const float32x4_t multiplied_23_f_32x4 = vmulq_f32(pixel_23_f_32x4, factorsLeftRight_0123_f_32x4_2.val[1]);
6637
6638 const float32x4_t multiplied_45_f_32x4 = vmulq_f32(pixel_45_f_32x4, factorsLeftRight_4567_f_32x4_2.val[0]);
6639 const float32x4_t multiplied_67_f_32x4 = vmulq_f32(pixel_67_f_32x4, factorsLeftRight_4567_f_32x4_2.val[1]);
6640
6641 const float32x2_t result_01_f_32x2 = vpadd_f32(vget_low_f32(multiplied_01_f_32x4), vget_high_f32(multiplied_01_f_32x4));
6642 const float32x2_t result_23_f_32x2 = vpadd_f32(vget_low_f32(multiplied_23_f_32x4), vget_high_f32(multiplied_23_f_32x4));
6643
6644 const float32x2_t result_45_f_32x2 = vpadd_f32(vget_low_f32(multiplied_45_f_32x4), vget_high_f32(multiplied_45_f_32x4));
6645 const float32x2_t result_67_f_32x2 = vpadd_f32(vget_low_f32(multiplied_67_f_32x4), vget_high_f32(multiplied_67_f_32x4));
6646
6647 const float32x4_t result_0123_f_32x4 = vcombine_f32(result_01_f_32x2, result_23_f_32x2);
6648 const float32x4_t result_4567_f_32x4 = vcombine_f32(result_45_f_32x2, result_67_f_32x2);
6649
6650 vst1q_f32(targetRow + 0, result_0123_f_32x4);
6651 vst1q_f32(targetRow + 4, result_4567_f_32x4);
6652
6653 targetRow += 8;
6654 interpolationLocations += 8;
6655 interpolationFactorsRight += 8;
6656 }
6657}
6658
6659template <>
6660inline void FrameInterpolatorBilinear::scaleSubset<float, float, 1u>(const float* source, float* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
6661{
6662 ocean_assert(source != nullptr && target != nullptr);
6663 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
6664 ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
6665 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
6666
6667 ocean_assert(sourceWidth != targetWidth || sourceHeight != targetHeight);
6668
6669 const unsigned int sourceStrideElements = sourceWidth * 1u + sourcePaddingElements;
6670 const unsigned int targetStrideElements = targetWidth * 1u + targetPaddingElements;
6671
6672 using InterpolateRowVerticalFunction = void (*)(const float*, const float*, float*, const unsigned int, const float);
6673 using InterpolateRowHorizontalFunction = void (*)(const float*, float*, const unsigned int, const unsigned int, const unsigned int*, const float*);
6674
6675 InterpolateRowVerticalFunction interpolateRowVerticalFunction = interpolateRowVertical<float>;
6676 InterpolateRowHorizontalFunction interpolateRowHorizontalFunction = interpolateRowHorizontal<float, 1u>;
6677
6678 if (sourceWidth * 1u >= 16u)
6679 {
6680 interpolateRowVerticalFunction = interpolateRowVerticalNEON<float>;
6681 }
6682
6683 if (targetWidth >= 8u)
6684 {
6685 interpolateRowHorizontalFunction = interpolateRowHorizontalNEON<float, 1u>;
6686 }
6687
6688 target += targetStrideElements * firstTargetRow;
6689
6690 const float sourceX_T_targetX = float(sourceX_s_targetX);
6691 const float sourceY_T_targetY = float(sourceY_s_targetY);
6692
6693 // See the generic template function for a detailed documentation regarding interpolation factors.
6694
6695 Memory memoryIntermediateExtendedRow;
6696 Memory memoryHorizontalInterpolationLocations;
6697 Memory memoryHorizontalInterpolationFactorsRight;
6698
6699 if (sourceWidth != targetWidth)
6700 {
6701 // in case we are scaling the width of the frame, we use an intermediate buffer and pre-calculated interpolation locations and factors
6702
6703 memoryIntermediateExtendedRow = Memory::create<float>(sourceWidth + 1u); // one additional pixel
6704
6705 memoryHorizontalInterpolationLocations = Memory::create<unsigned int>(targetWidth); // one offset for each target pixel
6706
6707 memoryHorizontalInterpolationFactorsRight = Memory::create<float>(targetWidth); // one factors (right) for each target pixel
6708 }
6709
6710 if (memoryHorizontalInterpolationLocations)
6711 {
6712 ocean_assert(memoryHorizontalInterpolationFactorsRight);
6713
6714 if (targetWidth >= 4u)
6715 {
6716 const float32x4_t sourceX_T_targetX_f_32x4 = vdupq_n_f32(sourceX_T_targetX);
6717 const float32x4_t targetOffsetX_f_32x4 = vdupq_n_f32(sourceX_T_targetX * 0.5f - 0.5f);
6718
6719 // [0.0f, 0.0f, 0.0f, 0.0f]
6720 const float32x4_t constant_0_f_32x4 = vdupq_n_f32(0);
6721
6722 // [4.0f, 4.0f, 4.0f, 4.0f]
6723 const float32x4_t constant_4_f_32x4 = vdupq_n_f32(4.0f);
6724
6725 // we store 4 integers: [sourceWidth - 1, sourceWidth - 1, sourceWidth - 1, sourceWidth - 1]
6726 const uint32x4_t sourceWidth_1_u_32x4 = vdupq_n_u32(sourceWidth - 1u);
6727
6728 // [0.0f, 1.0f, 2.0f, 3.0f]
6729 const float f_0123[4] = {0.0f, 1.0f, 2.0f, 3.0f};
6730 float32x4_t x_0123_f_32x4 = vld1q_f32(f_0123);
6731
6732 // we pre-calculate the interpolation factors and pixel locations in horizontal direction
6733
6734 for (unsigned int x = 0u; x < targetWidth; x += 4u)
6735 {
6736 if (x + 4u > targetWidth)
6737 {
6738 // the last iteration will not fit into the output frame,
6739 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
6740
6741 ocean_assert(x >= 4u && targetWidth > 4u);
6742 const unsigned int newX = targetWidth - 4u;
6743
6744 ocean_assert(x > newX);
6745 const unsigned int offset = x - newX;
6746
6747 x = newX;
6748
6749 x_0123_f_32x4 = vsubq_f32(x_0123_f_32x4, vdupq_n_f32(float(offset)));
6750
6751 // the for loop will stop after this iteration
6752 ocean_assert(!(x + 4u < targetWidth));
6753 }
6754
6755 // we calculate the four source locations for our four target locations
6756 const float32x4_t sourceX_0123_f_32x4 = vmaxq_f32(constant_0_f_32x4, vaddq_f32(targetOffsetX_f_32x4, vmulq_f32(sourceX_T_targetX_f_32x4, x_0123_f_32x4)));
6757
6758 // now we determine the pixel/integer accurate source locations
6759 // left = min(floor(sourceX), sourceWidth - 1)
6760 uint32x4_t left_0123_u_32x4 = vminq_u32(vcvtq_u32_f32(sourceX_0123_f_32x4), sourceWidth_1_u_32x4); // no rounding here
6761
6762 // we store the offsets we have calculated
6763 vst1q_u32(memoryHorizontalInterpolationLocations.data<unsigned int>() + x, left_0123_u_32x4);
6764
6765 // factorRight = sourcceX - float(left)
6766 const float32x4_t factorsRight_f_32x4 = vsubq_f32(sourceX_0123_f_32x4, vcvtq_f32_u32(left_0123_u_32x4));
6767
6768 vst1q_f32(memoryHorizontalInterpolationFactorsRight.data<float>() + x, factorsRight_f_32x4);
6769
6770 // [x + 0, x + 1, x + 2, x + 3] + [4, 4, 4, 4]
6771 x_0123_f_32x4 = vaddq_f32(x_0123_f_32x4, constant_4_f_32x4);
6772 }
6773 }
6774 else
6775 {
6776 const float targetOffsetX = sourceX_T_targetX * 0.5f - 0.5f;
6777
6778 // we pre-calculate the interpolation factors and pixel locations in horizontal direction
6779
6780 for (unsigned int x = 0u; x < targetWidth; ++x)
6781 {
6782 const float sourceX = max(0.0f, targetOffsetX + float(x) * sourceX_T_targetX);
6783
6784 const unsigned int left = min((unsigned int)sourceX, sourceWidth - 1u); // no rounding here
6785
6786 memoryHorizontalInterpolationLocations.data<unsigned int>()[x] = left;
6787
6788 const float factorRight = sourceX - float(left);
6789 ocean_assert(factorRight >= 0.0f && factorRight <= 1.0f);
6790
6791 memoryHorizontalInterpolationFactorsRight.data<float>()[x] = factorRight;
6792 }
6793 }
6794 }
6795
6796 const float targetOffsetY = sourceY_T_targetY * 0.5f - 0.5f;
6797
6798 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6799 {
6800 const float sourceY = minmax<float>(0.0f, targetOffsetY + sourceY_T_targetY * float(y), float(sourceHeight) - 1.0f);
6801
6802 const unsigned int sourceRowTop = (unsigned int)sourceY; // we must not round here
6803 const float factorBottom = sourceY - float(sourceRowTop);
6804 ocean_assert(factorBottom >= 0.0f && factorBottom <= 1.0f);
6805
6806 const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
6807
6808 const float* const sourceTopRow = source + sourceStrideElements * sourceRowTop;
6809 const float* const sourceBottomRow = source + sourceStrideElements * sourceRowBottom;
6810
6811 float* targetRow = nullptr;
6812
6813 if (sourceHeight == targetHeight)
6814 {
6815 ocean_assert(sourceWidth != targetWidth);
6816 ocean_assert(memoryIntermediateExtendedRow);
6817
6818 // we do not need to interpolate two lines, thus we simply need to copy the row (as we need an additional pixel at the end)
6819 memcpy(memoryIntermediateExtendedRow.data<float>(), sourceTopRow, sourceWidth * sizeof(float));
6820 }
6821 else
6822 {
6823 // in case we do not scale the width of the frame, we can write the result to the target frame directly
6824 targetRow = memoryIntermediateExtendedRow.isNull() ? target : memoryIntermediateExtendedRow.data<float>();
6825
6826 ocean_assert(targetRow != nullptr);
6827 ocean_assert(interpolateRowVerticalFunction != nullptr);
6828 interpolateRowVerticalFunction(sourceTopRow, sourceBottomRow, targetRow, sourceWidth * 1u, factorBottom);
6829 }
6830
6831 if (memoryIntermediateExtendedRow) // sourceWidth != targetWidth
6832 {
6833 // we use an extended row (with one additional pixel at the end - equal to the last pixel)
6834 // so we have to copy the last pixel
6835 memoryIntermediateExtendedRow.data<float>()[sourceWidth] = memoryIntermediateExtendedRow.data<float>()[sourceWidth - 1u];
6836
6837 interpolateRowHorizontalFunction(memoryIntermediateExtendedRow.data<float>(), target, targetWidth, 1u, memoryHorizontalInterpolationLocations.data<unsigned int>(), memoryHorizontalInterpolationFactorsRight.data<float>());
6838 }
6839
6840 target += targetStrideElements;
6841 }
6842}
6843
6844#endif // OCEAN_HARDWARE_NEON_VERSION >= 10
6845
6846template <typename T, typename TScale, unsigned int tChannels>
6847void FrameInterpolatorBilinear::scaleSubset(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
6848{
6849 static_assert((std::is_same<float, TScale>::value || std::is_same<double, TScale>::value), "Invalid TScale type");
6850
6851 ocean_assert(source != nullptr && target != nullptr);
6852 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
6853 ocean_assert_and_suppress_unused(targetWidth >= 1u && targetHeight >= 1u, targetHeight);
6854 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
6855
6856 const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
6857 const unsigned int targetStrideElements = targetWidth * tChannels + targetPaddingElements;
6858
6859 const TScale sourceX_T_targetX = TScale(sourceX_s_targetX);
6860 const TScale sourceY_T_targetY = TScale(sourceY_s_targetY);
6861
6862 /*
6863 * We determine the sub-pixel accurate source location for each target pixel as follows:
6864 *
6865 * Example with a downsampling by factor 4:
6866 * sourceRow with 12 pixels: | 0 1 2 3 4 5 6 7 8 9 A B |
6867 * targetRow with 3 pixels: | 0 1 2 |
6868 *
6869 * Thus, the source row can be separated into three blocks;
6870 * and we want to extract the color information from the center of the blocks:
6871 * sourceRow with 12 pixels: | 0 1 2 3 | 4 5 6 7 | 8 9 A B |
6872 * targetRow with 3 pixels: | 0 | 1 | 2 | (targetTSourceX = 4)
6873 *
6874 * Thus, we add 0.5 to each target coordinate before converting it to a source location;
6875 * and subtract 0.5 again afterwards:
6876 * sourceX = (targetX + 0.5) * targetTSourceX - 0.5
6877 *
6878 * e.g., (0 + 0.5) * 4 - 0.5 = 1.5
6879 * (1 + 0.5) * 4 - 0.5 = 5.5
6880 *
6881 *
6882 * Example with a downsampling by factor 3:
6883 * sourceRow with 9 pixels: | 0 1 2 3 4 5 6 7 8 |
6884 * targetRow with 3 pixels: | 0 1 2 |
6885 *
6886 * sourceRow with 9 pixels: | 0 1 2 | 3 4 5 | 6 7 8 |
6887 * targetRow with 3 pixels: | 0 | 1 | 2 | (targetTSourceX = 3)
6888 *
6889 * e.g., (0 + 0.5) * 3 - 0.5 = 1
6890 * (1 + 0.5) * 3 - 0.5 = 4
6891 *
6892 *
6893 * Example with a downsampling by factor 2:
6894 * sourceRow with 6 pixels: | 0 1 2 3 4 5 |
6895 * targetRow with 3 pixels: | 0 1 2 |
6896 *
6897 * sourceRow with 6 pixels: | 0 1 | 2 3 | 4 5 |
6898 * targetRow with 3 pixels: | 0 | 1 | 2 | (targetTSourceX = 2)
6899 *
6900 * e.g., (0 + 0.5) * 2 - 0.5 = 0.5
6901 * (1 + 0.5) * 2 - 0.5 = 2.5
6902 *
6903 *
6904 * we can simplify the calculation (as we have a constant term):
6905 * sourceX = (targetX * targetTSourceX) + (0.5 * targetTSourceX - 0.5)
6906 */
6907
6908 const TScale sourceX_T_targetXOffset = sourceX_T_targetX * TScale(0.5) - TScale(0.5);
6909 const TScale sourceY_T_targetYOffset = sourceY_T_targetY * TScale(0.5) - TScale(0.5);
6910
6911 const TScale sourceWidth_1 = TScale(sourceWidth - 1u);
6912 const TScale sourceHeight_1 = TScale(sourceHeight - 1u);
6913
6914 target += targetStrideElements * firstTargetRow;
6915
6916 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6917 {
6918 const TScale sy = minmax(TScale(0), sourceY_T_targetYOffset + sourceY_T_targetY * TScale(y), sourceHeight_1);
6919 ocean_assert(sy >= TScale(0) && sy < TScale(sourceHeight));
6920
6921 const unsigned int sTop = (unsigned int)sy;
6922 ocean_assert(sy >= TScale(sTop));
6923
6924 const TScale factorBottom = sy - TScale(sTop);
6925 ocean_assert(factorBottom >= TScale(0) && factorBottom <= TScale(1));
6926
6927 const TScale factorTop = TScale(1) - factorBottom;
6928 ocean_assert(factorTop >= TScale(0) && factorTop <= TScale(1));
6929
6930 const T* const sourceTop = source + sTop * sourceStrideElements;
6931 const T* const sourceBottom = (sTop + 1u < sourceHeight) ? sourceTop + sourceStrideElements : sourceTop;
6932
6933 for (unsigned int x = 0; x < targetWidth; ++x)
6934 {
6935 const TScale sx = minmax(TScale(0), sourceX_T_targetXOffset + sourceX_T_targetX * TScale(x), sourceWidth_1);
6936 ocean_assert(sx >= TScale(0) && sx < TScale(sourceWidth));
6937
6938 const unsigned int sLeft = (unsigned int)sx;
6939 ocean_assert(sx >= TScale(sLeft));
6940
6941 const TScale factorRight = sx - TScale(sLeft);
6942 ocean_assert(factorRight >= TScale(0) && factorRight <= TScale(1));
6943
6944 const TScale factorLeft = TScale(1) - factorRight;
6945 ocean_assert(factorLeft >= TScale(0) && factorLeft <= TScale(1));
6946
6947 const unsigned int sourceRightOffset = sLeft + 1u < sourceWidth ? tChannels : 0u;
6948
6949 const T* const sourceTopLeft = sourceTop + sLeft * tChannels;
6950 const T* const sourceBottomLeft = sourceBottom + sLeft * tChannels;
6951
6952 const TScale factorTopLeft = factorTop * factorLeft;
6953 const TScale factorTopRight = factorTop * factorRight;
6954 const TScale factorBottomLeft = factorBottom * factorLeft;
6955 const TScale factorBottomRight = factorBottom * factorRight;
6956
6957 for (unsigned int n = 0u; n < tChannels; ++n)
6958 {
6959 target[n] = T(TScale(sourceTopLeft[n]) * factorTopLeft + TScale(sourceTopLeft[sourceRightOffset + n]) * factorTopRight
6960 + TScale(sourceBottomLeft[n]) * factorBottomLeft + TScale(sourceBottomLeft[sourceRightOffset + n]) * factorBottomRight);
6961 }
6962
6963 target += tChannels;
6964 }
6965
6966 target += targetPaddingElements;
6967 }
6968}
6969
6970template <unsigned int tChannels>
6971void FrameInterpolatorBilinear::rotate8BitPerChannelSubset(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const uint8_t* borderColor, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
6972{
6973 static_assert(tChannels != 0u, "Invalid channel number!");
6974
6975 ocean_assert(firstTargetRow + numberTargetRows <= height);
6976
6977 using PixelType = typename DataType<uint8_t, tChannels>::Type;
6978
6979 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
6980
6981 uint8_t zeroColor[tChannels] = {uint8_t(0)};
6982 const PixelType bColor = borderColor ? *(const PixelType*)borderColor : *(const PixelType*)zeroColor;
6983
6984 const SquareMatrix3 rotationMatrix3(Rotation(0, 0, 1, angle));
6985 const SquareMatrix2 rotationMatrix2(rotationMatrix3(0, 0), rotationMatrix3(1, 0), rotationMatrix3(0, 1), rotationMatrix3(1, 1));
6986
6987 const Scalar width_1 = Scalar(width - 1u);
6988 const Scalar height_1 = Scalar(height - 1u);
6989 const Vector2 anchorPosition(horizontalAnchorPosition, verticalAnchorPosition);
6990
6991 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6992 {
6993 PixelType* targetPixel = (PixelType*)(target + y * targetStrideElements);
6994
6995 const Scalar floatY = Scalar(y);
6996
6997 for (unsigned int x = 0; x < width; ++x)
6998 {
6999 const Vector2 sourceLocation(anchorPosition + rotationMatrix2 * (Vector2(Scalar(x), floatY) - anchorPosition));
7000
7001 if (sourceLocation.x() >= 0 && sourceLocation.y() >= 0 && sourceLocation.x() <= width_1 && sourceLocation.y() <= height_1)
7002 {
7003 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(source, width, height, sourcePaddingElements, sourceLocation, (uint8_t*)(targetPixel));
7004 }
7005 else
7006 {
7007 *targetPixel = bColor;
7008 }
7009
7010 ++targetPixel;
7011 }
7012 }
7013}
7014
7015} // namespace CV
7016
7017} // namespace Ocean
7018
7019#endif // META_OCEAN_CV_FRAME_INTERPOLATOR_BILINEAR_H
This class implements the abstract base class for all AnyCamera objects.
Definition AnyCamera.h:131
virtual VectorT3< T > vector(const VectorT2< T > &distortedImagePoint, const bool makeUnitVector=true) const =0
Returns a vector starting at the camera's center and intersecting a given 2D point in the image.
virtual unsigned int width() const =0
Returns the width of the camera image.
virtual unsigned int height() const =0
Returns the height of the camera image.
virtual VectorT2< T > projectToImageIF(const VectorT3< T > &objectPoint) const =0
Projects a 3D object point into the camera frame.
virtual bool isValid() const =0
Returns whether this camera is valid.
Helper class allowing to determine the offset that is necessary to access the alpha channel.
Definition FrameBlender.h:60
static constexpr unsigned int data()
Returns the offset that is applied to access the first data channel.
Definition FrameBlender.h:1171
The following comfort class provides comfortable functions simplifying prototyping applications but a...
Definition FrameInterpolatorBilinear.h:60
static bool homographies(const Frame &input, Frame &output, const SquareMatrix3 homographies[4], const Vector2 &outputQuadrantCenter, const uint8_t *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
static bool zoom(const Frame &source, Frame &target, const Scalar zoomFactor, Worker *worker=nullptr)
Zooms into a given input frame (or zooms out) and stores the zoomed image content in an output frame.
static bool homographyMask(const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 &input_H_output, Worker *worker=nullptr, const uint8_t maskValue=0xFF, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
static bool lookupMask(const Frame &input, Frame &output, Frame &outputMask, const LookupTable &input_LT_output, const bool offset, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given input frame into an output frame by application of an interpolation lookup table a...
static bool homographyWithCameraMask(const AnyCamera &inputCamera, const AnyCamera &outputCamera, const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 &homography, Worker *worker=nullptr, const uint8_t maskValue=0xFFu)
Transforms a given input frame into an output frame by application of a homography.
static bool rotate(const Frame &source, Frame &target, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, Worker *worker=nullptr, const uint8_t *borderColor=nullptr)
Rotates a given frame by a bilinear interpolation.
static bool resize(const Frame &source, Frame &target, Worker *worker=nullptr)
Resizes/rescales a given frame by application of a bilinear interpolation.
static bool homographiesMask(const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 *homographies, const Vector2 &outputQuadrantCenter, Worker *worker=nullptr, const uint8_t maskValue=0xFF, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
static bool interpolatePixel(const TSource *frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2< TScalar > &position, TTarget *result, const TIntermediate &resultBias=TIntermediate(0))
Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data ty...
Definition FrameInterpolatorBilinear.h:1530
static bool resampleCameraImage(const Frame &sourceFrame, const AnyCamera &sourceCamera, const SquareMatrix3 &source_R_target, const AnyCamera &targetCamera, Frame &targetFrame, LookupCorner2< Vector2 > *source_OLT_target=nullptr, Worker *worker=nullptr, const unsigned int binSizeInPixel=8u, const void *borderColor=nullptr)
Re-samples a camera image which has been captured with a camera profile as if the image would have be...
static bool homographyWithCamera(const PinholeCamera &inputCamera, const PinholeCamera &outputCamera, const Frame &input, Frame &output, const SquareMatrix3 &homography, const bool useDistortionParameters, const uint8_t *borderColor=nullptr, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of a homography.
static bool lookup(const Frame &input, Frame &output, const LookupTable &input_LT_output, const bool offset, const void *borderColor, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
static bool affine(const Frame &source, Frame &target, const SquareMatrix3 &source_A_target, const uint8_t *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &targetOrigin=PixelPositionI(0, 0))
Applies an affine transformation to an image.
static bool interpolatePixel8BitPerChannel(const uint8_t *frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2< TScalar > &position, uint8_t *result)
Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
Definition FrameInterpolatorBilinear.h:1437
static bool homography(const Frame &input, Frame &output, const SquareMatrix3 &input_H_output, const void *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
This class implements highly optimized interpolation functions with fixed properties.
Definition FrameInterpolatorBilinear.h:341
static void resize400x400To256x256_8BitPerChannel(const uint8_t *const source, uint8_t *const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 256x256 ...
static void resize400x400To224x224_8BitPerChannel(const uint8_t *const source, uint8_t *const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 224x224 ...
This class implements bilinear frame interpolator functions.
Definition FrameInterpolatorBilinear.h:44
static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelNEON(const uint8_t *source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const typename DataType< uint8_t, tChannels >::Type &borderColor, const uint32x4_t &m128_factorsRight, const uint32x4_t &m128_factorsBottom, typename DataType< uint8_t, tChannels >::Type *targetPositionPixels)
Interpolates 4 independent pixels concurrently based on already known locations (top-left,...
Definition FrameInterpolatorBilinear.h:4300
static void homography(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, const T *borderColor, T *output, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms an input frame with (almost) arbitrary pixel format into an output frame by application of ...
Definition FrameInterpolatorBilinear.h:1748
static void resampleCameraImage(const T *sourceFrame, const AnyCamera &sourceCamera, const SquareMatrix3 &source_R_target, const AnyCamera &targetCamera, T *targetFrame, const unsigned int sourceFramePaddingElements, const unsigned int targetFramePaddingElements, LookupCorner2< Vector2 > *source_OLT_target=nullptr, Worker *worker=nullptr, const unsigned int binSizeInPixel=8u, const T *borderColor=nullptr)
Re-samples a camera image which has been captured with a camera profile as if the image would have be...
Definition FrameInterpolatorBilinear.h:1908
static void interpolateRowVerticalNEON(const T *sourceRowTop, const T *sourceRowBottom, T *targetRow, const unsigned int elements, const float factorBottom)
Applies a (vertical) linear interpolation between two rows with arbitrary data types.
static void homographyWithCamera8BitPerChannel(const PinholeCamera &inputCamera, const PinholeCamera &outputCamera, const uint8_t *input, const SquareMatrix3 &homography, const bool useDistortionParameters, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition FrameInterpolatorBilinear.h:1814
static void lookup(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable &input_LT_output, const bool offset, const T *borderColor, T *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
Definition FrameInterpolatorBilinear.h:1848
static void interpolatePixel8BitPerChannel(const uint8_t *frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2< TScalar > &position, uint8_t *result)
Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
Definition FrameInterpolatorBilinear.h:1975
static void affine8BitPerChannelSSESubset(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 *source_A_target, const uint8_t *borderColor, uint8_t *target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using SSE).
Definition FrameInterpolatorBilinear.h:2479
static Scalar patchIntensitySum1Channel(const uint32_t *linedIntegralFrame, const unsigned int frameWidth, const unsigned int frameHeight, const unsigned int lineIntegralFramePaddingElements, const Vector2 &center, const CV::PixelCenter pixelCenter, const unsigned int patchWidth, const unsigned int patchHeight)
Interpolates the sum of intensity values of an image patch in a frame, while the frame is provided as ...
static void homographyWithCameraMask8BitPerChannel(const PinholeCamera &inputCamera, const PinholeCamera &outputCamera, const uint8_t *input, const unsigned int inputPaddingElements, const SquareMatrix3 &homography, uint8_t *output, uint8_t *outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition FrameInterpolatorBilinear.h:1831
static void homographiesMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *homographies, uint8_t *output, uint8_t *outputMask, const uint8_t maskValue, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homographies.
Definition FrameInterpolatorBilinear.h:4522
static void homographiesMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], uint8_t *output, uint8_t *outputMask, const Vector2 &outputQuadrantCenter, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given 8 bit per channel input frame into an output frame by application of four homograp...
Definition FrameInterpolatorBilinear.h:1801
static void interpolateRowHorizontal8BitPerChannel7BitPrecision(const uint8_t *extendedSourceRow, uint8_t *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const uint8_t *interpolationFactors)
Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
static void homography8BitPerChannelNEONSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:3592
static void interpolateRowVertical8BitPerChannel7BitPrecisionNEON(const uint8_t *sourceRowTop, const uint8_t *sourceRowBottom, uint8_t *targetRow, const unsigned int elements, const unsigned int factorBottom)
Applies a (vertical) linear interpolation between two rows with 8 bit per channel.
static void homographySubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const T *borderColor, T *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms a frame with (almost) arbitrary pixel format using the given homography.
Definition FrameInterpolatorBilinear.h:2395
static void homographyMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, uint8_t *output, uint8_t *outputMask, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition FrameInterpolatorBilinear.h:1785
static OCEAN_FORCE_INLINE void interpolate8Pixels1Channel8BitNEON(const uint8x8_t &topLeft_u_8x8, const uint8x8_t &topRight_u_8x8, const uint8x8_t &bottomLeft_u_8x8, const uint8x8_t &bottomRight_u_8x8, const uint8x16_t &factorsRight_factorsBottom_128_u_8x16, uint8_t *targetPositionPixels)
Interpolates 8 independent pixels concurrently of a 1 channel frame, the source pixel locations must ...
Definition FrameInterpolatorBilinear.h:3972
static void homographies8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *homographies, const uint8_t *borderColor, uint8_t *output, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homographies.
Definition FrameInterpolatorBilinear.h:4366
static void homographyWithCamera8BitPerChannelSubset(const PinholeCamera *inputCamera, const PinholeCamera *outputCamera, const PinholeCamera::DistortionLookup *outputCameraDistortionLookup, const uint8_t *input, const SquareMatrix3 *normalizedHomography, const bool useDistortionParameters, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:4602
static void affine8BitPerChannel(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 &source_A_target, const uint8_t *borderColor, uint8_t *target, const PixelPositionI &targetOrigin, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Applies an affine transform to an N-channel, 8-bit frame. The target frame must have the same pixel form...
Definition FrameInterpolatorBilinear.h:1672
static void homographyWithCameraMask8BitPerChannelSubset(const PinholeCamera *inputCamera, const PinholeCamera *outputCamera, const PinholeCamera::DistortionLookup *outputCameraDistortionLookup, const uint8_t *input, const unsigned int inputPaddingElements, const SquareMatrix3 *normalizedHomography, uint8_t *output, uint8_t *outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:4648
static void affine8BitPerChannelNEONSubset(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 *source_A_target, const uint8_t *borderColor, uint8_t *target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using NEON).
Definition FrameInterpolatorBilinear.h:3342
static void lookup8BitPerChannelSubsetNEON(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame into an output frame by application of an interpolation lo...
Definition FrameInterpolatorBilinear.h:5007
static void interpolateRowHorizontalNEON(const T *extendedSourceRow, T *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const float *interpolationFactorsRight)
Applies a (horizontal) linear interpolation for one row with arbitrary data type.
static void scale8BitPerChannelSubset7BitPrecisionNEON(const uint8_t *source, uint8_t *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int channels, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Rescales a subset of a given frame with 8 bit per channel by a bilinear interpolation.
Definition FrameInterpolatorBilinear.h:5422
static OCEAN_FORCE_INLINE __m128i interpolate4Pixels8BitPerChannelSSE(const __m128i &m128_sourcesTopLeft, const __m128i &m128_sourcesTopRight, const __m128i &m128_sourcesBottomLeft, const __m128i &m128_sourcesBottomRight, const __m128i &m128_factorsTopLeft, const __m128i &m128_factorsTopRight, const __m128i &m128_factorsBottomLeft, const __m128i &m128_factorsBottomRight)
Interpolates 4 independent pixels concurrently based on already known locations (top-left,...
static void interpolateRowHorizontal(const T *extendedSourceRow, T *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const float *interpolationFactorsRight)
Applies a (horizontal) linear interpolation for one row with arbitrary data type.
Definition FrameInterpolatorBilinear.h:5387
static void rotate8BitPerChannelSubset(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const uint8_t *borderColor, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Rotates a subset of a given frame by a bilinear interpolation.
Definition FrameInterpolatorBilinear.h:6971
static void lookupMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable &input_LT_output, const bool offset, uint8_t *output, uint8_t *outputMask, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
Definition FrameInterpolatorBilinear.h:1895
static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelSSE(const uint8_t *source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const typename DataType< uint8_t, tChannels >::Type &borderColor, const __m128i &m128_factorsRight, const __m128i &m128_factorsBottom, typename DataType< uint8_t, tChannels >::Type *targetPositionPixels)
Interpolates 4 independent pixels concurrently based on already known locations (top-left,...
Definition FrameInterpolatorBilinear.h:3274
static void homographies8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], const uint8_t *borderColor, uint8_t *output, const Vector2 &outputQuadrantCenter, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of four homograp...
Definition FrameInterpolatorBilinear.h:1772
static void lookup8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame with uint8_t as element type into an output frame by appli...
Definition FrameInterpolatorBilinear.h:4696
static void resize(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Resizes a given frame with (almost) arbitrary data type (e.g., float, double, int) by using a bilinea...
Definition FrameInterpolatorBilinear.h:1623
static bool coversHomographyInputFrame(const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int outputWidth, const unsigned int outputHeight, const SquareMatrix3 &input_H_output, const int outputOriginX=0, const int outputOriginY=0)
Checks whether the application of a given homography for a specified input frame and output frame cov...
static void scale(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rescales a given frame with arbitrary data type (e.g., float, double, int) by using a bilinear interp...
Definition FrameInterpolatorBilinear.h:1636
static void lookupSubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, const T *borderColor, T *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame with arbitrary element type into an output frame by applic...
Definition FrameInterpolatorBilinear.h:4750
static void scale8BitPerChannelSubset(const uint8_t *source, uint8_t *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Resizes a subset of a given frame with 8 bit per channel by a bilinear interpolation.
Definition FrameInterpolatorBilinear.h:5246
static void rotate8BitPerChannel(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr, const uint8_t *borderColor=nullptr)
Rotates a given frame by a bilinear interpolation.
Definition FrameInterpolatorBilinear.h:1957
static void interpolateRowVertical(const T *sourceRowTop, const T *sourceRowBottom, T *targetRow, const unsigned int elements, const float factorBottom)
Applies a (vertical) linear interpolation between two rows with arbitrary data types.
Definition FrameInterpolatorBilinear.h:5367
static void homography8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, const uint8_t *borderColor, uint8_t *output, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition FrameInterpolatorBilinear.h:1710
static void interpolate1PixelFullAlphaBorder8BitPerChannel(const uint8_t *frame, const unsigned int width, const unsigned int height, const Vector2 &position, uint8_t *result, const unsigned int framePaddingElements)
Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame wit...
Definition FrameInterpolatorBilinear.h:2154
static void lookupMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, uint8_t *output, uint8_t *outputMask, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
Definition FrameInterpolatorBilinear.h:5149
static void affine8BitPerChannelSubset(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 *source_A_target, const uint8_t *borderColor, uint8_t *target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Subset function to apply an affine transform to an N-channel, 8-bit unsigned image.
Definition FrameInterpolatorBilinear.h:2243
static void homography8BitPerChannelSSESubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:2664
static void interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON(const uint8_t *extendedSourceRow, uint8_t *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const uint8_t *interpolationFactors)
Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
static void scale8BitPerChannel(const uint8_t *source, uint8_t *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rescales a given frame with 8 bit per data channel by using a bilinear interpolation with user-define...
Definition FrameInterpolatorBilinear.h:5202
static void homographyMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, uint8_t *output, uint8_t *outputMask, const uint8_t maskValue, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:4447
static void scaleSubset(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Resizes a subset of a given frame with arbitrary data type by a bilinear interpolation.
Definition FrameInterpolatorBilinear.h:6847
static void homography8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:2318
static void interpolatePixel(const TSource *frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2< TScalar > &position, TTarget *result, const TIntermediate &resultBias=TIntermediate(0))
Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data ty...
Definition FrameInterpolatorBilinear.h:2068
This class implements a 2D pixel position with pixel precision.
Definition PixelPosition.h:63
T y() const
Returns the vertical coordinate position of this object.
Definition PixelPosition.h:468
T x() const
Returns the horizontal coordinate position of this object.
Definition PixelPosition.h:456
static __m128i set128i(const unsigned long long high64, const unsigned long long low64)
Sets a 128i value by two 64 bit values.
Definition SSE.h:3875
static Caller< void > createStatic(typename StaticFunctionPointerMaker< void, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass >::Type function)
Creates a new caller container for a static function with no function parameter.
Definition Caller.h:2877
Template class allowing to define an array of data types.
Definition DataType.h:27
This class implements Ocean's image class.
Definition Frame.h:1879
void setRelativeTimestamp(const Timestamp &relative)
Sets the relative timestamp of this frame.
Definition Frame.h:4317
bool isValid() const
Returns whether this frame is valid.
Definition Frame.h:4612
void setTimestamp(const Timestamp &timestamp)
Sets the timestamp of this frame.
Definition Frame.h:4312
const Timestamp & timestamp() const
Returns the timestamp of this frame.
Definition Frame.h:4302
const Timestamp & relativeTimestamp() const
Returns the relative timestamp of this frame.
Definition Frame.h:4307
Definition of a frame type composed by the frame dimension, pixel format and pixel origin.
Definition Frame.h:30
size_t sizeY() const
Returns the vertical dimension of this lookup object.
Definition Lookup2.h:947
size_t sizeX() const
Returns the horizontal dimension of this lookup object.
Definition Lookup2.h:941
size_t binsY() const
Returns the number of vertical bins of this lookup object.
Definition Lookup2.h:959
size_t binsX() const
Returns the number of horizontal bins of this lookup object.
Definition Lookup2.h:953
This class implements a 2D lookup object with values at the bins' corners defining the individual loo...
Definition Lookup2.h:636
Vector2 binTopLeftCornerPosition(const size_t binX, const size_t binY) const
Returns the corner position (the top left corner) of a specific bin in relation to the dimension of this lookup object.
Definition Lookup2.h:1786
void setBinTopLeftCornerValue(const size_t binX, const size_t binY, const T &value)
Sets the value of one specific lookup bin's top left corner.
Definition Lookup2.h:2128
void bilinearValues(const size_t y, TTarget *values) const
Applies a lookup for an entire row in this lookup object.
Definition Lookup2.h:1864
This class implements an object able to allocate memory.
Definition base/Memory.h:22
bool isNull() const
Returns whether this object does not hold any memory (i.e., whether it is null).
Definition base/Memory.h:401
void * data()
Returns the pointer to the writable memory which is allocated by this object.
Definition base/Memory.h:303
This class provides basic numeric functionalities.
Definition Numeric.h:57
static constexpr T eps()
Returns a small epsilon.
static T floor(const T value)
Returns the largest integer value that is not greater than the given value.
Definition Numeric.h:2035
static constexpr bool isEqualEps(const T value)
Returns whether a value is smaller than or equal to a small epsilon.
Definition Numeric.h:2096
static constexpr bool isNotEqualEps(const T value)
Returns whether a value is not smaller than or equal to a small epsilon.
Definition Numeric.h:2246
unsigned int width() const
Returns the width of the camera image.
Definition PinholeCamera.h:1452
const SquareMatrixT3< T > & invertedIntrinsic() const
Returns the inverted intrinsic camera matrix.
Definition PinholeCamera.h:1333
const SquareMatrixT3< T > & intrinsic() const
Returns the intrinsic camera matrix.
Definition PinholeCamera.h:1327
unsigned int height() const
Returns the height of the camera image.
Definition PinholeCamera.h:1458
VectorT2< T > normalizedImagePoint2imagePoint(const VectorT2< T > &normalizedImagePoint, const bool distortImagePoint) const
Calculates the image point corresponding to a given normalized image point.
Definition PinholeCamera.h:1792
This class implements a 2x2 square matrix.
Definition SquareMatrix2.h:73
bool isNull() const
Returns whether this matrix is a zero matrix.
Definition SquareMatrix3.h:1334
const T * data() const
Returns a pointer to the internal values.
Definition SquareMatrix3.h:1047
bool isOrthonormal(const T epsilon=NumericT< T >::eps()) const
Returns whether this matrix is an orthonormal matrix.
Definition SquareMatrix3.h:1366
const T & x() const noexcept
Returns the x value.
Definition Vector2.h:710
const T & y() const noexcept
Returns the y value.
Definition Vector2.h:722
bool isEqual(const VectorT2< T > &vector, const T eps) const
Returns whether two vectors are equal up to a specified epsilon.
Definition Vector2.h:758
const T & y() const noexcept
Returns the y value.
Definition Vector3.h:824
const T & x() const noexcept
Returns the x value.
Definition Vector3.h:812
const T & z() const noexcept
Returns the z value.
Definition Vector3.h:836
This class implements a worker able to distribute function calls over different threads.
Definition Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
T minmax(const T &lowerBoundary, const T &value, const T &upperBoundary)
This function fits a given parameter into a specified value range.
Definition base/Utilities.h:973
PixelCenter
Definition of individual centers of pixels.
Definition CV.h:117
@ PC_TOP_LEFT
The center of a pixel is in the upper-left corner of each pixel's square.
Definition CV.h:133
@ PC_CENTER
The center of a pixel is located in the center of each pixel's square (with an offset of 0.5, 0.5).
Definition CV.h:150
float Scalar
Definition of a scalar type.
Definition Math.h:129
SquareMatrixT3< Scalar > SquareMatrix3
Definition of the SquareMatrix3 object, depending on the OCEAN_MATH_USE_SINGLE_PRECISION either with ...
Definition SquareMatrix3.h:43
VectorT3< Scalar > Vector3
Definition of a 3D vector.
Definition Vector3.h:29
VectorT2< Scalar > Vector2
Definition of a 2D vector.
Definition Vector2.h:28
RotationT< Scalar > Rotation
Definition of the Rotation object, depending on the OCEAN_MATH_USE_SINGLE_PRECISION flag either with ...
Definition Rotation.h:32
The namespace covering the entire Ocean framework.
Definition Accessor.h:15
Default definition of a type with tBytes bytes.
Definition DataType.h:32
float Type
The 32 bit floating point data type for any data type T but 'double'.
Definition DataType.h:373