Ocean
Loading...
Searching...
No Matches
FrameInterpolatorBilinear.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_CV_FRAME_INTERPOLATOR_BILINEAR_H
9#define META_OCEAN_CV_FRAME_INTERPOLATOR_BILINEAR_H
10
11#include "ocean/cv/CV.h"
14#include "ocean/cv/SSE.h"
15
16#include "ocean/base/DataType.h"
17#include "ocean/base/Frame.h"
18#include "ocean/base/Memory.h"
19#include "ocean/base/Worker.h"
20
22
26#include "ocean/math/Lookup2.h"
31#include "ocean/math/Vector2.h"
32
33namespace Ocean
34{
35
36namespace CV
37{
38
39/**
40 * This class implements bilinear frame interpolator functions.
41 * @ingroup cv
42 */
43class OCEAN_CV_EXPORT FrameInterpolatorBilinear
44{
45 public:
46
47 /**
48 * Definition of a lookup table for 2D vectors.
49 */
51
52 public:
53
54 /**
55 * The following comfort class provides comfortable functions simplifying prototyping applications but also increasing binary size of the resulting applications.
56 * Best practice is to avoid using these functions if binary size matters,<br>
57 * as for every comfort function a corresponding function exists with specialized functionality not increasing binary size significantly.<br>
58 */
59 class OCEAN_CV_EXPORT Comfort
60 {
61 public:
62
63 /**
64 * Resizes/rescales a given frame by application of a bilinear interpolation.
65 * @param source The source frame to resize, must be valid
66 * @param target Resulting target frame with identical frame pixel format and pixel origin as the source frame, must be valid
67 * @param worker Optional worker object used for load distribution
68 * @return True, if the frame could be resized
69 */
70 static bool resize(const Frame& source, Frame& target, Worker* worker = nullptr);
71
72 /**
73 * Resizes/rescales a given frame by application of a bilinear interpolation.
74 * @param frame The frame to resize, must be valid
75 * @param width The width of the resized frame in pixel, with range [1, infinity)
76 * @param height The height of the resized frame in pixel, with range [1, infinity)
77 * @param worker Optional worker object used for load distribution
78 * @return True, if the frame could be resized
79 */
80 static inline bool resize(Frame& frame, const unsigned int width, const unsigned int height, Worker* worker = nullptr);
81
82 /**
83 * Zooms into a given input frame (or zooms out) and stores the zoomed image content in an output frame.
84 * The resulting zoomed image will have the same frame type (frame resolution, pixel format, pixel origin) as the input image.<br>
85 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
86 * @param source The source frame for which the zoomed image content will be created, must be valid
87 * @param target The resulting target frame which will receive the zoomed image, will be set to the same frame type as the source frame, can be invalid
88 * @param zoomFactor The zoom factor to be applied, a factor < 1 will zoom out, a factor > 1 will zoom in, with range (0, infinity)
89 * @param worker Optional worker object to distribute the computation to several CPU cores
90 * @return True, if succeeded
91 */
92 static bool zoom(const Frame& source, Frame& target, const Scalar zoomFactor, Worker* worker = nullptr);
93
94 /**
95 * Transforms a given input frame into an output frame (with arbitrary frame dimension) by application of a homography.
96 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
97 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
98 * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).<br>
99 * Information: This function is the equivalent to OpenCV's cv::warpPerspective().
100 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
101 * @param input The input frame that will be transformed, must be valid
102 * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must be valid
103 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
104 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels and the data type of the pixel elements, nullptr to assign 0 to each channel
105 * @param worker Optional worker object to distribute the computational load
106 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
107 * @return True, if succeeded
108 */
109 static bool homography(const Frame& input, Frame& output, const SquareMatrix3& input_H_output, const void* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
110
111 /**
112 * Transforms a given input frame into an output frame (with arbitrary frame dimension) by application of four homographies.
113 * For each quadrant of the output frame an individual homography is applied while the final result is interpolated between the four homographies.<br>
114 * The quadrant order of the homographies is as follows: top left, top right, bottom left, bottom right.<br>
115 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
116 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
117 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
118 * @param input The input frame that will be transformed
119 * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must have a valid dimension
120 * @param homographies Four homographies used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
121 * @param outputQuadrantCenter The center position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, output.width())x[0, output.height())
122 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
123 * @param worker Optional worker object to distribute the computational load
124 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
125 * @return True, if succeeded
126 */
127 static bool homographies(const Frame& input, Frame& output, const SquareMatrix3 homographies[4], const Vector2& outputQuadrantCenter, const uint8_t* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
128
129 /**
130 * Transforms a given input frame into an output frame (with arbitrary frame dimension) by application of a homography.
131 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
132 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
133 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
134 * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).
135 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
136 * @param input The input frame that will be transformed, must be valid
137 * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must have a valid dimension
138 * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame, must be valid and must have the same frame dimension as the output frame
139 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
140 * @param worker Optional worker object to distribute the computational load
141 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
142 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
143 * @return True, if succeeded
144 * @see coversHomographyInputFrame().
145 */
146 static bool homographyMask(const Frame& input, Frame& output, Frame& outputMask, const SquareMatrix3& input_H_output, Worker* worker = nullptr, const uint8_t maskValue = 0xFF, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
147
148 /**
149 * Transforms a given input frame into an output frame (with arbitrary frame dimension) by application of four homographies.
150 * For each quadrant of the output frame an individual homography is applied while the final result is interpolated between the four homographies.<br>
151 * The quadrant order of the homographies is as follows: top left, top right, bottom left, bottom right.<br>
152 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
153 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
154 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
155 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
156 * @param input The input frame that will be transformed, must be valid
157 * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must have a valid dimension
158 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
159 * @param homographies Four homographies used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
160 * @param outputQuadrantCenter The center position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, output.width())x[0, output.height())
161 * @param worker Optional worker object to distribute the computational load
162 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
163 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
164 * @return True, if succeeded
165 * @see coversHomographyInputFrame().
166 */
167 static bool homographiesMask(const Frame& input, Frame& output, Frame& outputMask, const SquareMatrix3* homographies, const Vector2& outputQuadrantCenter, Worker* worker = nullptr, const uint8_t maskValue = 0xFF, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
168
169 /**
170 * Transforms a given input frame into an output frame by application of a homography.
171 * This function also uses a camera profile to improve the interpolation accuracy.<br>
172 * The given homography is transformed into a homography for normalized image coordinates.<br>
173 * Thus, also distortion parameters of the camera profile can be applied.<br>
174 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
175 * @param inputCamera The pinhole camera profile to be applied for the input frame
176 * @param outputCamera The pinhole camera profile to be applied for the output frame
177 * @param input The input frame that will be transformed, the frame dimension must match the dimension of the input camera
178 * @param output The output frame resulting by application of the given homography, the frame dimension must match the dimension of the output camera
179 * @param homography The homography used to transform the given input frame, which includes both camera profiles: H = Ki * H' * Ko^-1
180 * @param useDistortionParameters True, to apply the distortion parameters of the camera profile
181 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
182 * @param worker Optional worker object to distribute the computational load
183 * @return True, if succeeded
184 * @see homographyWithCameraMask(), homography().
185 */
186 static bool homographyWithCamera(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const Frame& input, Frame& output, const SquareMatrix3& homography, const bool useDistortionParameters, const uint8_t* borderColor = nullptr, Worker* worker = nullptr);
187
188 /**
189 * Transforms a given input frame into an output frame by application of a homography.
190 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
191 * This function also uses a camera profile to improve the interpolation accuracy.<br>
192 * The given homography is transformed into a homography for normalized image coordinates.<br>
193 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
194 * Thus, also distortion parameters of the camera profile can be applied.<br>
195 * @param inputCamera The camera profile to be applied for the input frame
196 * @param outputCamera The camera profile to be applied for the output frame
197 * @param input The input frame that will be transformed, the frame dimension must match the dimension of the input camera
198 * @param output The output frame resulting by application of the given homography, the frame dimension must match the dimension of the output camera
199 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
200 * @param homography The homography used to transform the given input frame, which includes both camera profiles: H = Ki * H' * Ko^-1
201 * @param worker Optional worker object to distribute the computational load
202 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
203 * @return True, if succeeded
204 * @see homographyWithCamera(), homography().
205 */
206 static bool homographyWithCameraMask(const AnyCamera& inputCamera, const AnyCamera& outputCamera, const Frame& input, Frame& output, Frame& outputMask, const SquareMatrix3& homography, Worker* worker = nullptr, const uint8_t maskValue = 0xFFu);
207
208 /**
209 * Transforms a given input frame into an output frame by application of an interpolation lookup table.
210 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
211 * Information: This function is the equivalent to OpenCV's cv::remap().
212 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
213 * @param input The input frame that will be transformed
214 * @param output Resulting output frame, the dimension will be set to match the size of the lookup table, pixel format and pixel origin will be set to match the given input frame
215 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
216 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
217 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
218 * @param worker Optional worker object to distribute the computation
219 * @return True, if succeeded
220 */
221 static bool lookup(const Frame& input, Frame& output, const LookupTable& input_LT_output, const bool offset, const void* borderColor, Worker* worker = nullptr);
222
223 /**
224 * Transforms a given input frame into an output frame by application of an interpolation lookup table and creates an additional mask as output.
225 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
226 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
227 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
228 * @param input The input frame which will be transformed
229 * @param output Resulting output frame, the frame dimension will be set to match the size of the lookup table, pixel format and pixel origin will be set to match the given input frame
230 * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
231 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
232 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
233 * @param worker Optional worker object to distribute the computation
234 * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
235 * @return True, if succeeded
236 */
237 static bool lookupMask(const Frame& input, Frame& output, Frame& outputMask, const LookupTable& input_LT_output, const bool offset, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
238
239 /**
240 * Applies an affine transformation to an image.
241 * The target frame must have the same pixel format and pixel origin as the source frame, however the dimension (and position) of the target frame can be arbitrary.
242 * This function allows the creation of a target frame fully covering the source frame (if the position and dimension of the target frame covers the transformation of the affine transformation).
243 * Multiplying the affine transformation with a pixel location in the target image yields its location in the source image, i.e., sourcePoint = source_A_target * targetPoint.
244 * The parameter 'targetOrigin' applies an additional translation to the provided affine transformation i.e., source_A_target * create_translation_matrix3x3(targetOrigin.x(), targetOrigin.y()).
245 * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
246 * <pre>
247 * a c e
248 * b d f
249 * 0 0 1
250 * </pre>
251 * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
252 * Information: This function is the equivalent to OpenCV's cv::warpAffine().
253 * Note: For applications running on mobile devices, in order to keep the impact on binary size to a minimum please prefer a specialized transformation function (those that work on image pointers instead of Frame instances).
254 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
255 * @param source The source frame that will be transformed, must be valid
256 * @param target The resulting frame after applying the affine transformation to the source frame; pixel format and pixel origin must be identical to source frame; memory of target frame must be allocated by the caller
257 * @param source_A_target Affine transform used to transform the given source frame, transforming points defined in the target frame into points defined in the source frame
258 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
259 * @param worker Optional worker object to distribute the computational load
260 * @param targetOrigin The origin of the target frame defining the global position of the target frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
261 * @return True, if succeeded
262 */
263 static bool affine(const Frame& source, Frame& target, const SquareMatrix3& source_A_target, const uint8_t* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& targetOrigin = PixelPositionI(0, 0));
264
265 /**
266 * Rotates a given frame by a bilinear interpolation.
267 * The frame will be rotated around a specified anchor position (inside or outside the frame).<br>
268 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
269 * @param source The source frame to be rotated, must be valid
270 * @param target The target frame which will receive the rotated image, will be set to the same frame type as the source frame, can be invalid
271 * @param horizontalAnchorPosition Position of the rotation anchor in the horizontal direction, with range (-infinity, infinity)
272 * @param verticalAnchorPosition Position of the rotation anchor in the vertical direction, with range (-infinity, infinity)
273 * @param angle The counter clockwise rotation angle in radian, with range [0, 2PI)
274 * @param worker Optional worker object to distribute the computation to several CPU cores
275 * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use 0x00 for each channel
276 * @return True, if succeeded
277 */
278 static bool rotate(const Frame& source, Frame& target, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, Worker* worker = nullptr, const uint8_t* borderColor = nullptr);
279
280 /**
281 * Re-samples a camera image which has been captured with a camera profile as if the image would have been captured with a second camera profile.
282 * The function can be used e.g., to rectify a fisheye camera image into a pinhole camera image.
283 * @param sourceFrame The source image captured with the source camera profile, must be valid
284 * @param sourceCamera The source camera profile which has been used to capture the source image, with resolution sourceFrame.width() x sourceFrame.height(), must be valid
285 * @param source_R_target The rotation transforming 3D points defined in the coordinate system of the target camera image to 3D points defined in the coordinate system of the source camera image, must be valid
286 * @param targetCamera The camera profile of the target frame, must be valid
287 * @param targetFrame The resulting target image, with resolution targetCamera.width() x targetCamera.height(), must be valid
288 * @param source_OLT_target Optional resulting offset lookup table between target image points and source image points
289 * @param worker Optional worker object to distribute the computational load
290 * @param binSizeInPixel The size in pixel of the interpolation bins used for building the lookup table, with range [1, infinity)
291 * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use ElementType(0) for each channel
292 * @return True, if succeeded
293 * @see resampleCameraImageImage8BitPerChannel().
294 */
295 static bool resampleCameraImage(const Frame& sourceFrame, const AnyCamera& sourceCamera, const SquareMatrix3& source_R_target, const AnyCamera& targetCamera, Frame& targetFrame, LookupCorner2<Vector2>* source_OLT_target = nullptr, Worker* worker = nullptr, const unsigned int binSizeInPixel = 8u, const void* borderColor = nullptr);
296
297 /**
298 * Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
299 * This function uses an integer interpolation with a precision of 1/128.
300 * @param frame The frame to determine the pixel values from, must be valid
301 * @param channels Number of channels of the given frame, with range [1, 8]
302 * @param width The width of the frame in pixel, with range [1, infinity)
303 * @param height The height of the frame in pixel, with range [1, infinity)
304 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
305 * @param pixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
306 * @param position The position for which the interpolated pixel will be determined, with ranges [0, width - 1]x[0, height - 1] for PC_TOP_LEFT, [0, width]x[0, height] for PC_CENTER
307 * @param result Resulting pixel values, must be valid
308 * @return True, if succeeded
309 * @tparam TScalar The scalar data type of the sub-pixel position
310 */
311 template <typename TScalar = Scalar>
312 static bool interpolatePixel8BitPerChannel(const uint8_t* frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2<TScalar>& position, uint8_t* result);
313
314 /**
315 * Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data type.
316 * This function uses floating point precision during interpolation.
317 * @param frame The frame to determine the pixel values from, must be valid
318 * @param channels Number of channels of the given frame, with range [1, 8]
319 * @param width The width of the frame in pixel, with range [1, infinity)
320 * @param height The height of the frame in pixel, with range [1, infinity)
321 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
322 * @param pixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
323 * @param position The position to determine the interpolated pixel values for, with range [0, width)x[0, height)
324 * @param result Resulting interpolated pixel value(s), must be valid
325 * @param resultBias Optional bias value which will be added to the interpolation result e.g. to handle rounding, with range (-infinity, infinity), default is zero
326 * @return True, if succeeded
327 * @tparam TSource The data type of the provided pixel values in the (source) frame
328 * @tparam TTarget The data type of the resulting interpolated value(s)
329 * @tparam TScalar The data type of each coordinate of the provided interpolation location, should be either Scalar, float, or double
330 * @tparam TIntermediate The data type of the intermediate interpolation result before assigning the result
331 */
332 template <typename TSource, typename TTarget, typename TScalar = Scalar, typename TIntermediate = TScalar>
333 static bool interpolatePixel(const TSource* frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2<TScalar>& position, TTarget* result, const TIntermediate& resultBias = TIntermediate(0));
334 };
335
336 /**
337 * This class implements highly optimized interpolation functions with fixed properties.
338 * The functions can be significantly faster as these functions are tailored to the specific properties.
339 */
340 class OCEAN_CV_EXPORT SpecialCases
341 {
342 public:
343
344 /**
345 * Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 224x224 by using a bilinear interpolation.
346 * This function exploits the fact that lookup locations and interpolation factors repeat after 25 pixels (14 pixels in the target resolution).
347 * @param source The source frame buffer with resolution 400x400, must be valid
348 * @param target The target frame buffer receiving the resized image information, with resolution 224x224, must be valid
349 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
350 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
351 * @see FrameInterpolatorBilinear::resize<T, tChannels>().
352 */
353 static void resize400x400To224x224_8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
354
355 /**
356 * Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 256x256 by using a bilinear interpolation.
357 * This function exploits the fact that lookup locations and interpolation factors repeat after 25 pixels (16 pixels in the target resolution).
358 * @param source The source frame buffer with resolution 400x400, must be valid
359 * @param target The target frame buffer receiving the resized image information, with resolution 256x256, must be valid
360 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
361 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
362 * @see FrameInterpolatorBilinear::resize<T, tChannels>().
363 */
364 static void resize400x400To256x256_8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
365 };
366
367 /**
368 * Resizes a given frame with (almost) arbitrary data type (e.g., float, double, int) by using a bilinear interpolation.
369 * This function is actually a wrapper for scale<T, tChannels>().
370 * @param source The source frame buffer providing the image information to be resized, must be valid
371 * @param target The target frame buffer receiving the resized image information, must be valid
372 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
373 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
374 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
375 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
376 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
377 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
378 * @param worker Optional worker object to distribute the computation to several CPU cores
379 * @tparam T Data type of each pixel channel, e.g., float, double, int
380 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
381 * @see scale<T, tChannels>().
382 */
383 template <typename T, unsigned int tChannels>
384 static inline void resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
385
386 /**
387 * Rescales a given frame with arbitrary data type (e.g., float, double, int) by using a bilinear interpolation with user-defined scaling factors.
388 * Beware: This function is not optimized for performance but supports arbitrary data types.<br>
389 * Try to use scale8BitPerChannel() if possible.
390 * @param source The source frame buffer providing the image information to be rescaled, must be valid
391 * @param target The target frame buffer receiving the rescaled image information, must be valid
392 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
393 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
394 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
395 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
396 * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
397 * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
398 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
399 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
400 * @param worker Optional worker object to distribute the computation to several CPU cores
401 * @tparam T Data type of each pixel channel, e.g., float, double, int
402 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
403 * @see resize<T, tChannels>().
404 */
405 template <typename T, unsigned int tChannels>
406 static inline void scale(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
407
408 /**
409 * Rotates a given frame by a bilinear interpolation.
410 * The frame will be rotated around a specified anchor position (inside or outside the frame).
411 * @param source The source frame to be rotated, must be valid
412 * @param target The target frame which will receive the rotated image, with same frame type as the source frame, must be valid
413 * @param width The width of the source and target frame in pixel, with range [1, infinity)
414 * @param height The height of the source and target frame in pixel, with range [1, infinity)
415 * @param horizontalAnchorPosition Position of the rotation anchor in the horizontal direction, with range (-infinity, infinity)
416 * @param verticalAnchorPosition Position of the rotation anchor in the vertical direction, with range (-infinity, infinity)
417 * @param angle The counter clockwise rotation angle in radian, with range [0, 2PI)
418 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
419 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
420 * @param worker Optional worker object to distribute the computation to several CPU cores
421 * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use 0x00 for each channel
422 * @tparam tChannels The number of channels both frames have, with range [1, infinity)
423 */
424 template <unsigned int tChannels>
425 static inline void rotate8BitPerChannel(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr, const uint8_t* borderColor = nullptr);
426
427 /**
428 * Applies an affine transformation to an N-channel, 8-bit frame.
429 * The target frame must have the same pixel format and pixel origin as the source frame, however the dimension (and position) of the target frame can be arbitrary.<br>
430 * This function allows the creation of a target frame fully covering the source frame (if the position and dimension of the target frame covers the transformation of the affine transformation).<br>
431 * The 'targetOrigin' parameter simply applies an additional translation onto the provided affine transformation i.e., affine * create_translation_matrix3x3(targetOrigin.x(), targetOrigin.y()).
432 * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
433 * <pre>
434 * a c e
435 * b d f
436 * 0 0 1
437 * </pre>
438 * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
439 * @param source Input frame that will be transformed, must be valid
440 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
441 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
442 * @param source_A_target Affine transformation, such that: sourcePoint = source_A_target * targetPoint
443 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
444 * @param target The target frame using the given affine transform, must be valid
445 * @param targetOrigin The origin of the target frame defining the global position of the target frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
446 * @param targetWidth The width of the target image in pixel, with range [1, infinity)
447 * @param targetHeight The height of the target image in pixel, with range [1, infinity)
448 * @param sourcePaddingElements The number of padding elements at the end of each source frame row, in elements, with range [0, infinity)
449 * @param targetPaddingElements The number of padding elements at the end of each target frame row, in elements, with range [0, infinity)
450 * @param worker Optional worker object to distribute the computational load
451 * @tparam tChannels Number of channels of the frame
452 * @see homographyMask8BitPerChannel(), homographyWithCamera8BitPerChannel(), homography().
453 */
454 template <unsigned int tChannels>
455 static inline void affine8BitPerChannel(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3& source_A_target, const uint8_t* borderColor, uint8_t* target, const PixelPositionI& targetOrigin, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
456
457 /**
458 * Transforms an input frame with (almost) arbitrary pixel format into an output frame by application of a homography.
459 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
460 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
461 * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).
462 * @param input The input frame that will be transformed, must be valid
463 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
464 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
465 * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
466 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
467 * @param output The output frame using the given homography, must be valid
468 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
469 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
470 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
471 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
472 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
473 * @param worker Optional worker object to distribute the computational load
474 * @tparam T Data type of each pixel channel, e.g., float, double, int
475 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
476 * @see homographyMask8BitPerChannel(), homographyWithCamera8BitPerChannel().
477 */
478 template <typename T, unsigned int tChannels>
479 static inline void homography(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const T* borderColor, T* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
480
481 /**
482 * Transforms a given 8 bit per channel input frame into an output frame by application of four homographies.
483 * For each quadrant of the output frame an individual homography is applied while the final result is interpolated between the four homographies.<br>
484 * The quadrant order of the homographies is as follows: top left, top right, bottom left, bottom right.<br>
485 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
486 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
487 * @param input The input frame that will be transformed
488 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
489 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
490 * @param homographies Four homographies used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
491 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
492 * @param output The output frame using the given homography
493 * @param outputQuadrantCenter The center position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputWidth)x[0, outputHeight)
494 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
495 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
496 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
497 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
498 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
499 * @param worker Optional worker object to distribute the computational load
500 * @tparam tChannels Number of channels of the frame
501 * @see homographyMask8BitPerChannel(), homographyWithCamera8BitPerChannel().
502 */
503 template <unsigned int tChannels>
504 static inline void homographies8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], const uint8_t* borderColor, uint8_t* output, const Vector2& outputQuadrantCenter, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
505
506 /**
507 * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
508 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
509 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
510 * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).
511 * @param input The input frame that will be transformed, must be valid
512 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
513 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
514 * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
515 * @param output The output frame using the given homography, must be valid
516 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame, must be valid
517 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
518 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
519 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
520 * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
521 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
522 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
523 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
524 * @param worker Optional worker object to distribute the computational load
525 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
526 * @see homography(), homographyWithCamera8BitPerChannel().
527 */
528 template <unsigned int tChannels>
529 static inline void homographyMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, uint8_t* output, uint8_t* outputMask, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const uint8_t maskValue /* = 0xFF*/, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr);
530
531 /**
532 * Transforms a given 8 bit per channel input frame into an output frame by application of four homographies.
533 * For each quadrant of the output frame an individual homography is applied while the final result is interpolated between the four homographies.<br>
534 * The quadrant order of the homographies is as follows: top left, top right, bottom left, bottom right.<br>
535 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
536 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
537 * @param input The input frame that will be transformed
538 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
539 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
540 * @param homographies Four homographies used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
541 * @param output The output frame using the given homography
542 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
543 * @param outputQuadrantCenter The center position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputWidth)x[0, outputHeight)
544 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
545 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
546 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
547 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
548 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
549 * @param outputMaskPaddingElements The number of padding elements at the end of each row of the output mask, in elements, with range [0, infinity)
550 * @param worker Optional worker object to distribute the computational load
551 * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
552 * @tparam tChannels Number of channels of the frame
553 * @see homography(), homographyWithCamera8BitPerChannel().
554 */
555 template <unsigned int tChannels>
556 static inline void homographiesMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], uint8_t* output, uint8_t* outputMask, const Vector2& outputQuadrantCenter, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
557
558 /**
559 * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
560 * This function also uses a camera profile to improve the interpolation accuracy.<br>
561 * The given homography is transformed into a homography for normalized image coordinates.<br>
562 * Thus, also distortion parameters of the camera profile can be applied.<br>
563 * @param inputCamera The pinhole camera profile to be applied for the input frame
564 * @param outputCamera The pinhole camera profile to be applied for the output frame
565 * @param input The input frame that will be transformed
566 * @param homography The homography used to transform the given input frame by the following equation: inputPoint = homography * outputPoint
567 * @param useDistortionParameters True, to apply the distortion parameters of the camera profile
568 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
569 * @param output The output frame using the given homography
570 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
571 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
572 * @param worker Optional worker object to distribute the computational load
573 * @tparam tChannels Number of channels of the frame
574 * @see homography().
575 */
576 template <unsigned int tChannels>
577 static inline void homographyWithCamera8BitPerChannel(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const uint8_t* input, const SquareMatrix3& homography, const bool useDistortionParameters, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
578
579 /**
580 * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
581 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame.<br>
582 * This function also uses a camera profile to improve the interpolation accuracy.<br>
583 * The given homography is transformed into a homography for normalized image coordinates.<br>
584 * Thus, also distortion parameters of the camera profile can be applied.
585 * @param inputCamera The pinhole camera profile to be applied for the input frame, must be valid
586 * @param outputCamera The pinhole camera profile to be applied for the output frame, must be valid
587 * @param input The input frame that will be transformed, must be valid
588 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
589 * @param homography The homography used to transform the given input frame by the following equation: inputPoint = homography * outputPoint
590 * @param output The output frame using the given homography
591 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
592 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
593 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
594 * @param worker Optional worker object to distribute the computational load
595 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
596 * @tparam tChannels Number of channels of the frame
597 */
598 template <unsigned int tChannels>
599 static inline void homographyWithCameraMask8BitPerChannel(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const uint8_t* input, const unsigned int inputPaddingElements, const SquareMatrix3& homography, uint8_t* output, uint8_t* outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
600
601 /**
602 * Transforms a given input frame into an output frame by application of an interpolation lookup table.
603 * The frame must have a 1-plane pixel format.<br>
604 * The output frame must have the same pixel format and pixel origin as the input frame.
605 * @param input The input frame which will be transformed, must be valid
606 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
607 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
608 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
609 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
610 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign T(0) to each channel
611 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
612 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
613 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
614 * @param worker Optional worker object to distribute the computation
615 * @tparam T Data type of each pixel channel, e.g., float, double, int
616 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
617 */
618 template <typename T, unsigned int tChannels>
619 static inline void lookup(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& input_LT_output, const bool offset, const T* borderColor, T* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
620
621 /**
622 * Transforms a given input frame into an output frame by application of an interpolation lookup table.
623 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).<br>
624 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
625 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further, these pixels are untouched in the output frame.<br>
626 * @param input The input frame which will be transformed
627 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
628 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
629 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
630 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
631 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table
632 * @param outputMask Resulting mask frame with 8 bits per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
633 * @param inputPaddingElements The number of padding elements at the end of each row of `input`, in elements, with range [0, infinity)
634 * @param outputPaddingElements The number of padding elements at the end of each row of `output`, in elements, with range [0, infinity)
635 * @param outputMaskPaddingElements The number of padding elements at the end of each row of `outputMask`, in elements, with range [0, infinity)
636 * @param worker Optional worker object to distribute the computation
637 * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
638 * @tparam tChannels Number of channels of the frame
639 */
640 template <unsigned int tChannels>
641 static inline void lookupMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& input_LT_output, const bool offset, uint8_t* output, uint8_t* outputMask, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
642
643 /**
644 * Re-samples a camera image which has been captured with a camera profile as if the image would have been captured with a second camera profile.
645 * The function can be used e.g., to rectify a fisheye camera image into a pinhole camera image.
646 * @param sourceFrame The source image captured with the source camera profile, must be valid
647 * @param sourceCamera The source camera profile which has been used to capture the source image, with resolution sourceFrame.width() x sourceFrame.height(), must be valid
648 * @param source_R_target The rotation transforming 3D points defined in the coordinate system of the target camera image to 3D points defined in the coordinate system of the source camera image, must be valid
649 * @param targetCamera The camera profile of the target frame, must be valid
650 * @param targetFrame The resulting target image, with resolution targetCamera.width() x targetCamera.height(), must be valid
651 * @param sourceFramePaddingElements The number of padding elements at the end of each source frame row, in elements, with range [0, infinity)
652 * @param targetFramePaddingElements The number of padding elements at the end of each target frame row, in elements, with range [0, infinity)
653 * @param source_OLT_target Optional resulting offset lookup table between target image points and source image points
654 * @param worker Optional worker object to distribute the computational load
655 * @param binSizeInPixel The size in pixel of the interpolation bins used for building the lookup table, with range [1, infinity)
656 * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use T(0) for each channel
657 * @tparam T Data type of each pixel channel, e.g., uint8_t, int16_t, float, double
658 * @tparam tChannels The number of frame channels, with range [1, infinity)
659 * @see Comfort::resampleCameraImage().
660 */
661 template <typename T, unsigned int tChannels>
662 static void resampleCameraImage(const T* sourceFrame, const AnyCamera& sourceCamera, const SquareMatrix3& source_R_target, const AnyCamera& targetCamera, T* targetFrame, const unsigned int sourceFramePaddingElements, const unsigned int targetFramePaddingElements, LookupCorner2<Vector2>* source_OLT_target = nullptr, Worker* worker = nullptr, const unsigned int binSizeInPixel = 8u, const T* borderColor = nullptr);
663
664 /**
665 * Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
666 * This function uses an integer interpolation with a precision of 1/128.
667 * @param frame The frame to determine the pixel values from, must be valid
668 * @param width The width of the frame in pixel, with range [1, infinity)
669 * @param height The height of the frame in pixel, with range [1, infinity)
670 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
671 * @param position The position for which the interpolated pixel will be determined, with ranges [0, width - 1]x[0, height - 1] for PC_TOP_LEFT, [0, width]x[0, height] for PC_CENTER
672 * @param result Resulting pixel values, must be valid
673 * @tparam tChannels Number of channels of the given frame, with range [1, infinity)
674 * @tparam tPixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
675 * @tparam TScalar The scalar data type of the sub-pixel position
676 * @see interpolatePixel().
677 */
678 template <unsigned int tChannels, PixelCenter tPixelCenter = PC_TOP_LEFT, typename TScalar = Scalar>
679 static inline void interpolatePixel8BitPerChannel(const uint8_t* frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2<TScalar>& position, uint8_t* result);
680
681 /**
682 * Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data type.
683 * This function uses floating point precision during interpolation.
684 * @param frame The frame to determine the pixel values from, must be valid
685 * @param width The width of the frame in pixel, with range [1, infinity)
686 * @param height The height of the frame in pixel, with range [1, infinity)
687 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
688 * @param position The position for which the interpolated pixel will be determined, with ranges [0, width - 1]x[0, height - 1] for PC_TOP_LEFT, [0, width]x[0, height] for PC_CENTER
689 * @param result Resulting interpolated pixel value(s), must be valid
690 * @param resultBias Optional bias value which will be added to the interpolation result, e.g., to handle rounding, with range (-infinity, infinity), default is zero
691 * @tparam TSource The data type of the provided pixel values in the (source) frame
692 * @tparam TTarget The data type of the resulting interpolated value(s)
693 * @tparam tChannels Number of channels of the given frame, with range [1, infinity)
694 * @tparam tPixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
695 * @tparam TScalar The data type of each coordinate of the provided interpolation location, should be either Scalar, float, or double
696 * @tparam TIntermediate The data type of the intermediate interpolation result before assigning the result
697 * @see interpolatePixel8BitPerChannel().
698 */
699 template <typename TSource, typename TTarget, unsigned int tChannels, PixelCenter tPixelCenter = PC_TOP_LEFT, typename TScalar = Scalar, typename TIntermediate = TScalar>
700 static inline void interpolatePixel(const TSource* frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2<TScalar>& position, TTarget* result, const TIntermediate& resultBias = TIntermediate(0));
701
702 /**
703 * Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame with alpha channel.
704 * The center of each pixel is located with an offset of (0.5 x 0.5) in relation to the real pixel position.<br>
705 * The given frame is virtually extended by a fully transparent border so that this function supports arbitrary interpolation positions.<br>
706 * If the given position lies inside the frame area of (-0.5, -0.5) -> (width + 0.5, height + 0.5) the interpolation result will contain color information of the frame, otherwise a fully transparent interpolation result is provided.<br>
707 * @param frame The frame to determine the pixel values from, must be valid
708 * @param width The width of the frame in pixel, with range [1, infinity)
709 * @param height The height of the frame in pixel, with range [1, infinity)
710 * @param position The position to determine the interpolated pixel values for, with range (-infinity, infinity)x(-infinity, infinity)
711 * @param result Resulting pixel values, must be valid
712 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
713 * @tparam tChannels Number of channels of the given frame, with range [1, infinity)
714 * @tparam tAlphaAtFront True, if the alpha channel is in the front of the data channels
715 * @tparam tTransparentIs0xFF True, if 0xFF is interpreted as fully transparent
716 */
717 template <unsigned int tChannels, bool tAlphaAtFront, bool tTransparentIs0xFF>
718 static inline void interpolate1PixelFullAlphaBorder8BitPerChannel(const uint8_t* frame, const unsigned int width, const unsigned int height, const Vector2& position, uint8_t* result, const unsigned int framePaddingElements);
719
720 /**
721 * Interpolates the sum of intensity values of an image patch in a frame, while the frame is provided as a lined integral frame.
722 * @param linedIntegralFrame The lined integral image created from the actual gray-scale image for which the patch intensity sum will be determined, must be valid
723 * @param frameWidth Width of the original frame in pixel (not the width of the lined-integral frame), with range [1, infinity)
724 * @param frameHeight Height of the original frame in pixel (not the height of the lined-integral frame), with range [1, infinity)
725 * @param lineIntegralFramePaddingElements The number of padding elements at the end of each integral image row, in elements, with range [0, infinity)
726 * @param center 2D coordinates of the center point of the patch, with range [patchWidth/2, frameWidth - patchWidth/2)x[patchHeight/2, frameHeight - patchHeight/2) for PC_CENTER
727 * @param pixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
728 * @param patchWidth Width of the calculated patch in pixel, with range [1, frameWidth - 1]
729 * @param patchHeight Height of the calculated patch in pixel, with range [1, frameHeight - 1]
730 * @return The resulting sum of the pixel intensities
731 */
732 static Scalar patchIntensitySum1Channel(const uint32_t* linedIntegralFrame, const unsigned int frameWidth, const unsigned int frameHeight, const unsigned int lineIntegralFramePaddingElements, const Vector2& center, const CV::PixelCenter pixelCenter, const unsigned int patchWidth, const unsigned int patchHeight);
733
734 /**
735 * Checks whether the application of a given homography for a specified input frame and output frame covers the entire image information (which is necessary for the bilinear interpolation) or whether the homography relies on missing image information.
736 * @param inputWidth The width of the input frame in pixel, with range [1, infinity)
737 * @param inputHeight The height of the input frame in pixel, with range [1, infinity)
738 * @param outputWidth The width of the output frame in pixel, with range [1, infinity)
739 * @param outputHeight The height of the output frame in pixel, with range [1, infinity)
740 * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
741 * @param outputOriginX The horizontal origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)
742 * @param outputOriginY The vertical origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)
743 * @return True, if the homography covers the entire input image information (if all output pixels will receive valid data from the input frame); False, otherwise
744 */
745 static bool coversHomographyInputFrame(const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int outputWidth, const unsigned int outputHeight, const SquareMatrix3& input_H_output, const int outputOriginX = 0, const int outputOriginY = 0);
746
747 private:
748
749 /**
750 * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
751 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
752 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
753 * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).
754 * @param input The input frame that will be transformed, must be valid
755 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
756 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
757 * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
758 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
759 * @param output The output frame using the given homography, must be valid
760 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
761 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
762 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
763 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
764 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
765 * @param worker Optional worker object to distribute the computational load
766 * @tparam tChannels Number of channels of the frame
767 * @see homographyMask8BitPerChannel(), homographyWithCamera8BitPerChannel(), homography().
768 */
769 template <unsigned int tChannels>
770 static inline void homography8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const uint8_t* borderColor, uint8_t* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
771
772 /**
773 * Rescales a given frame with 8 bit per data channel by using a bilinear interpolation with user-defined scaling factors.
774 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).<br>
775 * Information: This function is equivalent to OpenCV's cv::resize().
776 * @param source The source frame buffer providing the image information to be resized, must be valid
777 * @param target The target frame buffer receiving the rescaled image information, must be valid
778 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
779 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
780 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
781 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
782 * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
783 * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
784 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
785 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
786 * @param worker Optional worker object to distribute the computation to several CPU cores
787 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
788 */
789 template <unsigned int tChannels>
790 static inline void scale8BitPerChannel(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
791
792 /**
793 * Resizes a subset of a given frame with 8 bit per channel by a bilinear interpolation.
794 * @param source The image data of the source frame to be resized, must be valid
795 * @param target The target frame buffer receiving the interpolated (resized) source frame, must be valid
796 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
797 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
798 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
799 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
800 * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
801 * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
802 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
803 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
804 * @param firstTargetRow The first target row to be handled, with range [0, targetHeight)
805 * @param numberTargetRows The number of target rows to be handled, with range [1, targetHeight - firstTargetRow]
806 * @tparam tChannels Number of frame channels, with range [1, infinity)
807 */
808 template <unsigned int tChannels>
809 static void scale8BitPerChannelSubset(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
810
811 /**
812 * Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
813 * This function uses interpolation factors with 7 bit precision and does not apply any SIMD instructions.
814 * @param extendedSourceRow The source row extended with a copy of the last pixel for which the interpolation will be applied, must be valid
815 * @param targetRow The target row receiving the interpolation result, must be valid
816 * @param targetWidth The width of the target row in pixel, with range [8, infinity)
817 * @param channels The number of frame channels, must be identical with 'tChannels', possible values are 1, 4
818 * @param interpolationLocations The successive locations within the source row defining the location of the left pixels to be interpolated (specified in elements !not! in pixels - e.g., interpolationLocations[0] = firstInterpolationPixel * channels), one for each target pixel, with range [0, (targetWidth - 1) * channels]
819 * @param interpolationFactors The two successive (left !and! right) interpolation factors for each left and right source pixel, with range [0, 128]
820 * @see interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON<tChannels>().
821 */
822 static void interpolateRowHorizontal8BitPerChannel7BitPrecision(const uint8_t* extendedSourceRow, uint8_t* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const uint8_t* interpolationFactors);
823
824 /**
825 * Applies a (vertical) linear interpolation between two rows with arbitrary data types.
826 * This function does not apply any SIMD instructions.<br>
827 * The length of both source rows is identical with the length of the target row.
828 * @param sourceRowTop The top source row to be used for interpolation, must be valid
829 * @param sourceRowBottom The bottom source row to be used for interpolation, must be valid
830 * @param targetRow The target row receiving the interpolation result, must be valid
831 * @param elements The number of elements in each row (width * channels), with range [1, infinity)
832 * @param factorBottom The interpolation factor for all elements of the bottom row, with factorTop = 1 - factorBottom, with range [0, 1]
833 * @tparam T The data type of each element, should be 'float'
834 */
835 template <typename T>
836 static void interpolateRowVertical(const T* sourceRowTop, const T* sourceRowBottom, T* targetRow, const unsigned int elements, const float factorBottom);
837
838 /**
839 * Applies a (horizontal) linear interpolation for one row with arbitrary data type.
840 * This function does not apply any SIMD instructions.
841 * @param extendedSourceRow The source row extended with a copy of the last pixel for which the interpolation will be applied, must be valid
842 * @param targetRow The target row receiving the interpolation result, must be valid
843 * @param targetWidth The width of the target row in pixel, with range [8, infinity)
844 * @param channels The number of frame channels, must be identical with 'tChannels', possible values are 1, 4
845 * @param interpolationLocations The successive locations within the source row defining the location of the left pixels to be interpolated (specified in elements !not! in pixels - e.g., interpolationLocations[0] = firstInterpolationPixel * channels), one for each target pixel, with range [0, (targetWidth - 1) * channels]
846 * @param interpolationFactorsRight The right interpolation factors for each right source pixel, with range [0, 1]
847 * @tparam T The data type of each element, should be 'float'
848 * @tparam tChannels The number of frame channels this function can handle, should be 1
849 * @see interpolateRowHorizontal8BitPerChannel7BitPrecision().
850 */
851 template <typename T, unsigned int tChannels>
852 static void interpolateRowHorizontal(const T* extendedSourceRow, T* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const float* interpolationFactorsRight);
853
854#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
855
856 /**
857 * Applies a (vertical) linear interpolation between two rows with 8 bit per channel.
858 * This function applies NEON instructions and uses interpolation factors with 7 bit precision.<br>
859 * The length of both source rows is identical with the length of the target row.
860 * @param sourceRowTop The top source row to be used for interpolation, must be valid
861 * @param sourceRowBottom The bottom source row to be used for interpolation, must be valid
862 * @param targetRow The target row receiving the interpolation result, must be valid
863 * @param elements The number of elements in each row (width * channels), with range [16, infinity)
864 * @param factorBottom The interpolation factor for all elements of the bottom row, with factorTop = 128 - factorBottom, with range [0, 128]
865 */
866 static void interpolateRowVertical8BitPerChannel7BitPrecisionNEON(const uint8_t* sourceRowTop, const uint8_t* sourceRowBottom, uint8_t* targetRow, const unsigned int elements, const unsigned int factorBottom);
867
868 /**
869 * Applies a (vertical) linear interpolation between two rows with arbitrary data types.
870 * This function applies NEON instructions.<br>
871 * The length of both source rows is identical with the length of the target row.
872 * @param sourceRowTop The top source row to be used for interpolation, must be valid
873 * @param sourceRowBottom The bottom source row to be used for interpolation, must be valid
874 * @param targetRow The target row receiving the interpolation result, must be valid
875 * @param elements The number of elements in each row (width * channels), with range [16, infinity)
876 * @param factorBottom The interpolation factor for all elements of the bottom row, with factorTop = 1 - factorBottom, with range [0, 1]
877 * @tparam T The data type of each element, should be 'float'
878 */
879 template <typename T>
880 static void interpolateRowVerticalNEON(const T* sourceRowTop, const T* sourceRowBottom, T* targetRow, const unsigned int elements, const float factorBottom);
881
882 /**
883 * Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
884 * This function applies NEON instructions and uses interpolation factors with 7 bit precision.
885 * @param extendedSourceRow The source row extended with a copy of the last pixel for which the interpolation will be applied, must be valid
886 * @param targetRow The target row receiving the interpolation result, must be valid
887 * @param targetWidth The width of the target row in pixel, with range [8, infinity)
888 * @param channels The number of frame channels, must be identical with 'tChannels', possible values are 1, 4
889 * @param interpolationLocations The successive locations within the source row defining the location of the left pixels to be interpolated (specified in elements !not! in pixels - e.g., interpolationLocations[0] = firstInterpolationPixel * channels), one for each target pixel, with range [0, (targetWidth - 1) * channels]
890 * @param interpolationFactors The two successive (left !and! right) interpolation factors for each left and right source pixel, with range [0, 128]
891 * @tparam tChannels The number of frame channels this function can handle, possible values are 1, 4
892 * @see interpolateRowHorizontal8BitPerChannel7BitPrecision().
893 */
894 template <unsigned int tChannels>
895 static void interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON(const uint8_t* extendedSourceRow, uint8_t* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const uint8_t* interpolationFactors);
896
897 /**
898 * Applies a (horizontal) linear interpolation for one row with arbitrary data type.
899 * This function applies NEON instructions.
900 * @param extendedSourceRow The source row extended with a copy of the last pixel for which the interpolation will be applied, must be valid
901 * @param targetRow The target row receiving the interpolation result, must be valid
902 * @param targetWidth The width of the target row in pixel, with range [8, infinity)
903 * @param channels The number of frame channels, must be identical with 'tChannels', possible values are 1, 4
904 * @param interpolationLocations The successive locations within the source row defining the location of the left pixels to be interpolated (specified in elements !not! in pixels - e.g., interpolationLocations[0] = firstInterpolationPixel * channels), one for each target pixel, with range [0, (targetWidth - 1) * channels]
905 * @param interpolationFactorsRight The right interpolation factors for each right source pixel, with range [0, 1]
906 * @tparam T The data type of each element, should be 'float'
907 * @tparam tChannels The number of frame channels this function can handle, should be 1
908 * @see interpolateRowHorizontal8BitPerChannel7BitPrecision().
909 */
910 template <typename T, unsigned int tChannels>
911 static void interpolateRowHorizontalNEON(const T* extendedSourceRow, T* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const float* interpolationFactorsRight);
912
913 /**
914 * Rescales a subset of a given frame with 8 bit per channel by a bilinear interpolation.
915 * This function applies NEON instructions and uses interpolation factors with 7 bit precision.
916 * @param source The image data of the source frame to be resized, must be valid
917 * @param target The target frame buffer receiving the interpolated (resized) source frame, must be valid
918 * @param sourceWidth Width of the source frame in pixel, with range [2, 65535]
919 * @param sourceHeight Height of the source frame in pixel, with range [1, 65535]
920 * @param targetWidth Width of the target frame in pixel, with range [tMinimalTargetWidth, 65535]
921 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
922 * @param channels The number of channels both frames have, with range [1, infinity)
923 * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
924 * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
925 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
926 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
927 * @param firstTargetRow The first target row to be handled, with range [0, targetHeight)
928 * @param numberTargetRows The number of target rows to be handled, with range [1, targetHeight - firstTargetRow]
929 * @see interpolateRowVertical8BitPerChannel7BitPrecisionNEON(), interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON().
930 */
931 static void scale8BitPerChannelSubset7BitPrecisionNEON(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int channels, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
932
933#endif // OCEAN_HARDWARE_NEON_VERSION >= 10
934
935 /**
936 * Resizes a subset of a given frame with arbitrary data type by a bilinear interpolation.
937 * @param source The image data of the source frame to be resized, must be valid
938 * @param target The target frame buffer receiving the interpolated (resized) source frame, must be valid
939 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
940 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
941 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
942 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
943 * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
944 * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
945 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
946 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
947 * @param firstTargetRow The first target row to be handled, with range [0, targetHeight)
948 * @param numberTargetRows The number of target rows to be handled, with range [1, targetHeight - firstTargetRow]
949 * @tparam T The data type of each pixel channel, e.g., float, double, int, short, ...
950 * @tparam TScale The data type of the internal scaling factors to be used, should be 'float' or 'double'
951 * @tparam tChannels Number of frame channels, with range [1, infinity)
952 */
953 template <typename T, typename TScale, unsigned int tChannels>
954 static void scaleSubset(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
955
956 /**
957 * Rotates a subset of a given frame by a bilinear interpolation.
958 * @param source The source frame to be rotated, must be valid
959 * @param target The target frame which will receive the rotated image, with same frame type as the source frame, must be valid
960 * @param width The width of the source and target frame in pixel, with range [1, infinity)
961 * @param height The height of the source and target frame in pixel, with range [1, infinity)
962 * @param horizontalAnchorPosition Position of the rotation anchor in the horizontal direction, with range (-infinity, infinity)
963 * @param verticalAnchorPosition Position of the rotation anchor in the vertical direction, with range (-infinity, infinity)
964 * @param angle The counter clockwise rotation angle in radian, with range [0, 2PI)
965 * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use 0x00 for each channel
966 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
967 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
968 * @param firstTargetRow The first row of the target frame to be handled, with range [0, height)
969 * @param numberTargetRows The number of rows in the target frame to be handled, with range [1, height - firstTargetRow]
970 * @tparam tChannels Number of frame channels, with range [1, infinity)
971 */
972 template <unsigned int tChannels>
973 static void rotate8BitPerChannelSubset(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const uint8_t* borderColor, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
974
975 /**
976 * Subset function to apply an affine transform to an N-channel, 8-bit unsigned image.
977 * The affine transform must be provided in the following form: `sourcePoint = source_A_target * targetPoint`
978 * This function does not apply SIMD instructions and can be used for any frame dimensions.
979 * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
980 * <pre>
981 * a c e
982 * b d f
983 * 0 0 1
984 * </pre>
985 * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
986 * @param source Input frame that will be transformed
987 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
988 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
989 * @param source_A_target Affine transformation which is applied to the source frame.
990 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
991 * @param target Output frame using the given affine transform
992 * @param targetWidth The width of the target image in pixel, with range [1, infinity)
993 * @param targetHeight The height of the target image in pixel, with range [1, infinity)
994 * @param firstTargetRow The first target row to be handled
995 * @param numberTargetRows Number of target rows to be handled
996 * @param sourcePaddingElements The number of padding elements at the end of each source frame row, in elements, with range [0, infinity)
997 * @param targetPaddingElements The number of padding elements at the end of each target frame row, in elements, with range [0, infinity)
998 * @tparam tChannels Number of frame channels, with range [1, infinity)
999 * @see affine8BitPerChannelSSESubset(), affine8BitPerChannelNEONSubset()
1000 */
1001 template <unsigned int tChannels>
1002 static inline void affine8BitPerChannelSubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
1003
1004 /**
1005 * Transforms an 8 bit per channel frame using the given homography.
1006 * The homography must provide the following transformation: inputPoint = input_H_output * outputPoint
1007 * This function does not apply SIMD instructions and can be used for any frame dimensions.
1008 * @param input The input frame that will be transformed
1009 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
1010 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
1011 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1012 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1013 * @param output The output frame using the given homography
1014 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
1015 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1016 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
1017 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
1018 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight - 1]
1019 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
1020 * @tparam tChannels Number of frame channels, with range [1, infinity)
1021 * @see homography8BitPerChannelSSESubset(), homography8BitPerChannelNEONSubset()
1022 */
1023 template <unsigned int tChannels>
1024 static inline void homography8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1025
1026 /**
1027 * Transforms a frame with (almost) arbitrary pixel format using the given homography; the homography must provide the following transformation: inputPoint = input_H_output * outputPoint
1028 * This function does not apply SIMD instructions and can be used for any frame dimensions.
1029 * @param input The input frame that will be transformed
1030 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
1031 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
1032 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1033 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1034 * @param output The output frame using the given homography
1035 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
1036 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1037 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
1038 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
1039 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight - 1]
1040 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
1041 * @tparam T Data type of each pixel channel, e.g., float, double, int
1042 * @tparam tChannels Number of frame channels, with range [1, infinity)
1043 * @see homography8BitPerChannelSubset(), homography8BitPerChannelSSESubset().
1044 */
1045 template <typename T, unsigned int tChannels>
1046 static inline void homographySubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1047
1048#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1049
1050 /**
1051 * Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using SSE).
1052 * This function applies SSE instructions.<br>
1053 * Beware: The target width 'targetWidth' must be >= 4, use affine8BitPerChannelSubset() for small target frames
1054 * This function has the property: sourcePoint = source_A_target * targetPoint
1055 * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
1056 * <pre>
1057 * a c e
1058 * b d f
1059 * 0 0 1
1060 * </pre>
1061 * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
1062 * @param source Input frame that will be transformed
1063 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
1064 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
1065 * @param source_A_target Affine transformation which is applied to the source frame, following the equation: sourcePoint = source_A_target * targetPoint
1066 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1067 * @param target The target frame where the result of the transformation will be stored
1068 * @param targetWidth The width of the target image in pixel, with range [4, infinity)
1069 * @param targetHeight The height of the target image in pixel, with range [1, infinity)
1070 * @param firstTargetRow The first target row to be handled
1071 * @param numberTargetRows Number of target rows to be handled
1072 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
1073 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1074 * @tparam tChannels Number of frame channels
1075 * @see affine8BitPerChannelSubset(), affine8BitPerChannelNEONSubset().
1076 */
1077 template <unsigned int tChannels>
1078 static inline void affine8BitPerChannelSSESubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
1079
1080 /**
1081 * Transforms an 8 bit per channel frame using the given homography; the homography must provide the following transformation: inputPoint = input_H_output * outputPoint
1082 * This function applies SSE instructions.<br>
1083 * Beware: The output width 'outputWidth' must be >= 4, use homography8BitPerChannelSubset() for small output frames
1084 * @param input The input frame that will be transformed, must be valid
1085 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
1086 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
1087 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1088 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1089 * @param output The output frame using the given homography, must be valid
1090 * @param outputWidth The width of the output image in pixel, with range [4, infinity)
1091 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1092 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
1093 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
1094 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight - 1]
1095 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
1096 * @tparam tChannels Number of frame channels, with range [1, infinity)
1097 * @see homography8BitPerChannelSubset().
1098 */
1099 template <unsigned int tChannels>
1100 static inline void homography8BitPerChannelSSESubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1101
1102 /**
1103 * Interpolates 4 independent pixels concurrently based on already known locations (top-left, top-right, bottom-left, and bottom-right) and interpolation factors for the source pixels.
1104 * This function also supports interpolating the pixel values for a subset of the four pixels only, valid pixels will be interpolated, invalid pixels receive a defined border color.
1105 * @param source The source image in which the four independent pixels are located, must be valid
1106 * @param offsetsTopLeft The four offsets within the source image for the four top-left pixels used for the interpolation, with range [0, (width * tChannels + sourcePaddingElements) * (height - 1) + width * tChannels), or the corresponding validPixel information is 0x00000000
1107 * @param offsetsTopRight The four offsets within the source image for the four top-right pixels used for the interpolation, with range [0, (width * tChannels + sourcePaddingElements) * (height - 1) + width * tChannels), or the corresponding validPixel information is 0x00000000
1108 * @param offsetsBottomLeft The four offsets within the source image for the four bottom-left pixels used for the interpolation, with range [0, (width * tChannels + sourcePaddingElements) * (height - 1) + width * tChannels), or the corresponding validPixel information is 0x00000000
1109 * @param offsetsBottomRight The four offsets within the source image for the four bottom-right pixels used for the interpolation, with range [0, (width * tChannels + sourcePaddingElements) * (height - 1) + width * tChannels), or the corresponding validPixel information is 0x00000000
1110 * @param validPixels Four boolean states specifying which of the given four pixels will be interpolated and which ones will receive the defined border color, 0x00000000 for invalid pixels, everything else for valid pixels (e.g., 0xFFFFFFFF)
1111 * @param borderColor The border color that will be assigned to each resulting pixel that is invalid (for which the corresponding validPixel information is 0x00000000)
1112 * @param m128_factorsRight The horizontal interpolation factors for right pixels, with range [0, 128], 128 to use the color information of the right pixels only, 0 to use the color information of the left pixels only
1113 * @param m128_factorsBottom The vertical interpolation factors for bottom pixels, with range [0, 128], 128 to use the color information of the bottom pixels only, 0 to use the color information of the top pixels only
1114 * @param targetPositionPixels The buffer that will receive the interpolated color values, must be valid
1115 * @tparam tChannels The number of frame channels, with range [1, infinity)
1116 */
1117 template <unsigned int tChannels>
1118 static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelSSE(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const typename DataType<uint8_t, tChannels>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, tChannels>::Type* targetPositionPixels);
1119
1120 /**
1121 * Interpolates 4 independent pixels concurrently based on already loaded source pixel values (top-left, top-right, bottom-left, and bottom-right) and individual interpolation factors for each of the four source pixel groups.
1122 * NOTE(review): in contrast to the offset-based overload, this overload has neither a validPixels nor a borderColor parameter, so the handling of invalid pixels is presumably left to the caller - confirm against the implementation.
1123 * @param m128_sourcesTopLeft The pixel values of the four top left pixels, starting at the first byte, may contain unused bytes at the end, e.g., RGBARGBARGBARGBA or YUVYUVYUVYUV----
1124 * @param m128_sourcesTopRight The pixel values of the four top right pixels, starting at the first byte, may contain unused bytes at the end
1125 * @param m128_sourcesBottomLeft The pixel values of the four bottom left pixels, starting at the first byte, may contain unused bytes at the end
1126 * @param m128_sourcesBottomRight The pixel values of the four bottom right pixels, starting at the first byte, may contain unused bytes at the end
1127 * @param m128_factorsTopLeft The four interpolation factors of the four top left pixels, with ranges [0, 128 * 128], so that (m128_factorsTopLeft + m128_factorsTopRight + m128_factorsBottomLeft + m128_factorsBottomRight) == (128 * 128)
1128 * @param m128_factorsTopRight The four interpolation factors of the four top right pixels, with ranges [0, 128 * 128], so that (m128_factorsTopLeft + m128_factorsTopRight + m128_factorsBottomLeft + m128_factorsBottomRight) == (128 * 128)
1129 * @param m128_factorsBottomLeft The four interpolation factors of the four bottom left pixels, with ranges [0, 128 * 128], so that (m128_factorsTopLeft + m128_factorsTopRight + m128_factorsBottomLeft + m128_factorsBottomRight) == (128 * 128)
1130 * @param m128_factorsBottomRight The four interpolation factors of the four bottom right pixels, with ranges [0, 128 * 128], so that (m128_factorsTopLeft + m128_factorsTopRight + m128_factorsBottomLeft + m128_factorsBottomRight) == (128 * 128)
1131 * @return The resulting interpolated pixel values, starting at the first byte, may contain unused bytes at the end, e.g., RGBARGBARGBARGBA or YUVYUVYUVYUV----
1132 * @tparam tChannels The number of frame channels, with range [3, 4]
1133 */
1134 template <unsigned int tChannels>
1135 static OCEAN_FORCE_INLINE __m128i interpolate4Pixels8BitPerChannelSSE(const __m128i& m128_sourcesTopLeft, const __m128i& m128_sourcesTopRight, const __m128i& m128_sourcesBottomLeft, const __m128i& m128_sourcesBottomRight, const __m128i& m128_factorsTopLeft, const __m128i& m128_factorsTopRight, const __m128i& m128_factorsBottomLeft, const __m128i& m128_factorsBottomRight);
1136
1137#endif // OCEAN_HARDWARE_SSE_VERSION
1138
1139#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1140
1141 /**
1142 * Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using NEON).
1143 * This function applies NEON instructions.<br>
1144 * This function has the property: sourcePoint = source_A_target * targetPoint
1145 * Beware: The target width 'targetWidth' must be >= 4, use affine8BitPerChannelSubset() for small target frames
1146 * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
1147 * <pre>
1148 * a c e
1149 * b d f
1150 * 0 0 1
1151 * </pre>
1152 * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
1153 * @param source The source frame that will be transformed
1154 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
1155 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
1156 * @param source_A_target Affine transform used to transform the given source frame, following the equation: sourcePoint = source_A_target * targetPoint
1157 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1158 * @param target The target frame using the given affine transform
1159 * @param targetWidth The width of the target image in pixel, with range [4, infinity)
1160 * @param targetHeight The height of the target image in pixel, with range [1, infinity)
1161 * @param firstTargetRow The first target row to be handled
1162 * @param numberTargetRows Number of target rows to be handled
1163 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
1164 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1165 * @tparam tChannels Number of frame channels, with range [1, infinity)
1166 * @see affine8BitPerChannelSubset().
1167 */
1168 template <unsigned int tChannels>
1169 static inline void affine8BitPerChannelNEONSubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
1170
1171 /**
1172 * Transforms an 8 bit per channel frame using the given homography; the homography must provide the following transformation: inputPoint = input_H_output * outputPoint
1173 * This function applies NEON instructions.<br>
1174 * Beware: The output width 'outputWidth' must be >= 4, use homography8BitPerChannelSubset() for small output frames.
1175 * @param input The input frame that will be transformed
1176 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
1177 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
1178 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1179 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1180 * @param output The output frame using the given homography
1181 * @param outputWidth The width of the output image in pixel, with range [4, infinity)
1182 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1183 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
1184 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
1185 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight - 1]
1186 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
1187 * @tparam tChannels Number of frame channels, with range [1, infinity)
1188 * @see homography8BitPerChannelSubset().
1189 */
1190 template <unsigned int tChannels>
1191 static inline void homography8BitPerChannelNEONSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1192
1193 /**
1194 * Interpolates 4 independent pixels concurrently based on already known locations (top-left, top-right, bottom-left, and bottom-right) and interpolation factors for the source pixels.
1195 * This function also supports interpolating the pixel values for a subset of the four pixels only, valid pixels will be interpolated, invalid pixels receive a defined border color.
1196 * @param source The source image in which the four independent pixels are located, must be valid
1197 * @param offsetsTopLeftElements The four offsets within the source image for the four top-left pixels used for the interpolation, in elements, with ranges [0, strideElements * height), or the corresponding validPixel information is 0x00000000
1198 * @param offsetsTopRightElements The four offsets within the source image for the four top-right pixels used for the interpolation, in elements, with ranges [0, strideElements * height), or the corresponding validPixel information is 0x00000000
1199 * @param offsetsBottomLeftElements The four offsets within the source image for the four bottom-left pixels used for the interpolation, in elements, with ranges [0, strideElements * height), or the corresponding validPixel information is 0x00000000
1200 * @param offsetsBottomRightElements The four offsets within the source image for the four bottom-right pixels used for the interpolation, in elements, with ranges [0, strideElements * height), or the corresponding validPixel information is 0x00000000
1201 * @param validPixels Four boolean states specifying which of the given four pixels will be interpolated and which ones will receive the defined border color, 0x00000000 for invalid pixels, everything else for valid pixels (e.g., 0xFFFFFFFF)
1202 * @param borderColor The border color that will be assigned to each resulting pixel that is invalid (for which the corresponding validPixel information is 0x00000000)
1203 * @param m128_factorsRight The horizontal interpolation factors for right pixels, with range [0, 128], 128 to use the color information of the right pixels only, 0 to use the color information of the left pixels only
1204 * @param m128_factorsBottom The vertical interpolation factors for bottom pixels, with range [0, 128], 128 to use the color information of the bottom pixels only, 0 to use the color information of the top pixels only
1205 * @param targetPositionPixels The buffer that will receive the interpolated color values, must be valid
1206 * @tparam tChannels The number of frame channels, with range [1, infinity)
1207 */
1208 template <unsigned int tChannels>
1209 static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelNEON(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const typename DataType<uint8_t, tChannels>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, tChannels>::Type* targetPositionPixels);
1210
1211 /**
1212 * Interpolates 8 independent pixels concurrently of a 1 channel frame, the source pixel locations must be known already (top-left, top-right, bottom-left, and bottom-right), further the interpolation factors must be known already.
1213 * @param topLeft_u_8x8 The 8 top left pixel values to be used for interpolation
1214 * @param topRight_u_8x8 The 8 top right pixel values to be used for interpolation
1215 * @param bottomLeft_u_8x8 The 8 bottom left pixel values to be used for interpolation
1216 * @param bottomRight_u_8x8 The 8 bottom right pixel values to be used for interpolation
1217 * @param factorsRight_factorsBottom_128_u_8x16 The eight horizontal interpolation factors for right pixels, and the eight vertical interpolation factors for the bottom pixels, with range [0, 128], 128 to use the color information of the right (respectively bottom) pixels only, 0 to use the color information of the left (respectively top) pixels only; NOTE(review): presumably the right factors occupy the first eight lanes and the bottom factors the last eight lanes - confirm against the implementation
1218 * @param targetPositionPixels The buffer that will receive the eight interpolated color values, must be valid
1219 */
1220 static OCEAN_FORCE_INLINE void interpolate8Pixels1Channel8BitNEON(const uint8x8_t& topLeft_u_8x8, const uint8x8_t& topRight_u_8x8, const uint8x8_t& bottomLeft_u_8x8, const uint8x8_t& bottomRight_u_8x8, const uint8x16_t& factorsRight_factorsBottom_128_u_8x16, uint8_t* targetPositionPixels);
1221
1222#endif // OCEAN_HARDWARE_NEON_VERSION
1223
1224 /**
1225 * Transforms an 8 bit per channel frame using the given homographies.
1226 * @param input The input frame that will be transformed
1227 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
1228 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
1229 * @param homographies Homographies used to transform the given input frame
1230 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1231 * @param output The output frame using the given homographies
1232 * @param outputQuadrantCenterX The horizontal position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputWidth)
1233 * @param outputQuadrantCenterY The vertical position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputHeight)
1234 * @param outputOriginX The horizontal coordinate of the output frame's origin
1235 * @param outputOriginY The vertical coordinate of the output frame's origin
1236 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
1237 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1238 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1239 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1240 * @param firstOutputRow The first output row to be handled
1241 * @param numberOutputRows Number of output rows to be handled
1242 * @tparam tChannels Number of frame channels
1243 */
1244 template <unsigned int tChannels>
1245 static inline void homographies8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* homographies, const uint8_t* borderColor, uint8_t* output, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1246
1247 /**
1248 * Transforms an 8 bit per channel frame using the given homography and additionally creates a mask frame stating which output pixels have a valid corresponding pixel in the input frame.
1249 * @param input The input frame that will be transformed, must be valid
1250 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
1251 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
1252 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1253 * @param output The output frame resulting by application of the given homography, must be valid
1254 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
1255 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
1256 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
1257 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1258 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1259 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1260 * @param outputMaskPaddingElements The number of padding elements at the end of each row of the output mask, in elements, with range [0, infinity)
1261 * @param firstOutputRow The first output row to be handled
1262 * @param numberOutputRows Number of output rows to be handled
1263 * @tparam tChannels Number of frame channels, with range [1, infinity)
1264 */
1265 template <unsigned int tChannels>
1266 static inline void homographyMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1267
1268 /**
1269 * Transforms an 8 bit per channel frame using the given homographies and additionally creates a mask frame stating which output pixels have a valid corresponding pixel in the input frame.
1270 * @param input The input frame that will be transformed
1271 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
1272 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
1273 * @param homographies Homographies used to transform the given input frame
1274 * @param output The output frame resulting by application of the given homographies
1275 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
1276 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
1277 * @param outputQuadrantCenterX The horizontal position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputWidth)
1278 * @param outputQuadrantCenterY The vertical position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputHeight)
1279 * @param outputOriginX The horizontal coordinate of the output frame's origin
1280 * @param outputOriginY The vertical coordinate of the output frame's origin
1281 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
1282 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1283 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1284 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1285 * @param outputMaskPaddingElements The number of padding elements at the end of each row of the output mask, in elements, with range [0, infinity)
1286 * @param firstOutputRow The first output row to be handled
1287 * @param numberOutputRows Number of output rows to be handled
1288 * @tparam tChannels Number of frame channels
1289 */
1290 template <unsigned int tChannels>
1291 static inline void homographiesMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* homographies, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1292
1293 /**
1294 * Transforms a subset of an 8 bit per channel frame using the given homography which is specified in normalized camera coordinates.
1295 * @param inputCamera The pinhole camera profile to be applied for the input frame
1296 * @param outputCamera The pinhole camera profile to be applied for the output frame
1297 * @param outputCameraDistortionLookup The distortion lookup table of the output camera
1298 * @param input The input frame that will be transformed
1299 * @param normalizedHomography The homography used to transform the given input frame specified in normalized camera coordinates
1300 * @param useDistortionParameters True, to apply the distortion parameters of the camera profile
1301 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1302 * @param output The output frame resulting by application of the given homography
1303 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1304 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1305 * @param firstRow The first row to be handled
1306 * @param numberRows Number of rows to be handled
1307 * @tparam tChannels Number of frame channels
1308 */
1309 template <unsigned int tChannels>
1310 static void homographyWithCamera8BitPerChannelSubset(const PinholeCamera* inputCamera, const PinholeCamera* outputCamera, const PinholeCamera::DistortionLookup* outputCameraDistortionLookup, const uint8_t* input, const SquareMatrix3* normalizedHomography, const bool useDistortionParameters, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1311
1312 /**
1313 * Transforms a subset of an 8 bit per channel frame using the given homography (specified in normalized camera coordinates) and writes a corresponding output mask.
1314 * @param inputCamera The pinhole camera profile to be applied for the input frame
1315 * @param outputCamera The pinhole camera profile to be applied for the output frame
1316 * @param outputCameraDistortionLookup The distortion lookup table of the output camera
1317 * @param input The input frame that will be transformed, must be valid
1318 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1319 * @param normalizedHomography The homography used to transform the given input frame specified in normalized camera coordinates
1320 * @param output The output frame resulting by application of the given homography
1321 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
1322 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1323 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
1324 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
1325 * @param firstRow The first row to be handled
1326 * @param numberRows Number of rows to be handled
1327 * @tparam tChannels Number of frame channels
1328 */
1329 template <unsigned int tChannels>
1330 static void homographyWithCameraMask8BitPerChannelSubset(const PinholeCamera* inputCamera, const PinholeCamera* outputCamera, const PinholeCamera::DistortionLookup* outputCameraDistortionLookup, const uint8_t* input, const unsigned int inputPaddingElements, const SquareMatrix3* normalizedHomography, uint8_t* output, uint8_t* outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows);
1331
1332 /**
1333 * Transforms a subset of a given input frame with uint8_t as element type into an output frame by application of an interpolation lookup table.
1334 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
1335 * @param input The input frame which will be transformed, must be valid
1336 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
1337 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
1338 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
1339 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
1340 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1341 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table
1342 * @param inputPaddingElements Number of padding elements at the end of each input row, in elements, with range [0, infinity)
1343 * @param outputPaddingElements Number of padding elements at the end of each output row, in elements, with range [0, infinity)
1344 * @param firstRow First row to be handled, with range [0, input_LT_output->sizeY())
1345 * @param numberRows Number of rows to be handled, with range [1, input_LT_output->sizeY() - firstRow]
1346 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
1347 */
1348 template <unsigned int tChannels>
1349 static void lookup8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1350
1351 /**
1352 * Transforms a subset of a given input frame with arbitrary element type into an output frame by application of an interpolation lookup table.
1353 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
1354 * @param input The input frame which will be transformed, must be valid
1355 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
1356 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
1357 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
1358 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
1359 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign T(0) to each channel
1360 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
1361 * @param inputPaddingElements Number of padding elements at the end of each input row, in elements, with range [0, infinity)
1362 * @param outputPaddingElements Number of padding elements at the end of each output row, in elements, with range [0, infinity)
1363 * @param firstRow First row to be handled, with range [0, input_LT_output->sizeY())
1364 * @param numberRows Number of rows to be handled, with range [1, input_LT_output->sizeY() - firstRow]
1365 * @tparam T Data type of each pixel channel, must not be 'uint8_t'
1366 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
1367 */
1368 template <typename T, unsigned int tChannels>
1369 static void lookupSubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const T* borderColor, T* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1370
1371#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1372
1373 /**
1374 * Transforms a subset of a given input frame into an output frame by application of an interpolation lookup table and uses NEON instructions.
1375 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
1376 * @param input The input frame which will be transformed, must be valid
1377 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
1378 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
1379 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), with table width >= 4, must be valid
1380 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
1381 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1382 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table
1383 * @param inputPaddingElements Number of padding elements at the end of each input row, in elements, with range [0, infinity)
1384 * @param outputPaddingElements Number of padding elements at the end of each output row, in elements, with range [0, infinity)
1385 * @param firstRow First row to be handled, with range [0, input_LT_output->sizeY())
1386 * @param numberRows Number of rows to be handled, with range [1, input_LT_output->sizeY() - firstRow]
1387 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
1388 */
1389 template <unsigned int tChannels>
1390 static void lookup8BitPerChannelSubsetNEON(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1391
1392#endif // defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1393
1394 /**
1395 * Transforms a subset of a given input frame into an output frame by application of an interpolation lookup table.
1396 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
1397 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
1398 * @param input The input frame which will be transformed
1399 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
1400 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
1401 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
1402 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
1403 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table
1404 * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
1405 * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
1406 * @param inputPaddingElements The number of padding elements at the end of each row of `input`, in elements, with range [0, infinity)
1407 * @param outputPaddingElements The number of padding elements at the end of each row of `output`, in elements, with range [0, infinity)
1408 * @param outputMaskPaddingElements The number of padding elements at the end of each row of `outputMask`, in elements, with range [0, infinity)
1409 * @param firstRow First row to be handled
1410 * @param numberRows Number of rows to be handled
1411 * @tparam tChannels Number of channels of the frame
1412 */
1413 template <unsigned int tChannels>
1414 static void lookupMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1415};
1416
// In-place variant of resize(): the resized image is created in a temporary frame which then replaces 'frame'.
// Returns false if the two-frame resize(source, target, worker) overload fails; 'frame' is only assigned on success.
1417inline bool FrameInterpolatorBilinear::Comfort::resize(Frame& frame, const unsigned int width, const unsigned int height, Worker* worker)
1418{
1419 ocean_assert(frame.isValid());
1420 ocean_assert(width >= 1u && height >= 1u);
1421
1422 Frame target(FrameType(frame, width, height)); // frame type built from 'frame' together with the requested width and height
1423
1424 if (!resize(frame, target, worker))
1425 {
1426 return false; // 'frame' has not been assigned at this point
1427 }
1428
1429 target.setTimestamp(frame.timestamp()); // carry the timestamp over to the resized frame
1431 // NOTE(review): listing line 1430 is absent from this extract - verify against the original header that no statement was dropped here
1432 frame = std::move(target);
1433 return true;
1434}
1435
1436template <typename TScalar>
1437bool FrameInterpolatorBilinear::Comfort::interpolatePixel8BitPerChannel(const uint8_t* frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2<TScalar>& position, uint8_t* result)
1438{
1439 ocean_assert(frame != nullptr);
1440 ocean_assert(channels >= 1u && channels <= 8u);
1441
1442 if (pixelCenter == PC_TOP_LEFT)
1443 {
1444 switch (channels)
1445 {
1446 case 1u:
1447 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<1u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1448 return true;
1449
1450 case 2u:
1451 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<2u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1452 return true;
1453
1454 case 3u:
1455 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<3u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1456 return true;
1457
1458 case 4u:
1459 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<4u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1460 return true;
1461
1462 case 5u:
1463 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<5u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1464 return true;
1465
1466 case 6u:
1467 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<6u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1468 return true;
1469
1470 case 7u:
1471 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<7u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1472 return true;
1473
1474 case 8u:
1475 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<8u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1476 return true;
1477 }
1478 }
1479 else
1480 {
1481 ocean_assert(pixelCenter == PC_CENTER);
1482
1483 switch (channels)
1484 {
1485 case 1u:
1486 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<1u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1487 return true;
1488
1489 case 2u:
1490 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<2u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1491 return true;
1492
1493 case 3u:
1494 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<3u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1495 return true;
1496
1497 case 4u:
1498 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<4u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1499 return true;
1500
1501 case 5u:
1502 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<5u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1503 return true;
1504
1505 case 6u:
1506 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<6u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1507 return true;
1508
1509 case 7u:
1510 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<7u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1511 return true;
1512
1513 case 8u:
1514 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<8u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1515 return true;
1516 }
1517 }
1518
1519 ocean_assert(false && "Invalid channel number");
1520 return false;
1521}
1522
1523template <typename TSource, typename TTarget, typename TScalar, typename TIntermediate>
1524bool FrameInterpolatorBilinear::Comfort::interpolatePixel(const TSource* frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2<TScalar>& position, TTarget* result, const TIntermediate& resultBias)
1525{
1526 ocean_assert(frame != nullptr);
1527 ocean_assert(channels >= 1u && channels <= 8u);
1528
1529 if (pixelCenter == PC_TOP_LEFT)
1530 {
1531 switch (channels)
1532 {
1533 case 1u:
1534 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 1u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1535 return true;
1536
1537 case 2u:
1538 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 2u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1539 return true;
1540
1541 case 3u:
1542 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 3u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1543 return true;
1544
1545 case 4u:
1546 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 4u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1547 return true;
1548
1549 case 5u:
1550 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 5u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1551 return true;
1552
1553 case 6u:
1554 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 6u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1555 return true;
1556
1557 case 7u:
1558 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 7u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1559 return true;
1560
1561 case 8u:
1562 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 8u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1563 return true;
1564 }
1565 }
1566 else
1567 {
1568 ocean_assert(pixelCenter == PC_CENTER);
1569
1570 switch (channels)
1571 {
1572 case 1u:
1573 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 1u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1574 return true;
1575
1576 case 2u:
1577 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 2u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1578 return true;
1579
1580 case 3u:
1581 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 3u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1582 return true;
1583
1584 case 4u:
1585 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 4u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1586 return true;
1587
1588 case 5u:
1589 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 5u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1590 return true;
1591
1592 case 6u:
1593 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 6u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1594 return true;
1595
1596 case 7u:
1597 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 7u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1598 return true;
1599
1600 case 8u:
1601 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 8u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1602 return true;
1603 }
1604 }
1605
1606 ocean_assert(false && "Invalid channel number");
1607 return false;
1608}
1609
1610template <typename T, unsigned int tChannels>
1611inline void FrameInterpolatorBilinear::resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
1612{
1613 ocean_assert(source != nullptr && target != nullptr);
1614 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
1615 ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
1616
1617 const double sourceX_s_targetX = double(sourceWidth) / double(targetWidth);
1618 const double sourceY_s_targetY = double(sourceHeight) / double(targetHeight);
1619
1620 scale<T, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, worker);
1621}
1622
1623template <typename T, unsigned int tChannels>
1624inline void FrameInterpolatorBilinear::scale(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
1625{
1626 ocean_assert(source != nullptr && target != nullptr);
1627 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
1628 ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
1629 ocean_assert(sourceX_s_targetX > 0.0);
1630 ocean_assert(sourceY_s_targetY > 0.0);
1631
1632 if (sourceWidth == targetWidth && sourceHeight == targetHeight)
1633 {
1634 FrameConverter::subFrame<T>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, 0u, 0u, 0u, 0u, sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements);
1635 return;
1636 }
1637
1638 if (std::is_same<T, uint8_t>::value)
1639 {
1640 // we have a SIMD-based optimized version for 'uint8_t' data types
1641
1642 scale8BitPerChannel<tChannels>((const uint8_t*)source, (uint8_t*)target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, worker);
1643 }
1644 else
1645 {
1646 typedef typename FloatTyper<T>::Type TScale;
1647
1648 if (worker)
1649 {
1650 worker->executeFunction(Worker::Function::createStatic(&scaleSubset<T, TScale, tChannels>, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
1651 }
1652 else
1653 {
1654 scaleSubset<T, TScale, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
1655 }
1656 }
1657}
1658
1659template <unsigned int tChannels>
1660inline void FrameInterpolatorBilinear::affine8BitPerChannel(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3& source_A_target, const uint8_t* borderColor, uint8_t* target, const CV::PixelPositionI& targetOrigin, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
1661{
1662 // If applicable, apply an additional translation to the affine transformation.
1663 const SquareMatrix3 adjustedAffineTransform = source_A_target * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(targetOrigin.x()), Scalar(targetOrigin.y()), 1));
1664
1665 if (worker)
1666 {
1667 if (targetWidth >= 4u)
1668 {
1669#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1670 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::affine8BitPerChannelSSESubset<tChannels>, source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, 0u, sourcePaddingElements, targetPaddingElements), 0, targetHeight, 8u, 9u, 20u);
1671 return;
1672#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1673 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::affine8BitPerChannelNEONSubset<tChannels>, source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, 0u, sourcePaddingElements, targetPaddingElements), 0, targetHeight, 8u, 9u, 20u);
1674 return;
1675#endif
1676 }
1677
1678 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::affine8BitPerChannelSubset<tChannels>, source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, 0u, sourcePaddingElements, targetPaddingElements), 0, targetHeight, 8u, 9u, 20u);
1679 }
1680 else
1681 {
1682 if (targetWidth >= 4u)
1683 {
1684#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1685 affine8BitPerChannelSSESubset<tChannels>(source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, targetHeight, sourcePaddingElements, targetPaddingElements);
1686 return;
1687#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1688 affine8BitPerChannelNEONSubset<tChannels>(source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, targetHeight, sourcePaddingElements, targetPaddingElements);
1689 return;
1690#endif
1691 }
1692
1693 affine8BitPerChannelSubset<tChannels>(source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, targetHeight, sourcePaddingElements, targetPaddingElements);
1694 }
1695}
1696
1697template <unsigned int tChannels>
1698inline void FrameInterpolatorBilinear::homography8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const uint8_t* borderColor, uint8_t* output, const CV::PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1699{
1700 // we adjust the homography to address 'outputOrigin'
1701 const SquareMatrix3 input_H_shiftedOutput = input_H_output * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
1702
1703 if (worker)
1704 {
1705 if (outputWidth >= 4u)
1706 {
1707#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1708 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homography8BitPerChannelSSESubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1709 return;
1710#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1711 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homography8BitPerChannelNEONSubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1712 return;
1713#endif
1714 }
1715
1716 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homography8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1717 }
1718 else
1719 {
1720 if (outputWidth >= 4u)
1721 {
1722#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1723 homography8BitPerChannelSSESubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1724 return;
1725#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1726 homography8BitPerChannelNEONSubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1727 return;
1728#endif
1729 }
1730
1731 homography8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1732 }
1733}
1734
1735template <typename T, unsigned int tChannels>
1736inline void FrameInterpolatorBilinear::homography(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const T* borderColor, T* output, const CV::PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1737{
1738 if (std::is_same<T, uint8_t>::value)
1739 {
1740 homography8BitPerChannel<tChannels>((const uint8_t*)input, inputWidth, inputHeight, input_H_output, (const uint8_t*)borderColor, (uint8_t*)output, outputOrigin, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, worker);
1741 return;
1742 }
1743 else
1744 {
1745 // we adjust the homography to address 'outputOrigin'
1746 const SquareMatrix3 input_H_shiftedOutput = input_H_output * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
1747
1748 if (worker)
1749 {
1750 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographySubset<T, tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1751 }
1752 else
1753 {
1754 homographySubset<T, tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1755 }
1756 }
1757}
1758
1759template <unsigned int tChannels>
1760inline void FrameInterpolatorBilinear::homographies8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], const uint8_t* borderColor, uint8_t* output, const Vector2& outputQuadrantCenter, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1761{
1762 if (worker)
1763 {
1764 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographies8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, homographies, borderColor, output, outputQuadrantCenter.x(), outputQuadrantCenter.y(), outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 14u, 15u, 20u);
1765 }
1766 else
1767 {
1768 homographies8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, homographies, borderColor, output, outputQuadrantCenter.x(), outputQuadrantCenter.y(), outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1769 }
1770}
1771
1772template <unsigned int tChannels>
1773inline void FrameInterpolatorBilinear::homographyMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, uint8_t* output, uint8_t* outputMask, const CV::PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker)
1774{
1775 // we adjust the homography to address 'outputOrigin'
1776 const SquareMatrix3 input_H_shiftedOutput = input_H_output * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
1777
1778 if (worker)
1779 {
1780 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographyMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, output, outputMask, maskValue, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, 0u), 0, outputHeight, 12u, 13u, 20u);
1781 }
1782 else
1783 {
1784 homographyMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, output, outputMask, maskValue, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, outputHeight);
1785 }
1786}
1787
1788template <unsigned int tChannels>
1789inline void FrameInterpolatorBilinear::homographiesMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], uint8_t* output, uint8_t* outputMask, const Vector2& outputQuadrantCenter, const CV::PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker, const uint8_t maskValue)
1790{
1791 if (worker)
1792 {
1793 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographiesMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, homographies, output, outputMask, maskValue, outputQuadrantCenter.x(), outputQuadrantCenter.y(), outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, 0u), 0, outputHeight);
1794 }
1795 else
1796 {
1797 homographiesMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, homographies, output, outputMask, maskValue, outputQuadrantCenter.x(), outputQuadrantCenter.y(), outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, outputHeight);
1798 }
1799}
1800
1801template <unsigned int tChannels>
1802inline void FrameInterpolatorBilinear::homographyWithCamera8BitPerChannel(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const uint8_t* input, const SquareMatrix3& homography, const bool useDistortionParameters, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1803{
1804 const SquareMatrix3 normalizedHomography(inputCamera.invertedIntrinsic() * homography * outputCamera.intrinsic());
1805
1806 const PinholeCamera::DistortionLookup outputCameraDistortionLookup(outputCamera, 10u);
1807
1808 if (worker)
1809 {
1810 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographyWithCamera8BitPerChannelSubset<tChannels>, &inputCamera, &outputCamera, &outputCameraDistortionLookup, input, &normalizedHomography, useDistortionParameters, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputCamera.height());
1811 }
1812 else
1813 {
1814 homographyWithCamera8BitPerChannelSubset<tChannels>(&inputCamera, &outputCamera, &outputCameraDistortionLookup, input, &normalizedHomography, useDistortionParameters, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, outputCamera.height());
1815 }
1816}
1817
1818template <unsigned int tChannels>
1819inline void FrameInterpolatorBilinear::homographyWithCameraMask8BitPerChannel(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const uint8_t* input, const unsigned int inputPaddingElements, const SquareMatrix3& homography, uint8_t* output, uint8_t* outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker, const uint8_t maskValue)
1820{
1821 const SquareMatrix3 normalizedHomography(inputCamera.invertedIntrinsic() * homography * outputCamera.intrinsic());
1822
1823 const PinholeCamera::DistortionLookup outputCameraDistortionLookup(outputCamera, 10u);
1824
1825 if (worker)
1826 {
1827 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographyWithCameraMask8BitPerChannelSubset<tChannels>, &inputCamera, &outputCamera, &outputCameraDistortionLookup, input, inputPaddingElements, &normalizedHomography, output, outputMask, outputPaddingElements, outputMaskPaddingElements, maskValue, 0u, 0u), 0, outputCamera.height(), 11u, 12u, 10u);
1828 }
1829 else
1830 {
1831 homographyWithCameraMask8BitPerChannelSubset<tChannels>(&inputCamera, &outputCamera, &outputCameraDistortionLookup, input, inputPaddingElements, &normalizedHomography, output, outputMask, outputPaddingElements, outputMaskPaddingElements, maskValue, 0u, outputCamera.height());
1832 }
1833}
1834
1835template <typename T, unsigned int tChannels>
1836inline void FrameInterpolatorBilinear::lookup(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& input_LT_output, const bool offset, const T* borderColor, T* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1837{
1838 if constexpr (std::is_same<T, uint8_t>::value)
1839 {
1840#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1841 if ((tChannels >= 1u && input_LT_output.sizeX() >= 8) || (tChannels >= 2u && input_LT_output.sizeX() >= 4))
1842 {
1843 // NEON implementation for 1 channel: min width 8; for 2+ channels: min width 4
1844
1845 if (worker)
1846 {
1847 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::lookup8BitPerChannelSubsetNEON<tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (unsigned int)(input_LT_output.sizeY()), 9u, 10u, 20u);
1848 }
1849 else
1850 {
1851 lookup8BitPerChannelSubsetNEON<tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (unsigned int)(input_LT_output.sizeY()));
1852 }
1853
1854 return;
1855 }
1856#endif // defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1857
1858 if (worker)
1859 {
1860 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::lookup8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (unsigned int)input_LT_output.sizeY(), 9u, 10u, 20u);
1861 }
1862 else
1863 {
1864 lookup8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (unsigned int)(input_LT_output.sizeY()));
1865 }
1866 }
1867 else
1868 {
1869 ocean_assert((!std::is_same<T, uint8_t>::value));
1870
1871 if (worker)
1872 {
1873 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::lookupSubset<T, tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (unsigned int)(input_LT_output.sizeY()), 9u, 10u, 20u);
1874 }
1875 else
1876 {
1877 lookupSubset<T, tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (unsigned int)(input_LT_output.sizeY()));
1878 }
1879 }
1880}
1881
1882template <unsigned int tChannels>
1883inline void FrameInterpolatorBilinear::lookupMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& input_LT_output, const bool offset, uint8_t* output, uint8_t* outputMask, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker, const uint8_t maskValue)
1884{
1885 if (worker)
1886 {
1887 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::lookupMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, output, outputMask, maskValue, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, 0u), 0u, (unsigned int)(input_LT_output.sizeY()), 11u, 12u, 20u);
1888 }
1889 else
1890 {
1891 lookupMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, output, outputMask, maskValue, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, (unsigned int)(input_LT_output.sizeY()));
1892 }
1893}
1894
1895template <typename T, unsigned int tChannels>
1896void FrameInterpolatorBilinear::resampleCameraImage(const T* sourceFrame, const AnyCamera& sourceCamera, const SquareMatrix3& source_R_target, const AnyCamera& targetCamera, T* targetFrame, const unsigned int sourceFramePaddingElements, const unsigned int targetFramePaddingElements, LookupCorner2<Vector2>* source_OLT_target, Worker* worker, const unsigned int binSizeInPixel, const T* borderColor)
1897{
1898 static_assert(tChannels >= 1u, "Invalid channel number!");
1899
1900 ocean_assert(sourceFrame != nullptr);
1901 ocean_assert(sourceCamera.isValid());
1902 ocean_assert(source_R_target.isOrthonormal());
1903 ocean_assert(targetCamera.isValid());
1904 ocean_assert(targetFrame != nullptr);
1905 ocean_assert(binSizeInPixel >= 1u);
1906
1907 const size_t binsX = std::max(1u, targetCamera.width() / binSizeInPixel);
1908 const size_t binsY = std::max(1u, targetCamera.height() / binSizeInPixel);
1909 CV::FrameInterpolatorBilinear::LookupTable lookupTable(targetCamera.width(), targetCamera.height(), binsX, binsY);
1910
1911 for (size_t yBin = 0; yBin <= lookupTable.binsY(); ++yBin)
1912 {
1913 for (size_t xBin = 0; xBin <= lookupTable.binsX(); ++xBin)
1914 {
1915 const Vector2 cornerPosition = lookupTable.binTopLeftCornerPosition(xBin, yBin);
1916
1917 constexpr bool makeUnitVector = false; // we don't need a unit/normalized vector as we project the vector into the camera again
1918
1919 const Vector3 rayI = source_R_target * targetCamera.vector(cornerPosition, makeUnitVector);
1920 const Vector3 rayIF = Vector3(rayI.x(), -rayI.y(), -rayI.z());
1921
1922 if (rayIF.z() > Numeric::eps())
1923 {
1924 const Vector2 projectedPoint = sourceCamera.projectToImageIF(rayIF);
1925
1926 lookupTable.setBinTopLeftCornerValue(xBin, yBin, projectedPoint - cornerPosition);
1927 }
1928 else
1929 {
1930 // simply a coordinate far outside the input
1931 lookupTable.setBinTopLeftCornerValue(xBin, yBin, Vector2(Scalar(sourceCamera.width() * 10u), Scalar(sourceCamera.height() * 10u)));
1932 }
1933 }
1934 }
1935
1936 lookup<T, tChannels>(sourceFrame, sourceCamera.width(), sourceCamera.height(), lookupTable, true /*offset*/, borderColor, targetFrame, sourceFramePaddingElements, targetFramePaddingElements, worker);
1937
1938 if (source_OLT_target)
1939 {
1940 *source_OLT_target = std::move(lookupTable);
1941 }
1942}
1943
1944template <unsigned int tChannels>
1945void FrameInterpolatorBilinear::rotate8BitPerChannel(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker, const uint8_t* borderColor)
1946{
1947 static_assert(tChannels != 0u, "Invalid channel number!");
1948
1949 ocean_assert(source != nullptr && target != nullptr);
1950 ocean_assert(width >= 1u && height >= 1u);
1951
1952 if (worker)
1953 {
1954 worker->executeFunction(Worker::Function::createStatic(&rotate8BitPerChannelSubset<tChannels>, source, target, width, height, horizontalAnchorPosition, verticalAnchorPosition, angle, borderColor, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height);
1955 }
1956 else
1957 {
1958 rotate8BitPerChannelSubset<tChannels>(source, target, width, height, horizontalAnchorPosition, verticalAnchorPosition, angle, borderColor, sourcePaddingElements, targetPaddingElements, 0u, height);
1959 }
1960}
1961
1962template <unsigned int tChannels, PixelCenter tPixelCenter, typename TScalar>
1963inline void FrameInterpolatorBilinear::interpolatePixel8BitPerChannel(const uint8_t* frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2<TScalar>& position, uint8_t* result)
1964{
1965 static_assert(tChannels != 0u, "Invalid channel number!");
1966 static_assert(tPixelCenter == PC_TOP_LEFT || tPixelCenter == PC_CENTER, "Invalid pixel center!");
1967
1968 ocean_assert(frame != nullptr && result != nullptr);
1969 ocean_assert(width != 0u && height != 0u);
1970
1971 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
1972
1973 ocean_assert(position.x() >= TScalar(0));
1974 ocean_assert(position.y() >= TScalar(0));
1975
1976 if constexpr (tPixelCenter == PC_TOP_LEFT)
1977 {
1978 ocean_assert(position.x() <= TScalar(width - 1u));
1979 ocean_assert(position.y() <= TScalar(height - 1u));
1980
1981 const unsigned int left = (unsigned int)(position.x());
1982 const unsigned int top = (unsigned int)(position.y());
1983 ocean_assert(left < width && top < height);
1984
1985 const TScalar tx = position.x() - TScalar(left);
1986 ocean_assert(tx >= 0 && tx <= 1);
1987 const unsigned int txi = (unsigned int)(tx * TScalar(128) + TScalar(0.5));
1988 const unsigned int txi_ = 128u - txi;
1989
1990 const TScalar ty = position.y() - TScalar(top);
1991 ocean_assert(ty >= 0 && ty <= 1);
1992 const unsigned int tyi = (unsigned int)(ty * TScalar(128) + TScalar(0.5));
1993 const unsigned int tyi_ = 128u - tyi;
1994
1995 const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
1996 const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
1997
1998 const uint8_t* const topLeft = frame + top * frameStrideElements + tChannels * left;
1999
2000 const unsigned int txty = txi * tyi;
2001 const unsigned int txty_ = txi * tyi_;
2002 const unsigned int tx_ty = txi_ * tyi;
2003 const unsigned int tx_ty_ = txi_ * tyi_;
2004
2005 for (unsigned int n = 0u; n < tChannels; ++n)
2006 {
2007 result[n] = uint8_t((topLeft[n] * tx_ty_ + topLeft[rightOffset + n] * txty_ + topLeft[bottomOffset + n] * tx_ty + topLeft[bottomOffset + rightOffset + n] * txty + 8192u) >> 14u);
2008 }
2009 }
2010 else
2011 {
2012 ocean_assert(tPixelCenter == PC_CENTER);
2013
2014 ocean_assert(position.x() <= TScalar(width));
2015 ocean_assert(position.y() <= TScalar(height));
2016
2017 const TScalar xShifted = std::max(TScalar(0.0), position.x() - TScalar(0.5));
2018 const TScalar yShifted = std::max(TScalar(0.0), position.y() - TScalar(0.5));
2019
2020 const unsigned int left = (unsigned int)(xShifted);
2021 const unsigned int top = (unsigned int)(yShifted);
2022
2023 ocean_assert(left < width);
2024 ocean_assert(top < height);
2025
2026 const TScalar tx = xShifted - TScalar(left);
2027 const TScalar ty = yShifted - TScalar(top);
2028
2029 ocean_assert(tx >= 0 && tx <= 1);
2030 ocean_assert(ty >= 0 && ty <= 1);
2031
2032 const unsigned int txi = (unsigned int)(tx * TScalar(128) + TScalar(0.5));
2033 const unsigned int txi_ = 128u - txi;
2034
2035 const unsigned int tyi = (unsigned int)(ty * TScalar(128) + TScalar(0.5));
2036 const unsigned int tyi_ = 128u - tyi;
2037
2038 const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2039 const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2040
2041 const uint8_t* const topLeft = frame + top * frameStrideElements + left * tChannels;
2042
2043 const unsigned int txty = txi * tyi;
2044 const unsigned int txty_ = txi * tyi_;
2045 const unsigned int tx_ty = txi_ * tyi;
2046 const unsigned int tx_ty_ = txi_ * tyi_;
2047
2048 for (unsigned int n = 0u; n < tChannels; ++n)
2049 {
2050 result[n] = uint8_t((topLeft[n] * tx_ty_ + topLeft[rightOffset + n] * txty_ + topLeft[bottomOffset + n] * tx_ty + topLeft[bottomOffset + rightOffset + n] * txty + 8192u) >> 14u);
2051 }
2052 }
2053}
2054
2055template <typename TSource, typename TTarget, unsigned int tChannels, PixelCenter tPixelCenter, typename TScalar, typename TIntermediate>
2056inline void FrameInterpolatorBilinear::interpolatePixel(const TSource* frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2<TScalar>& position, TTarget* result, const TIntermediate& resultBias)
2057{
2058 static_assert(tChannels != 0u, "Invalid channel number!");
2059 static_assert(tPixelCenter == PC_TOP_LEFT || tPixelCenter == PC_CENTER, "Invalid pixel center!");
2060
2061 ocean_assert(frame != nullptr && result != nullptr);
2062 ocean_assert(width != 0u && height != 0u);
2063
2064 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
2065
2066 ocean_assert(position.x() >= TScalar(0));
2067 ocean_assert(position.y() >= TScalar(0));
2068
2069 if constexpr (tPixelCenter == PC_TOP_LEFT)
2070 {
2071 ocean_assert(position.x() <= TScalar(width - 1u));
2072 ocean_assert(position.y() <= TScalar(height - 1u));
2073
2074 const unsigned int left = (unsigned int)(position.x());
2075 const unsigned int top = (unsigned int)(position.y());
2076
2077 const TScalar tx = position.x() - TScalar(left);
2078 ocean_assert(tx >= 0 && tx <= 1);
2079
2080 const TScalar ty = position.y() - TScalar(top);
2081 ocean_assert(ty >= 0 && ty <= 1);
2082
2083 const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2084 const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2085
2086 const TSource* const topLeft = frame + top * frameStrideElements + tChannels * left;
2087
2088 const TIntermediate txty = TIntermediate(tx) * TIntermediate(ty);
2089 const TIntermediate txty_ = TIntermediate(tx) * (TIntermediate(1) - TIntermediate(ty));
2090 const TIntermediate tx_ty = (TIntermediate(1) - TIntermediate(tx)) * TIntermediate(ty);
2091 const TIntermediate tx_ty_ = (TIntermediate(1) - TIntermediate(tx)) * (TIntermediate(1) - TIntermediate(ty));
2092
2093 ocean_assert_accuracy(NumericT<TIntermediate>::isEqual(txty + txty_ + tx_ty + tx_ty_, TIntermediate(1)));
2094
2095 for (unsigned int n = 0u; n < tChannels; ++n)
2096 {
2097 result[n] = TTarget(TIntermediate(topLeft[n]) * tx_ty_ + TIntermediate(topLeft[rightOffset + n]) * txty_ + TIntermediate(topLeft[bottomOffset + n]) * tx_ty + TIntermediate(topLeft[bottomOffset + rightOffset + n]) * txty + resultBias);
2098 }
2099 }
2100 else
2101 {
2102 ocean_assert(tPixelCenter == PC_CENTER);
2103
2104 ocean_assert(position.x() <= TScalar(width));
2105 ocean_assert(position.y() <= TScalar(height));
2106
2107 const TScalar xShifted = std::max(TScalar(0.0), position.x() - TScalar(0.5));
2108 const TScalar yShifted = std::max(TScalar(0.0), position.y() - TScalar(0.5));
2109
2110 const unsigned int left = (unsigned int)(xShifted);
2111 const unsigned int top = (unsigned int)(yShifted);
2112
2113 ocean_assert(left < width);
2114 ocean_assert(top < height);
2115
2116 const TScalar tx = xShifted - TScalar(left);
2117 const TScalar ty = yShifted - TScalar(top);
2118
2119 ocean_assert(tx >= 0 && tx <= 1);
2120 ocean_assert(ty >= 0 && ty <= 1);
2121
2122 const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2123 const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2124
2125 const TSource* const topLeft = frame + top * frameStrideElements + tChannels * left;
2126
2127 const TIntermediate txty = TIntermediate(tx) * TIntermediate(ty);
2128 const TIntermediate txty_ = TIntermediate(tx) * (TIntermediate(1) - TIntermediate(ty));
2129 const TIntermediate tx_ty = (TIntermediate(1) - TIntermediate(tx)) * TIntermediate(ty);
2130 const TIntermediate tx_ty_ = (TIntermediate(1) - TIntermediate(tx)) * (TIntermediate(1) - TIntermediate(ty));
2131
2132 ocean_assert_accuracy(NumericT<TIntermediate>::isEqual(txty + txty_ + tx_ty + tx_ty_, TIntermediate(1)));
2133
2134 for (unsigned int n = 0u; n < tChannels; ++n)
2135 {
2136 result[n] = TTarget(TIntermediate(topLeft[n]) * tx_ty_ + TIntermediate(topLeft[rightOffset + n]) * txty_ + TIntermediate(topLeft[bottomOffset + n]) * tx_ty + TIntermediate(topLeft[bottomOffset + rightOffset + n]) * txty + resultBias);
2137 }
2138 }
2139}
2140
2141template <unsigned int tChannels, bool tAlphaAtFront, bool tTransparentIs0xFF>
2142inline void FrameInterpolatorBilinear::interpolate1PixelFullAlphaBorder8BitPerChannel(const uint8_t* frame, const unsigned int width, const unsigned int height, const Vector2& position, uint8_t* result, const unsigned int framePaddingElements)
2143{
2144 static_assert(tChannels != 0u, "Invalid channel number!");
2145
2146 ocean_assert(frame && result);
2147
2148 const Vector2 pos(position.x() - Scalar(0.5), position.y() - Scalar(0.5));
2149
2150 // check whether the position is outside the frame and will therefore be 100% transparent
2151 if (pos.x() <= Scalar(-1) || pos.y() <= Scalar(-1) || pos.x() >= Scalar(width) || pos.y() >= Scalar(height))
2152 {
2153 for (unsigned int n = 0u; n < tChannels - 1u; ++n)
2154 {
2156 }
2157
2158 result[FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] = FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2159
2160 return;
2161 }
2162
2163 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
2164
2165 const int left = int(Numeric::floor(pos.x()));
2166 const int top = int(Numeric::floor(pos.y()));
2167
2168 ocean_assert(left >= -1 && left < int(width));
2169 ocean_assert(top >= -1 && top < int(height));
2170
2171 if ((unsigned int)left < width - 1u && (unsigned int)top < height - 1u)
2172 {
2173 // we have a valid pixel position for the left, top, right and bottom pixel
2174
2175 const unsigned int txi = (unsigned int)((pos.x() - Scalar(left)) * Scalar(128) + Scalar(0.5));
2176 const unsigned int txi_ = 128u - txi;
2177
2178 const unsigned int tyi = (unsigned int)((pos.y() - Scalar(top)) * Scalar(128) + Scalar(0.5));
2179 const unsigned int tyi_ = 128u - tyi;
2180
2181 const uint8_t* const topLeft = frame + top * frameStrideElements + left * tChannels;
2182
2183 const unsigned int txty = txi * tyi;
2184 const unsigned int txty_ = txi * tyi_;
2185 const unsigned int tx_ty = txi_ * tyi;
2186 const unsigned int tx_ty_ = txi_ * tyi_;
2187
2188 for (unsigned int n = 0u; n < tChannels; ++n)
2189 {
2190 result[n] = (topLeft[n] * tx_ty_ + topLeft[tChannels + n] * txty_
2191 + topLeft[frameStrideElements + n] * tx_ty + topLeft[frameStrideElements + tChannels + n] * txty + 8192u) >> 14u;
2192 }
2193 }
2194 else
2195 {
2196 // we do not have a valid pixel for all 4-neighborhood pixels
2197
2198 const unsigned int txi = (unsigned int)((pos.x() - Scalar(left)) * Scalar(128) + Scalar(0.5));
2199 const unsigned int txi_ = 128u - txi;
2200
2201 const unsigned int tyi = (unsigned int)((pos.y() - Scalar(top)) * Scalar(128) + Scalar(0.5));
2202 const unsigned int tyi_ = 128u - tyi;
2203
2204 const unsigned int rightOffset = (left >= 0 && left + 1u < width) ? tChannels : 0u;
2205 const unsigned int bottomOffset = (top >= 0 && top + 1u < height) ? frameStrideElements : 0u;
2206
2207 ocean_assert(left < int(width) && top < int(height));
2208 const uint8_t* const topLeft = frame + max(0, top) * frameStrideElements + max(0, left) * tChannels;
2209
2210 const unsigned int txty = txi * tyi;
2211 const unsigned int txty_ = txi * tyi_;
2212 const unsigned int tx_ty = txi_ * tyi;
2213 const unsigned int tx_ty_ = txi_ * tyi_;
2214
2215 for (unsigned int n = FrameBlender::SourceOffset<tAlphaAtFront>::data(); n < tChannels + FrameBlender::SourceOffset<tAlphaAtFront>::data() - 1u; ++n)
2216 {
2217 result[n] = (topLeft[n] * tx_ty_ + topLeft[rightOffset + n] * txty_
2218 + topLeft[bottomOffset + n] * tx_ty + topLeft[bottomOffset + rightOffset + n] * txty + 8192u) >> 14u;
2219 }
2220
2221 const uint8_t alphaTopLeft = (left >= 0 && top >= 0) ? topLeft[FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] : FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2222 const uint8_t alphaTopRight = (left + 1u < width && top >= 0) ? topLeft[rightOffset + FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] : FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2223 const uint8_t alphaBottomLeft = (left >= 0 && top + 1u < height) ? topLeft[bottomOffset + FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] : FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2224 const uint8_t alphaBottomRight = (left + 1u < width && top + 1u < height) ? topLeft[bottomOffset + rightOffset + FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] : FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2225
2226 result[FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] = (alphaTopLeft * tx_ty_ + alphaTopRight * txty_ + alphaBottomLeft * tx_ty + alphaBottomRight * txty + 8192u) >> 14u;
2227 }
2228}
2229
2230template <unsigned int tChannels>
2231void FrameInterpolatorBilinear::affine8BitPerChannelSubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberOutputRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
2232{
2233 static_assert(tChannels >= 1u, "Invalid channel number!");
2234
2235 ocean_assert(source != nullptr && target != nullptr);
2236 ocean_assert(sourceWidth > 0u && sourceHeight > 0u);
2237 ocean_assert_and_suppress_unused(targetWidth > 0u && targetHeight > 0u, targetHeight);
2238 ocean_assert(source_A_target);
2239 ocean_assert(!source_A_target->isNull() && Numeric::isEqualEps((*source_A_target)[2]) && Numeric::isEqualEps((*source_A_target)[5]));
2240
2241 ocean_assert(firstTargetRow + numberOutputRows <= targetHeight);
2242
2243 const unsigned int targetStrideElements = tChannels * targetWidth + targetPaddingElements;
2244
2245 const Scalar scalarSourceWidth_1 = Scalar(sourceWidth - 1u);
2246 const Scalar scalarSourceHeight_1 = Scalar(sourceHeight - 1u);
2247
2248 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
2249
2250 uint8_t zeroColor[tChannels] = {uint8_t(0)};
2251 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
2252
2253 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberOutputRows; ++y)
2254 {
2255 PixelType* targetRow = (PixelType*)(target + y * targetStrideElements);
2256
2257 /*
2258 * We can slightly optimize the 3x3 matrix multiplication:
2259 *
2260 * | X0 Y0 Z0 | | x |
2261 * | X1 Y1 Z1 | * | y |
2262 * | 0 0 1 | | 1 |
2263 *
2264 * | xx | | X0 * x | | Y0 * y + Z0 |
2265 * | yy | = | X1 * x | + | Y1 * y + Z1 |
2266 *
2267 * As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
2268 *
2269 * C0 = Y0 * y + Z0
2270 * C1 = Y1 * y + Z1
2271 *
2272 * So the computation becomes:
2273 *
2274 * | x' | | X0 * x | | C0 |
2275 * | y' | = | X1 * x | + | C1 |
2276 */
2277
2278 const Vector2 X(source_A_target->data() + 0);
2279 const Vector2 c(Vector2(source_A_target->data() + 3) * Scalar(y) + Vector2(source_A_target->data() + 6));
2280
2281 for (unsigned int x = 0u; x < targetWidth; ++x)
2282 {
2283 const Vector2 sourcePosition = X * Scalar(x) + c;
2284
2285#ifdef OCEAN_DEBUG
2286 const Scalar debugSourceX = (*source_A_target)[0] * Scalar(x) + (*source_A_target)[3] * Scalar(y) + (*source_A_target)[6];
2287 const Scalar debugSourceY = (*source_A_target)[1] * Scalar(x) + (*source_A_target)[4] * Scalar(y) + (*source_A_target)[7];
2288 ocean_assert(sourcePosition.isEqual(Vector2(debugSourceX, debugSourceY), Scalar(0.01)));
2289#endif
2290
2291 if (sourcePosition.x() < Scalar(0) || sourcePosition.x() > scalarSourceWidth_1 || sourcePosition.y() < Scalar(0) || sourcePosition.y() > scalarSourceHeight_1)
2292 {
2293 *targetRow = *bColor;
2294 }
2295 else
2296 {
2297 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(source, sourceWidth, sourceHeight, sourcePaddingElements, sourcePosition, (uint8_t*)(targetRow));
2298 }
2299
2300 targetRow++;
2301 }
2302 }
2303}
2304
2305template <unsigned int tChannels>
2306void FrameInterpolatorBilinear::homography8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
2307{
2308 static_assert(tChannels >= 1u, "Invalid channel number!");
2309
2310 ocean_assert(input != nullptr && output != nullptr);
2311 ocean_assert(inputWidth > 0u && inputHeight > 0u);
2312 ocean_assert(outputWidth > 0u && outputHeight > 0u);
2313 ocean_assert(input_H_output != nullptr);
2314
2315 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
2316
2317 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
2318
2319 const Scalar scalarInputWidth_1 = Scalar(inputWidth - 1u);
2320 const Scalar scalarInputHeight_1 = Scalar(inputHeight - 1u);
2321
2322 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
2323
2324 uint8_t zeroColor[tChannels] = {uint8_t(0)};
2325 const PixelType bColor = borderColor ? *(PixelType*)borderColor : *(PixelType*)zeroColor;
2326
2327 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
2328 {
2329 /*
2330 * We can slightly optimize the 3x3 matrix multiplication:
2331 *
2332 * | X0 Y0 Z0 | | x |
2333 * | X1 Y1 Z1 | * | y |
2334 * | X2 Y2 Z2 | | 1 |
2335 *
2336 * | xx | | X0 * x | | Y0 * y + Z0 |
2337 * | yy | = | X1 * x | + | Y1 * y + Z1 |
2338 * | zz | | X2 * x | | Y2 * y + Z2 |
2339 *
2340 * | xx | | X0 * x | | C0 |
2341 * | yy | = | X1 * x | + | C1 |
2342 * | zz | | X2 * x | | C2 |
2343 *
2344 * As y is constant within the inner loop, we can pre-calculate the following terms:
2345 *
2346 * | x' | | (X0 * x + C0) / (X2 * x + C2) |
2347 * | y' | = | (X1 * x + C1) / (X2 * x + C2) |
2348 */
2349
2350 const Vector2 X(input_H_output->data() + 0);
2351 const Vector2 c(Vector2(input_H_output->data() + 3) * Scalar(y) + Vector2(input_H_output->data() + 6));
2352
2353 const Scalar X2 = (*input_H_output)(2, 0);
2354 const Scalar constValue2 = (*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2);
2355
2356 PixelType* outputRowPixel = (PixelType*)(output + y * outputStrideElements);
2357
2358 for (unsigned int x = 0u; x < outputWidth; ++x)
2359 {
2360 ocean_assert_accuracy(Numeric::isNotEqualEps((X2 * Scalar(x) + constValue2)));
2361 const Vector2 inputPosition((X * Scalar(x) + c) / (X2 * Scalar(x) + constValue2));
2362
2363#ifdef OCEAN_DEBUG
2364 const Vector2 debugInputPosition(*input_H_output * Vector2(Scalar(x), Scalar(y)));
2365 ocean_assert(inputPosition.isEqual(debugInputPosition, Scalar(0.01)));
2366#endif
2367
2368 if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
2369 {
2370 *outputRowPixel = bColor;
2371 }
2372 else
2373 {
2374 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputRowPixel));
2375 }
2376
2377 ++outputRowPixel;
2378 }
2379 }
2380}
2381
2382template <typename T, unsigned int tChannels>
2383void FrameInterpolatorBilinear::homographySubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
2384{
2385 static_assert(tChannels >= 1u, "Invalid channel number!");
2386
2387 ocean_assert(input != nullptr && output != nullptr);
2388 ocean_assert(inputWidth > 0u && inputHeight > 0u);
2389 ocean_assert_and_suppress_unused(outputWidth > 0u && outputHeight > 0u, outputHeight);
2390 ocean_assert(input_H_output != nullptr);
2391
2392 ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
2393
2394 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
2395
2396 const Scalar scalarInputWidth1 = Scalar(inputWidth - 1u);
2397 const Scalar scalarInputHeight1 = Scalar(inputHeight - 1u);
2398
2399 // we need to find a best matching floating point data type for the intermediate interpolation results
2400 typedef typename FloatTyper<T>::Type TIntermediate;
2401
2402 typedef typename DataType<T, tChannels>::Type PixelType;
2403
2404 constexpr T zeroColor[tChannels] = {T(0)};
2405 const PixelType* const bColor = borderColor ? (PixelType*)(borderColor) : (PixelType*)(zeroColor);
2406
2407 constexpr TIntermediate bias = TIntermediate(0);
2408
2409 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
2410 {
2411 /*
2412 * We can slightly optimize the 3x3 matrix multiplication:
2413 *
2414 * | X0 Y0 Z0 | | x |
2415 * | X1 Y1 Z1 | * | y |
2416 * | X2 Y2 Z2 | | 1 |
2417 *
2418 * | xx | | X0 * x | | Y0 * y + Z0 |
2419 * | yy | = | X1 * x | + | Y1 * y + Z1 |
2420 * | zz | | X2 * x | | Y2 * y + Z2 |
2421 *
2422 * | xx | | X0 * x | | C0 |
2423 * | yy | = | X1 * x | + | C1 |
2424 * | zz | | X2 * x | | C3 |
2425 *
2426 * As y is constant within the inner loop, we can pre-calculate the following terms:
2427 *
2428 * | x' | | (X0 * x + C0) / (X2 * x + C2) |
2429 * | y' | = | (X1 * x + C1) / (X2 * x + C2) |
2430 */
2431
2432 const Vector2 X(input_H_output->data() + 0);
2433 const Vector2 c(Vector2(input_H_output->data() + 3) * Scalar(y) + Vector2(input_H_output->data() + 6));
2434
2435 const Scalar X2 = (*input_H_output)(2, 0);
2436 const Scalar constValue2 = (*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2);
2437
2438 PixelType* outputRowPixel = (PixelType*)(output + y * outputStrideElements);
2439
2440 for (unsigned int x = 0u; x < outputWidth; ++x)
2441 {
2442 ocean_assert_accuracy(Numeric::isNotEqualEps((X2 * Scalar(x) + constValue2)));
2443 const Vector2 inputPosition((X * Scalar(x) + c) / (X2 * Scalar(x) + constValue2));
2444
2445#ifdef OCEAN_DEBUG
2446 const Vector2 debugInputPosition(*input_H_output * Vector2(Scalar(x), Scalar(y)));
2447 ocean_assert((std::is_same<float, Scalar>::value) || inputPosition.isEqual(debugInputPosition, Scalar(0.01)));
2448#endif
2449
2450 if (inputPosition.x() >= Scalar(0) && inputPosition.x() <= scalarInputWidth1 && inputPosition.y() >= Scalar(0) && inputPosition.y() <= scalarInputHeight1)
2451 {
2452 interpolatePixel<T, T, tChannels, CV::PC_TOP_LEFT, Scalar, TIntermediate>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (T*)(outputRowPixel), bias);
2453 }
2454 else
2455 {
2456 *outputRowPixel = *bColor;
2457 }
2458
2459 ++outputRowPixel;
2460 }
2461 }
2462}
2463
2464#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
2465
2466template <unsigned int tChannels>
2467inline void FrameInterpolatorBilinear::affine8BitPerChannelSSESubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
2468{
2469 static_assert(tChannels >= 1u, "Invalid channel number!");
2470
2471 ocean_assert(source && target);
2472 ocean_assert(sourceWidth > 0u && sourceHeight > 0u);
2473 ocean_assert(targetWidth >= 4u && targetHeight > 0u);
2474 ocean_assert(source_A_target);
2475 ocean_assert(!source_A_target->isNull() && Numeric::isEqualEps((*source_A_target)[2]) && Numeric::isEqualEps((*source_A_target)[5]));
2476
2477 ocean_assert_and_suppress_unused(firstTargetRow + numberTargetRows <= targetHeight, targetHeight);
2478
2479 const unsigned int sourceStrideElements = tChannels * sourceWidth + sourcePaddingElements;
2480 const unsigned int targetStrideElements = tChannels * targetWidth + targetPaddingElements;
2481
2482 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
2483
2484 uint8_t zeroColor[tChannels] = {uint8_t(0)};
2485 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
2486
2487 OCEAN_ALIGN_DATA(16) unsigned int validPixels[4];
2488
2489 OCEAN_ALIGN_DATA(16) unsigned int topLeftOffsets[4];
2490 OCEAN_ALIGN_DATA(16) unsigned int topRightOffsets[4];
2491 OCEAN_ALIGN_DATA(16) unsigned int bottomLeftOffsets[4];
2492 OCEAN_ALIGN_DATA(16) unsigned int bottomRightOffsets[4];
2493
2494 // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
2495 const __m128 m128_f_X0 = _mm_set_ps1(float((*source_A_target)(0, 0)));
2496 const __m128 m128_f_X1 = _mm_set_ps1(float((*source_A_target)(1, 0)));
2497
2498 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
2499 {
2500 PixelType* targetRow = (PixelType*)(target + y * targetStrideElements);
2501
2502 /*
2503 * We can slightly optimize the 3x3 matrix multiplication:
2504 *
2505 * | X0 Y0 Z0 | | x |
2506 * | X1 Y1 Z1 | * | y |
2507 * | 0 0 1 | | 1 |
2508 *
2509 * | xx | | X0 * x | | Y0 * y + Z0 |
2510 * | yy | = | X1 * x | + | Y1 * y + Z1 |
2511 *
2512 * As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
2513 *
2514 * C0 = Y0 * y + Z0
2515 * C1 = Y1 * y + Z1
2516 *
2517 * So the computation becomes:
2518 *
2519 * | x' | | X0 * x | | C0 |
2520 * | y' | = | X1 * x | + | C1 |
2521 */
2522
2523 // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
2524 const __m128 m128_f_C0 = _mm_set_ps1(float((*source_A_target)(0, 1) * Scalar(y) + (*source_A_target)(0, 2)));
2525 const __m128 m128_f_C1 = _mm_set_ps1(float((*source_A_target)(1, 1) * Scalar(y) + (*source_A_target)(1, 2)));
2526
2527 // we store 4 floats: [0.0f, 0.0f, 0.0f, 0.0f]
2528 const __m128 m128_f_zero = _mm_setzero_ps();
2529
2530 // we store 4 integers: [tChannels, tChannels, tChannels, tChannels]
2531 const __m128i m128_i_channels = _mm_set1_epi32(tChannels);
2532
2533 // we store 4 integers: [sourceStrideElements, sourceStrideElements, sourceStrideElements, sourceStrideElements]
2534 const __m128i m128_i_sourceStrideElements = _mm_set1_epi32(sourceStrideElements);
2535
2536 // we store 4 integers: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
2537 const __m128i m128_i_sourceWidth_1 = _mm_set1_epi32(int(sourceWidth) - 1);
2538 const __m128i m128_i_sourceHeight_1 = _mm_set1_epi32(int(sourceHeight) - 1);
2539
2540 // we store 4 floats: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
2541 const __m128 m128_f_sourceWidth_1 = _mm_set_ps1(float(sourceWidth - 1u));
2542 const __m128 m128_f_sourceHeight_1 = _mm_set_ps1(float(sourceHeight - 1u));
2543
2544 for (unsigned int x = 0u; x < targetWidth; x += 4u)
2545 {
2546 if (x + 4u > targetWidth)
2547 {
2548 // the last iteration will not fit into the output frame,
2549 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
2550
2551 ocean_assert(x >= 4u && targetWidth > 4u);
2552 const unsigned int newX = targetWidth - 4u;
2553
2554 ocean_assert(x > newX);
2555 targetRow -= x - newX;
2556
2557 x = newX;
2558
2559 // the for loop will stop after this iteration
2560 ocean_assert(!(x + 4u < targetWidth));
2561 }
2562
2563
2564 // we need four successive x coordinate floats:
2565 // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
2566 const __m128 m128_f_x_0123 = _mm_set_ps(float(x + 3u), float(x + 2u), float(x + 1u), float(x + 0u));
2567
2568 // we calculate xx and yy for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
2569 const __m128 m128_f_sourceX = _mm_add_ps(_mm_mul_ps(m128_f_X0, m128_f_x_0123), m128_f_C0);
2570 const __m128 m128_f_sourceY = _mm_add_ps(_mm_mul_ps(m128_f_X1, m128_f_x_0123), m128_f_C1);
2571
2572 // now we check whether we are inside the input frame
2573 const __m128 m128_f_validPixelX = _mm_and_ps(_mm_cmple_ps(m128_f_sourceX, m128_f_sourceWidth_1), _mm_cmpge_ps(m128_f_sourceX, m128_f_zero)); // inputPosition.x() <= (inputWidth - 1) && inputPosition.x() >= 0 ? 0xFFFFFFFF : 0x00000000
2574 const __m128 m128_f_validPixelY = _mm_and_ps(_mm_cmple_ps(m128_f_sourceY, m128_f_sourceHeight_1), _mm_cmpge_ps(m128_f_sourceY, m128_f_zero)); // inputPosition.y() <= (inputHeight - 1) && inputPosition.y() >= 0 ? 0xFFFFFFFF : 0x00000000
2575
2576 const __m128i m128_i_validPixel = _mm_castps_si128(_mm_and_ps(m128_f_validPixelX, m128_f_validPixelY)); // is_inside_input_frame(inputPosition) ? 0xFFFFFFFF : 0x00000000
2577
2578 // we can stop here if all pixels are invalid
2579 if (_mm_test_all_zeros(m128_i_validPixel, _mm_set1_epi32(0xFFFFFFFF)))
2580 {
2581#ifdef OCEAN_DEBUG
2582 OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
2583 _mm_store_si128((__m128i*)debugValidPixels, m128_i_validPixel);
2584 ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
2585#endif
2586
2587 targetRow[0] = *bColor;
2588 targetRow[1] = *bColor;
2589 targetRow[2] = *bColor;
2590 targetRow[3] = *bColor;
2591
2592 targetRow += 4;
2593
2594 continue;
2595 }
2596
2597 // we store the result
2598 _mm_store_si128((__m128i*)validPixels, m128_i_validPixel);
2599 ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
2600
2601
2602 // now we determine the left, top, right and bottom pixel used for the interpolation
2603 const __m128 m128_f_tx_floor = _mm_floor_ps(m128_f_sourceX);
2604 const __m128 m128_f_ty_floor = _mm_floor_ps(m128_f_sourceY);
2605
2606 // left = floor(x); top = floor(y)
2607 const __m128i m128_i_left = _mm_cvtps_epi32(m128_f_tx_floor);
2608 const __m128i m128_i_top = _mm_cvtps_epi32(m128_f_ty_floor);
2609
2610 // right = min(left + 1, width - 1); bottom = min(top + 1; height - 1)
2611 const __m128i m128_i_right = _mm_min_epu32(_mm_add_epi32(m128_i_left, _mm_set1_epi32(1)), m128_i_sourceWidth_1);
2612 const __m128i m128_i_bottom = _mm_min_epu32(_mm_add_epi32(m128_i_top, _mm_set1_epi32(1)), m128_i_sourceHeight_1);
2613
2614 // offset = (y * sourceStrideElements + tChannels * x)
2615 const __m128i m128_i_topLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left)); // topleftOffset = (top * sourceStrideElements + tChannels * left)
2616 const __m128i m128_i_topRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right)); // toprightOffset = (top * sourceStrideElements + tChannels * right)
2617 const __m128i m128_i_bottomLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left)); // ...
2618 const __m128i m128_i_bottomRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right));
2619
2620 // we store the offsets
2621 _mm_store_si128((__m128i*)topLeftOffsets, m128_i_topLeftOffset);
2622 _mm_store_si128((__m128i*)topRightOffsets, m128_i_topRightOffset);
2623 _mm_store_si128((__m128i*)bottomLeftOffsets, m128_i_bottomLeftOffset);
2624 _mm_store_si128((__m128i*)bottomRightOffsets, m128_i_bottomRightOffset);
2625
2626
2627 // now we need to determine the interpolation factors tx, tx_ and ty, ty_: (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
2628
2629 // we determine the fractional portions of the x' and y':
2630 // e.g., [43.1231, -12.5543, -34.123, 99.2]
2631 // [ 0.1231, 0.4457, 0.877, 0.2] // note the result for negative value - but we will not process negative values anyway due to 'validPixel'
2632 __m128 m128_f_tx = _mm_sub_ps(m128_f_sourceX, m128_f_tx_floor);
2633 __m128 m128_f_ty = _mm_sub_ps(m128_f_sourceY, m128_f_ty_floor);
2634
2635 // we use integer interpolation [0.0, 1.0] -> [0, 128]
2636 m128_f_tx = _mm_mul_ps(m128_f_tx, _mm_set_ps1(128.0f));
2637 m128_f_ty = _mm_mul_ps(m128_f_ty, _mm_set_ps1(128.0f));
2638
2639 m128_f_tx = _mm_round_ps(m128_f_tx, _MM_FROUND_TO_NEAREST_INT);
2640 m128_f_ty = _mm_round_ps(m128_f_ty, _MM_FROUND_TO_NEAREST_INT);
2641
2642 const __m128i m128_i_tx = _mm_cvtps_epi32(m128_f_tx);
2643 const __m128i m128_i_ty = _mm_cvtps_epi32(m128_f_ty);
2644
2645 interpolate4Pixels8BitPerChannelSSE<tChannels>(source, topLeftOffsets, topRightOffsets, bottomLeftOffsets, bottomRightOffsets, validPixels, *bColor, m128_i_tx, m128_i_ty, targetRow);
2646 targetRow += 4;
2647 }
2648 }
2649}
2650
2651template <unsigned int tChannels>
2652inline void FrameInterpolatorBilinear::homography8BitPerChannelSSESubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
2653{
2654 static_assert(tChannels >= 1u, "Invalid channel number!");
2655
2656 ocean_assert(input != nullptr && output != nullptr);
2657 ocean_assert(inputWidth > 0u && inputHeight > 0u);
2658 ocean_assert(outputWidth >= 4u && outputHeight > 0u);
2659 ocean_assert(input_H_output != nullptr);
2660
2661 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
2662
2663 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
2664 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
2665
2666 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
2667
2668 uint8_t zeroColor[tChannels] = {uint8_t(0)};
2669 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
2670
2671 OCEAN_ALIGN_DATA(16) unsigned int validPixels[4];
2672
2673 OCEAN_ALIGN_DATA(16) unsigned int topLeftOffsets[4];
2674 OCEAN_ALIGN_DATA(16) unsigned int topRightOffsets[4];
2675 OCEAN_ALIGN_DATA(16) unsigned int bottomLeftOffsets[4];
2676 OCEAN_ALIGN_DATA(16) unsigned int bottomRightOffsets[4];
2677
2678 // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
2679 const __m128 m128_f_X0 = _mm_set_ps1(float((*input_H_output)(0, 0)));
2680 const __m128 m128_f_X1 = _mm_set_ps1(float((*input_H_output)(1, 0)));
2681 const __m128 m128_f_X2 = _mm_set_ps1(float((*input_H_output)(2, 0)));
2682
2683 // we store 4 floats: [0.0f, 0.0f, 0.0f, 0.0f]
2684 const __m128 m128_f_zero = _mm_setzero_ps();
2685
2686 // we store 4 integers: [tChannels, tChannels, tChannels, tChannels]
2687 const __m128i m128_i_channels = _mm_set1_epi32(tChannels);
2688
2689 // we store 4 integers: [inputStrideElements, inputStrideElements, inputStrideElements, inputStrideElements]
2690 const __m128i m128_i_inputStrideElements = _mm_set1_epi32(inputStrideElements);
2691
2692 // we store 4 integers: [inputWidth - 1, inputWidth - 1, inputWidth -1, inputWidth -1], and same with inputHeight
2693 const __m128i m128_i_inputWidth_1 = _mm_set1_epi32(int(inputWidth) - 1);
2694 const __m128i m128_i_inputHeight_1 = _mm_set1_epi32(int(inputHeight) - 1);
2695
2696 // we store 4 floats: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
2697 const __m128 m128_f_inputWidth_1 = _mm_set_ps1(float(inputWidth - 1u));
2698 const __m128 m128_f_inputHeight_1 = _mm_set_ps1(float(inputHeight - 1u));
2699
2700 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
2701 {
2702 PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
2703
2704 /*
2705 * We can slightly optimize the 3x3 matrix multiplication:
2706 *
2707 * | X0 Y0 Z0 | | x |
2708 * | X1 Y1 Z1 | * | y |
2709 * | X2 Y2 Z2 | | 1 |
2710 *
2711 * | xx | | X0 * x | | Y0 * y + Z0 |
2712 * | yy | = | X1 * x | + | Y1 * y + Z1 |
2713 * | zz | | X2 * x | | Y2 * y + Z2 |
2714 *
2715 * | xx | | X0 * x | | C0 |
2716 * | yy | = | X1 * x | + | C1 |
2717 * | zz | | X2 * x | | C2 |
2718 *
2719 * As y is constant within the inner loop, we can pre-calculate the following terms:
2720 *
2721 * | x' | | (X0 * x + C0) / (X2 * x + C2) |
2722 * | y' | = | (X1 * x + C1) / (X2 * x + C2) |
2723 */
2724
2725 // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
2726 const __m128 m128_f_C0 = _mm_set_ps1(float((*input_H_output)(0, 1) * Scalar(y) + (*input_H_output)(0, 2)));
2727 const __m128 m128_f_C1 = _mm_set_ps1(float((*input_H_output)(1, 1) * Scalar(y) + (*input_H_output)(1, 2)));
2728 const __m128 m128_f_C2 = _mm_set_ps1(float((*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2)));
2729
2730 for (unsigned int x = 0u; x < outputWidth; x += 4u)
2731 {
2732 if (x + 4u > outputWidth)
2733 {
2734 // the last iteration will not fit into the output frame,
2735 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
2736
2737 ocean_assert(x >= 4u && outputWidth > 4u);
2738 const unsigned int newX = outputWidth - 4u;
2739
2740 ocean_assert(x > newX);
2741 outputPixelData -= x - newX;
2742
2743 x = newX;
2744
2745 // the for loop will stop after this iteration
2746 ocean_assert(!(x + 4u < outputWidth));
2747 }
2748
2749
2750 // we need four successive x coordinate floats:
2751 // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
2752 const __m128 m128_f_x_0123 = _mm_set_ps(float(x + 3u), float(x + 2u), float(x + 1u), float(x + 0u));
2753
2754 // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
2755 const __m128 m128_f_xx = _mm_add_ps(_mm_mul_ps(m128_f_X0, m128_f_x_0123), m128_f_C0);
2756 const __m128 m128_f_yy = _mm_add_ps(_mm_mul_ps(m128_f_X1, m128_f_x_0123), m128_f_C1);
2757 const __m128 m128_f_zz = _mm_add_ps(_mm_mul_ps(m128_f_X2, m128_f_x_0123), m128_f_C2);
2758
2759#ifdef USE_APPROXIMATED_INVERSE_OF_ZZ // (not defined by default)
2760
2761 // we calculate the (approximated) inverse of zz,
2762 // the overall performance will be approx. 5% better while the accuracy will be slightly worse:
2763 // [1/zz3, 1/zz2, 1/zz1, 1/zz0]
2764 const __m128 inv_zz_128 = _mm_rcp_ps(m128_f_zz);
2765
2766 // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
2767 const __m128 m128_f_inputX = _mm_mul_ps(m128_f_xx, inv_zz_128);
2768 const __m128 m128_f_inputY = _mm_mul_ps(m128_f_yy, inv_zz_128);
2769
2770#else
2771
2772 // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
2773 const __m128 m128_f_inputX = _mm_div_ps(m128_f_xx, m128_f_zz);
2774 const __m128 m128_f_inputY = _mm_div_ps(m128_f_yy, m128_f_zz);
2775
2776#endif // USE_APPROXIMATED_INVERSE_OF_ZZ
2777
2778
2779 // now we check whether we are inside the input frame
2780 const __m128 m128_f_validPixelX = _mm_and_ps(_mm_cmple_ps (m128_f_inputX, m128_f_inputWidth_1), _mm_cmpge_ps(m128_f_inputX, m128_f_zero)); // inputPosition.x() <= (inputWidth-1) && inputPosition.x() >= 0 ? 0xFFFFFF : 0x000000
2781 const __m128 m128_f_validPixelY = _mm_and_ps(_mm_cmple_ps (m128_f_inputY, m128_f_inputHeight_1), _mm_cmpge_ps(m128_f_inputY, m128_f_zero)); // inputPosition.y() <= (inputHeight-1) && inputPosition.y() >= 0 ? 0xFFFFFF : 0x000000
2782
2783 const __m128i m128_i_validPixel = _mm_castps_si128(_mm_and_ps(m128_f_validPixelX, m128_f_validPixelY)); // is_inside_input_frame(inputPosition) ? 0xFFFFFF : 0x000000
2784
2785 // we can stop here if all pixels are invalid
2786 if (_mm_test_all_zeros(m128_i_validPixel, _mm_set1_epi32(0xFFFFFFFF)))
2787 {
2788#ifdef OCEAN_DEBUG
2789 OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
2790 _mm_store_si128((__m128i*)debugValidPixels, m128_i_validPixel);
2791 ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
2792#endif
2793
2794 outputPixelData[0] = *bColor;
2795 outputPixelData[1] = *bColor;
2796 outputPixelData[2] = *bColor;
2797 outputPixelData[3] = *bColor;
2798
2799 outputPixelData += 4;
2800
2801 continue;
2802 }
2803
2804 // we store the result
2805 _mm_store_si128((__m128i*)validPixels, m128_i_validPixel);
2806 ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
2807
2808
2809 // now we determine the left, top, right and bottom pixel used for the interpolation
2810 const __m128 m128_f_tx_floor = _mm_floor_ps(m128_f_inputX);
2811 const __m128 m128_f_ty_floor = _mm_floor_ps(m128_f_inputY);
2812
2813 // left = floor(x); top = floor(y)
2814 const __m128i m128_i_left = _mm_cvtps_epi32(m128_f_tx_floor);
2815 const __m128i m128_i_top = _mm_cvtps_epi32(m128_f_ty_floor);
2816
2817 // right = min(left + 1, width - 1); bottom = min(top + 1; height - 1)
2818 const __m128i m128_i_right = _mm_min_epu32(_mm_add_epi32(m128_i_left, _mm_set1_epi32(1)), m128_i_inputWidth_1);
2819 const __m128i m128_i_bottom = _mm_min_epu32(_mm_add_epi32(m128_i_top, _mm_set1_epi32(1)), m128_i_inputHeight_1);
2820
2821 // offset = (y * inputStrideElements + tChannels * x)
2822 const __m128i m128_i_topLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left)); // topleftOffset = (top * inputStrideElements + tChannels * left)
2823 const __m128i m128_i_topRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right)); // toprightOffset = (top * inputStrideElements + tChannels * right)
2824 const __m128i m128_i_bottomLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left)); // ...
2825 const __m128i m128_i_bottomRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right));
2826
2827 // we store the offsets
2828 _mm_store_si128((__m128i*)topLeftOffsets, m128_i_topLeftOffset);
2829 _mm_store_si128((__m128i*)topRightOffsets, m128_i_topRightOffset);
2830 _mm_store_si128((__m128i*)bottomLeftOffsets, m128_i_bottomLeftOffset);
2831 _mm_store_si128((__m128i*)bottomRightOffsets, m128_i_bottomRightOffset);
2832
2833
2834 // now we need to determine the interpolation factors tx, tx_ and ty, ty_: (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
2835
2836 // we determine the fractional portions of the x' and y':
2837 // e.g., [43.1231, -12.5543, -34.123, 99.2]
2838 // [ 0.1231, 0.4457, 0.877, 0.2] // note the result for negative value - but we will not process negative values anyway due to 'validPixel'
2839 __m128 m128_f_tx = _mm_sub_ps(m128_f_inputX, m128_f_tx_floor);
2840 __m128 m128_f_ty = _mm_sub_ps(m128_f_inputY, m128_f_ty_floor);
2841
2842 // we use integer interpolation [0.0, 1.0] -> [0, 128]
2843 m128_f_tx = _mm_mul_ps(m128_f_tx, _mm_set_ps1(128.0f));
2844 m128_f_ty = _mm_mul_ps(m128_f_ty, _mm_set_ps1(128.0f));
2845
2846 m128_f_tx = _mm_round_ps(m128_f_tx, _MM_FROUND_TO_NEAREST_INT);
2847 m128_f_ty = _mm_round_ps(m128_f_ty, _MM_FROUND_TO_NEAREST_INT);
2848
2849 const __m128i m128_i_tx = _mm_cvtps_epi32(m128_f_tx);
2850 const __m128i m128_i_ty = _mm_cvtps_epi32(m128_f_ty);
2851
2852 interpolate4Pixels8BitPerChannelSSE<tChannels>(input, topLeftOffsets, topRightOffsets, bottomLeftOffsets, bottomRightOffsets, validPixels, *bColor, m128_i_tx, m128_i_ty, outputPixelData);
2853 outputPixelData += 4;
2854 }
2855 }
2856}
2857
2858template <>
2859OCEAN_FORCE_INLINE __m128i FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<3u>(const __m128i& sourcesTopLeft, const __m128i& sourcesTopRight, const __m128i& sourcesBottomLeft, const __m128i& sourcesBottomRight, const __m128i& factorsTopLeft, const __m128i& factorsTopRight, const __m128i& factorsBottomLeft, const __m128i& factorsBottomRight)
2860{
2861 // sourcesTopLeft stores the three color values of 4 (independent) pixels (the upper left pixels):
2862 // FEDC BA98 7654 3210
2863 // ---- VUYV UYVU YVUY
2864 // sourcesTopRight, sourcesBottomLeft, sourcesBottomRight have the same pattern
2865
2866 // factorsTopLeft stores the 32 bit interpolation values for 4 pixels:
2867 // FEDC BA98 7654 3210
2868 // 3 2 1 0 (32 bit interpolation values, fitting into 16 bit)
2869
2870
2871 // we will simply extract each channel from the source pixels,
2872 // each extracted channel will be multiplied by the corresponding interpolation factor
2873 // and all interpolation results will be accumulated afterwards
2874
2875 // FEDC BA98 7654 3210
2876 const __m128i mask32_Channel0 = SSE::set128i(0xFFFFFF09FFFFFF06ull, 0xFFFFFF03FFFFFF00ull); // ---9 ---6 ---3 ---0
2877 const __m128i mask32_Channel1 = SSE::set128i(0xFFFFFF0AFFFFFF07ull, 0xFFFFFF04FFFFFF01ull); // ---A ---7 ---4 ---1
2878 const __m128i mask32_Channel2 = SSE::set128i(0xFFFFFF0BFFFFFF08ull, 0xFFFFFF05FFFFFF02ull); // ---B ---8 ---5 ---2
2879
2880
2881 // we extract the first channel from the top left pixel values and multiply the channel with the interpolation factors
2882 // FEDC BA98 7654 3210
2883 // ---9 ---6 ---3 ---0
2884 // *
2885 // FTL3 FTL2 FTL1 FTL0
2886 __m128i multiplication_channel0 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel0));
2887
2888 // we the same multiplication for the second channel
2889 __m128i multiplication_channel1 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel1));
2890
2891 // and third channel
2892 __m128i multiplication_channel2 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel2));
2893
2894
2895 // now we repeat the process for the top right pixel values
2896 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel0)));
2897 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel1)));
2898 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel2)));
2899
2900
2901 // and for the bottom left pixel values
2902 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel0)));
2903 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel1)));
2904 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel2)));
2905
2906
2907 // and for the bottom right pixel values
2908 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel0)));
2909 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel1)));
2910 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel2)));
2911
2912
2913 const __m128i m128_i_8192 = _mm_set1_epi32(8192);
2914
2915 // we add 8192 for rounding and shift the result by 14 bits (division by 128*128) // TODO if using 256 we should be able to avoid the shifting by 14 bits (simply by using shuffle operations)
2916
2917 // in addition to rounding and shifting, we need to move the interpolation results to the correct channel:
2918 // target data: ---9 ---6 ---3 ---0
2919 // shufflet target: ---- --9- -6-- 3--0
2920 // mask location: ---C ---8 ---4 ---0
2921 // mask: ---- --C- -8-- 4--0
2922 __m128i interpolation_channel0 = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication_channel0, m128_i_8192), 14), SSE::set128i(0xFFFFFFFFFFFF0CFFull, 0xFF08FFFF04FFFF00ull));
2923
2924 // target data: ---A ---7 ---4 ---1
2925 // shufflet target: ---- -A-- 7--4 --1-
2926 // mask location: ---C ---8 ---4 ---0
2927 // mask: ---- -C-- 8--4 --0-
2928 __m128i interpolation_channel1 = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication_channel1, m128_i_8192), 14), SSE::set128i(0xFFFFFFFFFF0CFFFFull, 0x08FFFF04FFFF00FFull));
2929
2930 // target data: ---B ---8 ---5 ---2
2931 // shufflet target: ---- B--8 --5- -2--
2932 // mask location: ---C ---8 ---4 ---0
2933 // mask: ---- C--8 --4- -0--
2934 __m128i interpolation_channel2 = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication_channel2, m128_i_8192), 14), SSE::set128i(0xFFFFFFFF0CFFFF08ull, 0xFFFF04FFFF00FFFFull));
2935
2936
2937 // finally, we simply blend all interpolation results together
2938
2939 return _mm_or_si128(_mm_or_si128(interpolation_channel0, interpolation_channel1), interpolation_channel2);
2940}
2941
2942template <>
2943OCEAN_FORCE_INLINE __m128i FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<4u>(const __m128i& sourcesTopLeft, const __m128i& sourcesTopRight, const __m128i& sourcesBottomLeft, const __m128i& sourcesBottomRight, const __m128i& factorsTopLeft, const __m128i& factorsTopRight, const __m128i& factorsBottomLeft, const __m128i& factorsBottomRight)
2944{
2945 // sourcesTopLeft stores the four color values of 4 (independent) pixels (the upper left pixels):
2946 // FEDC BA98 7654 3210
2947 // AVUY AVUY AVUY AVUY
2948 // sourcesTopRight, sourcesBottomLeft, sourcesBottomRight have the same pattern
2949
2950 // factorsTopLeft stores the 32 bit interpolation values for 4 pixels:
2951 // FEDC BA98 7654 3210
2952 // 3 2 1 0 (32 bit interpolation values, fitting into 16 bit)
2953
2954
2955 // we will simply extract each channel from the source pixels,
2956 // each extracted channel will be multiplied by the corresponding interpolation factor
2957 // and all interpolation results will be accumulated afterwards
2958
2959 // FEDC BA98 7654 3210
2960 const __m128i mask32_Channel0 = SSE::set128i(0xA0A0A00CA0A0A008ull, 0xA0A0A004A0A0A000ull); // ---C ---8 ---4 ---0
2961 const __m128i mask32_Channel1 = SSE::set128i(0xA0A0A00DA0A0A009ull, 0xA0A0A005A0A0A001ull); // ---D ---9 ---5 ---1
2962 const __m128i mask32_Channel2 = SSE::set128i(0xA0A0A00EA0A0A00Aull, 0xA0A0A006A0A0A002ull); // ---E ---A ---6 ---2
2963 const __m128i mask32_Channel3 = SSE::set128i(0xA0A0A00FA0A0A00Bull, 0xA0A0A007A0A0A003ull); // ---F ---B ---7 ---3
2964
2965
2966 // we extract the first channel from the top left pixel values and multiply the channel with the interpolation factors
2967 // FEDC BA98 7654 3210
2968 // ---C ---8 ---4 ---0
2969 // *
2970 // FTL3 FTL2 FTL1 FTL0
2971 __m128i multiplication_channel0 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel0));
2972
2973 // we the same multiplication for the second channel
2974 __m128i multiplication_channel1 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel1));
2975
2976 // and third channel
2977 __m128i multiplication_channel2 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel2));
2978
2979 // and last channel
2980 __m128i multiplication_channel3 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel3));
2981
2982
2983 // now we repeat the process for the top right pixel values
2984 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel0)));
2985 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel1)));
2986 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel2)));
2987 multiplication_channel3 = _mm_add_epi32(multiplication_channel3, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel3)));
2988
2989
2990 // and for the bottom left pixel values
2991 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel0)));
2992 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel1)));
2993 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel2)));
2994 multiplication_channel3 = _mm_add_epi32(multiplication_channel3, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel3)));
2995
2996
2997 // and for the bottom right pixel values
2998 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel0)));
2999 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel1)));
3000 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel2)));
3001 multiplication_channel3 = _mm_add_epi32(multiplication_channel3, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel3)));
3002
3003
3004 const __m128i m128_i_8192 = _mm_set1_epi32(8192);
3005
3006 // we add 8192 for rounding and shift the result by 14 bits (division by 128*128)
3007
3008 // in addition to rounding and shifting, we need to move the interpolation results to the correct channel:
3009 // ---C ---8 ---4 ---0
3010 // ---C ---9 ---4 ---0
3011 __m128i interpolation_channel0 = _mm_srli_epi32(_mm_add_epi32(multiplication_channel0, m128_i_8192), 14);
3012
3013 // in addition to rounding and shifting, we need to move the interpolation results to the correct channel:
3014 // ---D ---9 ---5 ---1
3015 // --D- --9- --5- --1-
3016 __m128i interpolation_channel1 = _mm_slli_epi32(_mm_srli_epi32(_mm_add_epi32(multiplication_channel1, m128_i_8192), 14), 8);
3017
3018 // ---E ---A ---6 ---2
3019 // -E-- -A-- -6-- -2--
3020 __m128i interpolation_channel2 = _mm_slli_epi32(_mm_srli_epi32(_mm_add_epi32(multiplication_channel2, m128_i_8192), 14), 16);
3021
3022 // ---F ---B ---7 ---3
3023 // F--- B--- 7--- 3---
3024 __m128i interpolation_channel3 = _mm_slli_epi32(_mm_srli_epi32(_mm_add_epi32(multiplication_channel3, m128_i_8192), 14), 24);
3025
3026
3027 // finally, we simply blend all interpolation results together
3028
3029 return _mm_or_si128(_mm_or_si128(interpolation_channel0, interpolation_channel1), _mm_or_si128(interpolation_channel2, interpolation_channel3));
3030}
3031
3032#ifdef OCEAN_COMPILER_MSC
3033
3034// we see a significant performance decrease with non-VS compilers/platforms,
3035// so we use the 1-channel and 3-channel specializations with the Microsoft compiler (OCEAN_COMPILER_MSC) only
3036
3037template <>
3038OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<1u>(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const DataType<uint8_t, 1u>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, 1u>::Type* targetPositionPixels)
3039{
3040 ocean_assert(source != nullptr);
3041 ocean_assert(targetPositionPixels != nullptr);
3042
3043 typedef typename DataType<uint8_t, 1u>::Type PixelType;
3044
3045 // as we do not initialize the following intermediate data,
3046 // we hopefully will not allocate memory on the stack each time this function is called
3047 OCEAN_ALIGN_DATA(16) PixelType pixels[16];
3048
3049 // we gather the individual source pixel values from the source image,
3050 // based on the calculated pixel locations
3051 for (unsigned int i = 0u; i < 4u; ++i)
3052 {
3053 if (validPixels[i])
3054 {
3055 pixels[i * 4u + 0u] = *((PixelType*)(source + offsetsTopLeft[i]));
3056 pixels[i * 4u + 1u] = *((PixelType*)(source + offsetsTopRight[i]));
3057 pixels[i * 4u + 2u] = *((PixelType*)(source + offsetsBottomLeft[i]));
3058 pixels[i * 4u + 3u] = *((PixelType*)(source + offsetsBottomRight[i]));
3059 }
3060 else
3061 {
3062 pixels[i * 4u + 0u] = borderColor;
3063 pixels[i * 4u + 1u] = borderColor;
3064 pixels[i * 4u + 2u] = borderColor;
3065 pixels[i * 4u + 3u] = borderColor;
3066 }
3067 }
3068
3069 static_assert(sizeof(__m128i) == sizeof(pixels), "Invalid data type!");
3070
3071 const __m128i m128_pixels = _mm_load_si128((const __m128i*)pixels);
3072
3073
3074 // factorLeft = 128 - factorRight
3075 // factorTop = 128 - factorBottom
3076
3077 const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3078 const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3079
3080 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3081 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3082
3083 const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3084 const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3085 const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3086 const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3087
3088 // pixels stores the four interpolation grascale pixel values (top left, top right, bottom left, bottom right) for 4 (independent) pixels:
3089 // F E D C B A 9 8 7 6 5 4 3 2 1 0
3090 // BR BL TR TL BR BL TR TL BR BL TR TL BR BL TR TL
3091
3092 // factorsTopLeft stores the 32 bit interpolation values for 4 pixels:
3093 // FEDC BA98 7654 3210
3094 // 3 2 1 0 (32 bit interpolation values, fitting into 16 bit)
3095
3096
3097 // we will simply extract each channel from the source pixels,
3098 // each extracted channel will be multiplied by the corresponding interpolation factor
3099 // and all interpolation results will be accumulated afterwards
3100
3101 // FEDC BA98 7654 3210
3102 const __m128i mask32_topLeft = SSE::set128i(0xFFFFFF0CFFFFFF08ull, 0xFFFFFF04FFFFFF00ull); // ---C ---8 ---4 ---0
3103 const __m128i mask32_topRight = SSE::set128i(0xFFFFFF0DFFFFFF09ull, 0xFFFFFF05FFFFFF01ull); // ---D ---9 ---5 ---1
3104 const __m128i mask32_bottomLeft = SSE::set128i(0xFFFFFF0EFFFFFF0Aull, 0xFFFFFF06FFFFFF02ull); // ---E ---A ---6 ---2
3105 const __m128i mask32_bottomRight = SSE::set128i(0xFFFFFF0FFFFFFF0Bull, 0xFFFFFF07FFFFFF03ull); // ---F ---B ---7 ---3
3106
3107
3108 // we extract the top left values and multiply them with the interpolation factors
3109 // FEDC BA98 7654 3210
3110 // ---C ---8 ---4 ---0
3111 // *
3112 // FTL3 FTL2 FTL1 FTL0
3113 __m128i multiplicationA = _mm_mullo_epi32(m128_factorsTopLeft, _mm_shuffle_epi8(m128_pixels, mask32_topLeft));
3114 __m128i multiplicationB = _mm_mullo_epi32(m128_factorsTopRight, _mm_shuffle_epi8(m128_pixels, mask32_topRight));
3115
3116 multiplicationA = _mm_add_epi32(multiplicationA, _mm_mullo_epi32(m128_factorsBottomLeft, _mm_shuffle_epi8(m128_pixels, mask32_bottomLeft)));
3117 multiplicationB = _mm_add_epi32(multiplicationB, _mm_mullo_epi32(m128_factorsBottomRight, _mm_shuffle_epi8(m128_pixels, mask32_bottomRight)));
3118
3119 __m128i multiplication = _mm_add_epi32(multiplicationA, multiplicationB);
3120
3121 const __m128i m128_i_8192 = _mm_set1_epi32(8192);
3122
3123 // we add 8192 for rounding and shift the result by 14 bits (division by 128*128) // TODO if using 256 we should be able to avoid the shifting by 14 bits (simply by using shuffle operations)
3124 // additionally, we shuffle the individual results together
3125
3126 const __m128i result = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication, m128_i_8192), 14), SSE::set128i(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFF0C080400ull));
3127
3128 *((unsigned int*)targetPositionPixels) = _mm_extract_epi32(result, 0);
3129}
3130
3131template <>
3132OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<3u>(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const DataType<uint8_t, 3u>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, 3u>::Type* targetPositionPixels)
3133{
3134 ocean_assert(source != nullptr);
3135 ocean_assert(targetPositionPixels != nullptr);
3136
3137 typedef typename DataType<uint8_t, 3u>::Type PixelType;
3138
3139 // as we do not initialize the following intermediate data,
3140 // we hopefully will not allocate memory on the stack each time this function is called
3141 OCEAN_ALIGN_DATA(16) PixelType topLeftPixels[6];
3142 OCEAN_ALIGN_DATA(16) PixelType topRightPixels[6];
3143 OCEAN_ALIGN_DATA(16) PixelType bottomLeftPixels[6];
3144 OCEAN_ALIGN_DATA(16) PixelType bottomRightPixels[6];
3145
3146 // we gather the individual source pixel values from the source image,
3147 // based on the calculated pixel locations
3148 for (unsigned int i = 0u; i < 4u; ++i)
3149 {
3150 if (validPixels[i])
3151 {
3152 topLeftPixels[i] = *((PixelType*)(source + offsetsTopLeft[i]));
3153 topRightPixels[i] = *((PixelType*)(source + offsetsTopRight[i]));
3154 bottomLeftPixels[i] = *((PixelType*)(source + offsetsBottomLeft[i]));
3155 bottomRightPixels[i] = *((PixelType*)(source + offsetsBottomRight[i]));
3156 }
3157 else
3158 {
3159 topLeftPixels[i] = borderColor;
3160 topRightPixels[i] = borderColor;
3161 bottomLeftPixels[i] = borderColor;
3162 bottomRightPixels[i] = borderColor;
3163 }
3164 }
3165
3166 static_assert(sizeof(__m128i) <= sizeof(topLeftPixels), "Invalid data type!");
3167
3168 const __m128i m128_topLeftPixels = _mm_load_si128((const __m128i*)topLeftPixels);
3169 const __m128i m128_topRightPixels = _mm_load_si128((const __m128i*)topRightPixels);
3170 const __m128i m128_bottomLeftPixels = _mm_load_si128((const __m128i*)bottomLeftPixels);
3171 const __m128i m128_bottomRightPixels = _mm_load_si128((const __m128i*)bottomRightPixels);
3172
3173
3174 // factorLeft = 128 - factorRight
3175 // factorTop = 128 - factorBottom
3176
3177 const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3178 const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3179
3180 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3181 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3182
3183 const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3184 const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3185 const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3186 const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3187
3188
3189 const __m128i m128_interpolationResult = interpolate4Pixels8BitPerChannelSSE<3u>(m128_topLeftPixels, m128_topRightPixels, m128_bottomLeftPixels, m128_bottomRightPixels, m128_factorsTopLeft, m128_factorsTopRight, m128_factorsBottomLeft, m128_factorsBottomRight);
3190
3191 // we copy the first 12 bytes
3192 memcpy(targetPositionPixels, &m128_interpolationResult, 12u);
3193}
3194
3195#endif // OCEAN_COMPILER_MSC
3196
3197template <>
3198OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<4u>(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const DataType<uint8_t, 4u>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, 4u>::Type* targetPositionPixels)
3199{
3200 ocean_assert(source != nullptr);
3201 ocean_assert(targetPositionPixels != nullptr);
3202
3203 typedef typename DataType<uint8_t, 4u>::Type PixelType;
3204
3205 // as we do not initialize the following intermediate data,
3206 // we hopefully will not allocate memory on the stack each time this function is called
3207 OCEAN_ALIGN_DATA(16) PixelType topLeftPixels[4];
3208 OCEAN_ALIGN_DATA(16) PixelType topRightPixels[4];
3209 OCEAN_ALIGN_DATA(16) PixelType bottomLeftPixels[4];
3210 OCEAN_ALIGN_DATA(16) PixelType bottomRightPixels[4];
3211
3212 // we gather the individual source pixel values from the source image,
3213 // based on the calculated pixel locations
3214
3215 for (unsigned int i = 0u; i < 4u; ++i)
3216 {
3217 if (validPixels[i])
3218 {
3219 topLeftPixels[i] = *((PixelType*)(source + offsetsTopLeft[i]));
3220 topRightPixels[i] = *((PixelType*)(source + offsetsTopRight[i]));
3221 bottomLeftPixels[i] = *((PixelType*)(source + offsetsBottomLeft[i]));
3222 bottomRightPixels[i] = *((PixelType*)(source + offsetsBottomRight[i]));
3223 }
3224 else
3225 {
3226 topLeftPixels[i] = borderColor;
3227 topRightPixels[i] = borderColor;
3228 bottomLeftPixels[i] = borderColor;
3229 bottomRightPixels[i] = borderColor;
3230 }
3231 }
3232
3233 static_assert(sizeof(__m128i) == sizeof(topLeftPixels), "Invalid data type!");
3234
3235 const __m128i m128_topLeftPixels = _mm_load_si128((const __m128i*)topLeftPixels);
3236 const __m128i m128_topRightPixels = _mm_load_si128((const __m128i*)topRightPixels);
3237 const __m128i m128_bottomLeftPixels = _mm_load_si128((const __m128i*)bottomLeftPixels);
3238 const __m128i m128_bottomRightPixels = _mm_load_si128((const __m128i*)bottomRightPixels);
3239
3240
3241 // factorLeft = 128 - factorRight
3242 // factorTop = 128 - factorBottom
3243
3244 const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3245 const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3246
3247 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3248 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3249
3250 const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3251 const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3252 const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3253 const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3254
3255
3256 const __m128i m128_interpolationResult = interpolate4Pixels8BitPerChannelSSE<4u>(m128_topLeftPixels, m128_topRightPixels, m128_bottomLeftPixels, m128_bottomRightPixels, m128_factorsTopLeft, m128_factorsTopRight, m128_factorsBottomLeft, m128_factorsBottomRight);
3257
3258 _mm_storeu_si128((__m128i*)targetPositionPixels, m128_interpolationResult);
3259}
3260
3261template <unsigned int tChannels>
3262OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const typename DataType<uint8_t, tChannels>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, tChannels>::Type* targetPositionPixels)
3263{
3264 ocean_assert(source != nullptr);
3265 ocean_assert(targetPositionPixels != nullptr);
3266
3267 // as we do not initialize the following intermediate data,
3268 // we hopefully will not allocate memory on the stack each time this function is called
3269 OCEAN_ALIGN_DATA(16) unsigned int factorsTopLeft[4];
3270 OCEAN_ALIGN_DATA(16) unsigned int factorsTopRight[4];
3271 OCEAN_ALIGN_DATA(16) unsigned int factorsBottomLeft[4];
3272 OCEAN_ALIGN_DATA(16) unsigned int factorsBottomRight[4];
3273
3274
3275 // factorLeft = 128 - factorRight
3276 // factorTop = 128 - factorBottom
3277
3278 const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3279 const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3280
3281 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3282 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3283
3284 const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3285 const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3286 const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3287 const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3288
3289
3290 // we store the interpolation factors
3291 _mm_store_si128((__m128i*)factorsTopLeft, m128_factorsTopLeft);
3292 _mm_store_si128((__m128i*)factorsTopRight, m128_factorsTopRight);
3293 _mm_store_si128((__m128i*)factorsBottomLeft, m128_factorsBottomLeft);
3294 _mm_store_si128((__m128i*)factorsBottomRight, m128_factorsBottomRight);
3295
3296 for (unsigned int i = 0u; i < 4u; ++i)
3297 {
3298 if (validPixels[i])
3299 {
3300 const uint8_t* topLeft = source + offsetsTopLeft[i];
3301 const uint8_t* topRight = source + offsetsTopRight[i];
3302
3303 const uint8_t* bottomLeft = source + offsetsBottomLeft[i];
3304 const uint8_t* bottomRight = source + offsetsBottomRight[i];
3305
3306 const unsigned int& factorTopLeft = factorsTopLeft[i];
3307 const unsigned int& factorTopRight = factorsTopRight[i];
3308 const unsigned int& factorBottomLeft = factorsBottomLeft[i];
3309 const unsigned int& factorBottomRight = factorsBottomRight[i];
3310
3311 for (unsigned int n = 0u; n < tChannels; ++n)
3312 {
3313 ((uint8_t*)targetPositionPixels)[n] = (uint8_t)((topLeft[n] * factorTopLeft + topRight[n] * factorTopRight + bottomLeft[n] * factorBottomLeft + bottomRight[n] * factorBottomRight + 8192u) >> 14u);
3314 }
3315 }
3316 else
3317 {
3318 *targetPositionPixels = borderColor;
3319 }
3320
3321 targetPositionPixels++;
3322 }
3323}
3324
3325#endif // OCEAN_HARDWARE_SSE_VERSION
3326
3327#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3328
3329template <unsigned int tChannels>
3330void FrameInterpolatorBilinear::affine8BitPerChannelNEONSubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
3331{
3332 static_assert(tChannels >= 1u, "Invalid channel number!");
3333
3334 ocean_assert(source && target);
3335 ocean_assert(sourceWidth > 0u && sourceHeight > 0u);
3336 ocean_assert(targetWidth >= 4u && targetHeight > 0u);
3337 ocean_assert(source_A_target);
3338 ocean_assert(!source_A_target->isNull() && Numeric::isEqualEps((*source_A_target)[2]) && Numeric::isEqualEps((*source_A_target)[5]));
3339
3340 ocean_assert(firstTargetRow + numberTargetRows <= targetHeight);
3341
3342 const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
3343 const unsigned int targetStrideElements = targetWidth * tChannels + targetPaddingElements;
3344
3345 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
3346
3347 uint8_t zeroColor[tChannels] = {uint8_t(0)};
3348 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
3349
3350 unsigned int validPixels[4];
3351
3352 unsigned int topLeftOffsetsElements[4];
3353 unsigned int topRightOffsetsElements[4];
3354 unsigned int bottomLeftOffsetsElements[4];
3355 unsigned int bottomRightOffsetsElements[4];
3356
3357 const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(tChannels);
3358
3359 // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
3360 const float32x4_t m128_f_X0 = vdupq_n_f32(float((*source_A_target)(0, 0)));
3361 const float32x4_t m128_f_X1 = vdupq_n_f32(float((*source_A_target)(1, 0)));
3362
3363 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
3364 {
3365 PixelType* targetRow = (PixelType*)(target + y * targetStrideElements);
3366
3367 /*
3368 * We can slightly optimize the 3x3 matrix multiplication:
3369 *
3370 * | X0 Y0 Z0 | | x |
3371 * | X1 Y1 Z1 | * | y |
3372 * | 0 0 1 | | 1 |
3373 *
3374 * | xx | | X0 * x | | Y0 * y + Z0 |
3375 * | yy | = | X1 * x | + | Y1 * y + Z1 |
3376 *
3377 * As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
3378 *
3379 * C0 = Y0 * y + Z0
3380 * C1 = Y1 * y + Z1
3381 *
3382 * So the computation becomes:
3383 *
3384 * | x' | | X0 * x | | C0 |
3385 * | y' | = | X1 * x | + | C1 |
3386 */
3387
3388 // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
3389 const float32x4_t m128_f_C0 = vdupq_n_f32(float((*source_A_target)(0, 1) * Scalar(y) + (*source_A_target)(0, 2)));
3390 const float32x4_t m128_f_C1 = vdupq_n_f32(float((*source_A_target)(1, 1) * Scalar(y) + (*source_A_target)(1, 2)));
3391
3392 // we store 4 floats: [0.0f, 0.0f, 0.0f, 0.0f]
3393 const float32x4_t m128_f_zero = vdupq_n_f32(0.0f);
3394
3395 // we store 4 integers: [sourceStrideElements, sourceStrideElements, sourceStrideElements, sourceStrideElements]
3396 const uint32x4_t m128_u_sourceStrideElements = vdupq_n_u32(sourceStrideElements);
3397
3398 // we store 4 integers: [sourceWidth - 1, sourceWidth - 1, sourceWidth - 1, sourceWidth - 1], and same with sourceHeight
3399 const uint32x4_t m128_u_sourceWidth_1 = vdupq_n_u32(sourceWidth - 1u);
3400 const uint32x4_t m128_u_sourceHeight_1 = vdupq_n_u32(sourceHeight - 1u);
3401
3402 // we store 4 floats: [sourceWidth - 1, sourceWidth - 1, sourceWidth - 1, sourceWidth - 1], and same with sourceHeight
3403 const float32x4_t m128_f_sourceWidth_1 = vdupq_n_f32(float(sourceWidth - 1u));
3404 const float32x4_t m128_f_sourceHeight_1 = vdupq_n_f32(float(sourceHeight - 1u));
3405
3406 for (unsigned int x = 0u; x < targetWidth; x += 4u)
3407 {
3408 if (x + 4u > targetWidth)
3409 {
3410 // the last iteration will not fit into the target frame,
3411 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
3412
3413 ocean_assert(x >= 4u && targetWidth > 4u);
3414 const unsigned int newX = targetWidth - 4u;
3415
3416 ocean_assert(x > newX);
3417 targetRow -= x - newX;
3418
3419 x = newX;
3420
3421 // the for loop will stop after this iteration
3422 ocean_assert(!(x + 4u < targetWidth));
3423 }
3424
3425
3426 // we need four successive x coordinate floats:
3427 // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
3428 float x_0123[4] = {float(x + 0u), float(x + 1u), float(x + 2u), float(x + 3u)};
3429 const float32x4_t m128_f_x_0123 = vld1q_f32(x_0123);
3430
3431 // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
3432 const float32x4_t m128_f_sourceX = vmlaq_f32(m128_f_C0, m128_f_X0, m128_f_x_0123);
3433 const float32x4_t m128_f_sourceY = vmlaq_f32(m128_f_C1, m128_f_X1, m128_f_x_0123);
3434
3435
3436 // now we check whether we are inside the source frame
3437 const uint32x4_t m128_u_validPixelX = vandq_u32(vcleq_f32(m128_f_sourceX, m128_f_sourceWidth_1), vcgeq_f32(m128_f_sourceX, m128_f_zero)); // sourcePosition.x() <= (sourceWidth - 1) && sourcePosition.x() >= 0 ? 0xFFFFFFFF : 0x00000000
3438 const uint32x4_t m128_u_validPixelY = vandq_u32(vcleq_f32(m128_f_sourceY, m128_f_sourceHeight_1), vcgeq_f32(m128_f_sourceY, m128_f_zero)); // sourcePosition.y() <= (sourceHeight - 1) && sourcePosition.y() >= 0 ? 0xFFFFFFFF : 0x00000000
3439
3440 const uint32x4_t m128_u_validPixel = vandq_u32(m128_u_validPixelX, m128_u_validPixelY); // is_inside_source_frame(sourcePosition) ? 0xFFFFFFFF : 0x00000000
3441
3442
3443 // we can stop here if all pixels are invalid
3444 const uint32x2_t m64_u_validPixel = vorr_u32(vget_low_u32(m128_u_validPixel), vget_high_u32(m128_u_validPixel));
3445 if ((vget_lane_u32(m64_u_validPixel, 0) | vget_lane_u32(m64_u_validPixel, 1)) == 0x00000000u)
3446 {
3447#ifdef OCEAN_DEBUG
3448 OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
3449 vst1q_u32(debugValidPixels, m128_u_validPixel);
3450 ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
3451#endif
3452
3453 targetRow[0] = *bColor;
3454 targetRow[1] = *bColor;
3455 targetRow[2] = *bColor;
3456 targetRow[3] = *bColor;
3457
3458 targetRow += 4;
3459
3460 continue;
3461 }
3462
3463
3464 // we store the result
3465 vst1q_u32(validPixels, m128_u_validPixel);
3466 ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
3467
3468
3469 // now we determine the left, top, right and bottom pixel used for the interpolation
3470 // left = floor(x); top = floor(y)
3471 const uint32x4_t m128_u_left = vcvtq_u32_f32(m128_f_sourceX);
3472 const uint32x4_t m128_u_top = vcvtq_u32_f32(m128_f_sourceY);
3473
3474 // right = min(left + 1, width - 1); bottom = min(top + 1; height - 1)
3475 const uint32x4_t m128_u_right = vminq_u32(vaddq_u32(m128_u_left, vdupq_n_u32(1u)), m128_u_sourceWidth_1);
3476 const uint32x4_t m128_u_bottom = vminq_u32(vaddq_u32(m128_u_top, vdupq_n_u32(1u)), m128_u_sourceHeight_1);
3477
3478 // offset = y * stride + x * channels
3479 const uint32x4_t m128_u_topLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_top, m128_u_sourceStrideElements); // topLeftOffset = top * strideElements + left * channels
3480 const uint32x4_t m128_u_topRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_top, m128_u_sourceStrideElements); // topRightOffset = top * strideElements + right * channels
3481 const uint32x4_t m128_u_bottomLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_bottom, m128_u_sourceStrideElements); // ...
3482 const uint32x4_t m128_u_bottomRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_bottom, m128_u_sourceStrideElements);
3483
3484 // we store the offsets
3485 vst1q_u32(topLeftOffsetsElements, m128_u_topLeftOffsetElements);
3486 vst1q_u32(topRightOffsetsElements, m128_u_topRightOffsetElements);
3487 vst1q_u32(bottomLeftOffsetsElements, m128_u_bottomLeftOffsetElements);
3488 vst1q_u32(bottomRightOffsetsElements, m128_u_bottomRightOffsetElements);
3489
3490
3491 // now we need to determine the interpolation factors tx, tx_ and ty, ty_: (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
3492
3493 // we determine the fractional portions of the x' and y':
3494 float32x4_t m128_f_tx = vsubq_f32(m128_f_sourceX, vcvtq_f32_u32(m128_u_left));
3495 float32x4_t m128_f_ty = vsubq_f32(m128_f_sourceY, vcvtq_f32_u32(m128_u_top));
3496
3497 // we use integer interpolation [0.0, 1.0] -> [0, 128]
3498 m128_f_tx = vmulq_f32(m128_f_tx, vdupq_n_f32(128.0f));
3499 m128_f_ty = vmulq_f32(m128_f_ty, vdupq_n_f32(128.0f));
3500
3501 const uint32x4_t m128_u_tx = vcvtq_u32_f32(vaddq_f32(m128_f_tx, vdupq_n_f32(0.5)));
3502 const uint32x4_t m128_u_ty = vcvtq_u32_f32(vaddq_f32(m128_f_ty, vdupq_n_f32(0.5)));
3503
3504 if constexpr (tChannels > 4u)
3505 {
3506 // normally we would simply call instead of copying the code of the function to this location
3507 // however, if calling the function instead of applying the code here directly
3508 // clang ends with code approx. 20% slower
3509 // thus we make a copy of the code and keep the function for demonstration purposes
3510
3511 //interpolate4Pixels8BitPerChannelNEON<tChannels>(source, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, targetPixelData);
3512 //targetPixelData += 4;
3513
3514 const uint32x4_t m128_u_tx_ = vsubq_u32(vdupq_n_u32(128u), m128_u_tx);
3515 const uint32x4_t m128_u_ty_ = vsubq_u32(vdupq_n_u32(128u), m128_u_ty);
3516
3517 // (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
3518 // == top_left * tx_ty_ + top_right * txty_ + bottom_left * tx_ty + bottom_right * txty
3519 const uint32x4_t m128_u_tx_ty_ = vmulq_u32(m128_u_tx_, m128_u_ty_);
3520 const uint32x4_t m128_u_txty_ = vmulq_u32(m128_u_tx, m128_u_ty_);
3521 const uint32x4_t m128_u_tx_ty = vmulq_u32(m128_u_tx_, m128_u_ty);
3522 const uint32x4_t m128_u_txty = vmulq_u32(m128_u_tx, m128_u_ty);
3523
3524 unsigned int tx_ty_s[4];
3525 unsigned int txty_s[4];
3526 unsigned int tx_tys[4];
3527 unsigned int txtys[4];
3528
3529 // we store the interpolation factors
3530 vst1q_u32(tx_ty_s, m128_u_tx_ty_);
3531 vst1q_u32(txty_s, m128_u_txty_);
3532 vst1q_u32(tx_tys, m128_u_tx_ty);
3533 vst1q_u32(txtys, m128_u_txty);
3534
3535 for (unsigned int i = 0u; i < 4u; ++i)
3536 {
3537 if (validPixels[i])
3538 {
3539 ocean_assert(topLeftOffsetsElements[i] < sourceStrideElements * sourceHeight);
3540 ocean_assert(topRightOffsetsElements[i] < sourceStrideElements * sourceHeight);
3541 ocean_assert(bottomLeftOffsetsElements[i] < sourceStrideElements * sourceHeight);
3542 ocean_assert(bottomRightOffsetsElements[i] < sourceStrideElements * sourceHeight);
3543
3544 const uint8_t* topLeft = source + topLeftOffsetsElements[i];
3545 const uint8_t* topRight = source + topRightOffsetsElements[i];
3546
3547 const uint8_t* bottomLeft = source + bottomLeftOffsetsElements[i];
3548 const uint8_t* bottomRight = source + bottomRightOffsetsElements[i];
3549
3550 const unsigned int tx_ty_ = tx_ty_s[i];
3551 const unsigned int txty_ = txty_s[i];
3552 const unsigned int tx_ty = tx_tys[i];
3553 const unsigned int txty = txtys[i];
3554
3555 ocean_assert(tx_ty_ + txty_ + tx_ty + txty == 128u * 128u);
3556
3557 for (unsigned int n = 0u; n < tChannels; ++n)
3558 {
3559 ((uint8_t*)targetRow)[n] = uint8_t((topLeft[n] * tx_ty_ + topRight[n] * txty_ + bottomLeft[n] * tx_ty + bottomRight[n] * txty + 8192u) >> 14u);
3560 }
3561 }
3562 else
3563 {
3564 *targetRow = *bColor;
3565 }
3566
3567 targetRow++;
3568 }
3569 }
3570 else
3571 {
3572 interpolate4Pixels8BitPerChannelNEON<tChannels>(source, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, targetRow);
3573 targetRow += 4;
3574 }
3575 }
3576 }
3577}
3578
/**
 * Applies a homography to a subset of the rows of an 8 bit per channel output frame, using NEON instructions.
 * For each output pixel (x, y) the position input_H_output * (x, y, 1) is determined in the input frame.
 * If that position lies inside [0, inputWidth - 1]x[0, inputHeight - 1], the output pixel receives the bilinear interpolation
 * of the four neighboring input pixels (integer interpolation with factors in the range [0, 128]); otherwise it receives the border color.
 * Four successive output pixels are handled per iteration; the last block of a row is shifted to the left so that it ends at the last pixel of the row.
 * @param input The input frame, must be valid
 * @param inputWidth The width of the input frame in pixels, with range [1, infinity)
 * @param inputHeight The height of the input frame in pixels, with range [1, infinity)
 * @param input_H_output The homography transforming output pixel positions to input pixel positions, must be valid
 * @param borderColor The color assigned to output pixels mapping outside of the input frame, nullptr to use zero for every channel
 * @param output The output frame, must be valid
 * @param outputWidth The width of the output frame in pixels, with range [4, infinity)
 * @param outputHeight The height of the output frame in pixels, with range [1, infinity)
 * @param inputPaddingElements The number of elements added to (inputWidth * tChannels) to form the stride of an input row
 * @param outputPaddingElements The number of elements added to (outputWidth * tChannels) to form the stride of an output row
 * @param firstOutputRow The first output row to be handled
 * @param numberOutputRows The number of output rows to be handled, with firstOutputRow + numberOutputRows <= outputHeight
 * @tparam tChannels The number of frame channels, with range [1, infinity)
 */
3579template <unsigned int tChannels>
3580void FrameInterpolatorBilinear::homography8BitPerChannelNEONSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
3581{
3582 static_assert(tChannels >= 1u, "Invalid channel number!");
3583
3584 ocean_assert(input != nullptr && output != nullptr);
3585 ocean_assert(inputWidth > 0u && inputHeight > 0u);
3586 ocean_assert(outputWidth >= 4u && outputHeight > 0u); // each iteration handles a block of 4 pixels, so a row must hold at least one block
3587 ocean_assert(input_H_output != nullptr);
3588
3589 ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
3590
3591 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
3592 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
3593
3594 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
3595
3596 uint8_t zeroColor[tChannels] = {uint8_t(0)}; // fallback border color with zero in every channel
3597 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor; // the explicit border color if provided, otherwise the zero color
3598
3599 unsigned int validPixels[4]; // for each of the 4 pixels of the current block: non-zero if the mapped position lies inside the input frame
3600
3601 unsigned int topLeftOffsetsElements[4]; // the offsets (in elements, in relation to 'input') of the four interpolation neighbors for each of the 4 pixels
3602 unsigned int topRightOffsetsElements[4];
3603 unsigned int bottomLeftOffsetsElements[4];
3604 unsigned int bottomRightOffsetsElements[4];
3605
3606 const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(tChannels);
3607
3608 // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2 (the first column of the homography)
3609 const float32x4_t m128_f_X0 = vdupq_n_f32(float((*input_H_output)(0, 0)));
3610 const float32x4_t m128_f_X1 = vdupq_n_f32(float((*input_H_output)(1, 0)));
3611 const float32x4_t m128_f_X2 = vdupq_n_f32(float((*input_H_output)(2, 0)));
3612
3613 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
3614 {
3615 PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
3616
3617 /*
3618 * We can slightly optimize the 3x3 matrix multiplication:
3619 *
3620 * | X0 Y0 Z0 | | x |
3621 * | X1 Y1 Z1 | * | y |
3622 * | X2 Y2 Z2 | | 1 |
3623 *
3624 * | xx | | X0 * x | | Y0 * y + Z0 |
3625 * | yy | = | X1 * x | + | Y1 * y + Z1 |
3626 * | zz | | X2 * x | | Y2 * y + Z2 |
3627 *
3628 * | xx | | X0 * x | | C0 |
3629 * | yy | = | X1 * x | + | C1 |
3630 * | zz | | X2 * x | | C2 |
3631 *
3632 * As y is constant within the inner loop, we can pre-calculate C0, C1, and C2 once per row, and then determine:
3633 *
3634 * | x' | | (X0 * x + C0) / (X2 * x + C2) |
3635 * | y' | = | (X1 * x + C1) / (X2 * x + C2) |
3636 */
3637
3638 // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
3639 const float32x4_t m128_f_C0 = vdupq_n_f32(float((*input_H_output)(0, 1) * Scalar(y) + (*input_H_output)(0, 2)));
3640 const float32x4_t m128_f_C1 = vdupq_n_f32(float((*input_H_output)(1, 1) * Scalar(y) + (*input_H_output)(1, 2)));
3641 const float32x4_t m128_f_C2 = vdupq_n_f32(float((*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2)));
3642
3643 // we store 4 floats: [0.0f, 0.0f, 0.0f, 0.0f]
3644 const float32x4_t m128_f_zero = vdupq_n_f32(0.0f);
3645
3646 // we store 4 integers: [inputStrideElements, inputStrideElements, inputStrideElements, inputStrideElements]
3647 const uint32x4_t m128_u_inputStrideElements = vdupq_n_u32(inputStrideElements);
3648
3649 // we store 4 integers: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
3650 const uint32x4_t m128_u_inputWidth_1 = vdupq_n_u32(inputWidth - 1u);
3651 const uint32x4_t m128_u_inputHeight_1 = vdupq_n_u32(inputHeight - 1u);
3652
3653 // we store 4 floats: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
3654 const float32x4_t m128_f_inputWidth_1 = vdupq_n_f32(float(inputWidth - 1u));
3655 const float32x4_t m128_f_inputHeight_1 = vdupq_n_f32(float(inputHeight - 1u));
3656
3657 for (unsigned int x = 0u; x < outputWidth; x += 4u)
3658 {
3659 if (x + 4u > outputWidth)
3660 {
3661 // the last iteration will not fit into the output frame,
3662 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
3663
3664 ocean_assert(x >= 4u && outputWidth > 4u);
3665 const unsigned int newX = outputWidth - 4u;
3666
3667 ocean_assert(x > newX);
3668 outputPixelData -= x - newX; // the output pointer is moved back by the same number of pixels
3669
3670 x = newX;
3671
3672 // the for loop will stop after this iteration
3673 ocean_assert(!(x + 4u < outputWidth));
3674 }
3675
3676
3677 // we need four successive x coordinate floats (listed from the highest lane to the lowest lane):
3678 // [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
3679 float x_0123[4] = {float(x + 0u), float(x + 1u), float(x + 2u), float(x + 3u)};
3680 const float32x4_t m128_f_x_0123 = vld1q_f32(x_0123);
3681
3682 // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
3683 const float32x4_t m128_f_xx = vmlaq_f32(m128_f_C0, m128_f_X0, m128_f_x_0123);
3684 const float32x4_t m128_f_yy = vmlaq_f32(m128_f_C1, m128_f_X1, m128_f_x_0123);
3685 const float32x4_t m128_f_zz = vmlaq_f32(m128_f_C2, m128_f_X2, m128_f_x_0123);
3686
3687#ifdef USE_DIVISION_ARM64_ARCHITECTURE
3688
3689 // using the division available from ARM64 is more precise
3690 const float32x4_t m128_f_inputX = vdivq_f32(m128_f_xx, m128_f_zz);
3691 const float32x4_t m128_f_inputY = vdivq_f32(m128_f_yy, m128_f_zz);
3692
3693#else
3694
3695 // we calculate the (approximated) inverse of zz
3696 // [1/zz3, 1/zz2, 1/zz1, 1/zz0]
3697 float32x4_t inv_zz_128 = vrecpeq_f32(m128_f_zz);
3698 inv_zz_128 = vmulq_f32(vrecpsq_f32(m128_f_zz, inv_zz_128), inv_zz_128); // improving the accuracy of the approx. inverse by one Newton/Raphson step
3699
3700 // we determine the normalized coordinates x' and y' for [x + 3.0f, x + 2.0f, ...]
3701 const float32x4_t m128_f_inputX = vmulq_f32(m128_f_xx, inv_zz_128);
3702 const float32x4_t m128_f_inputY = vmulq_f32(m128_f_yy, inv_zz_128);
3703
3704#endif // USE_DIVISION_ARM64_ARCHITECTURE
3705
3706
3707 // now we check whether we are inside the input frame
3708 const uint32x4_t m128_u_validPixelX = vandq_u32(vcleq_f32(m128_f_inputX, m128_f_inputWidth_1), vcgeq_f32(m128_f_inputX, m128_f_zero)); // inputPosition.x() >= 0 && inputPosition.x() <= (inputWidth - 1) ? 0xFFFFFFFF : 0x00000000
3709 const uint32x4_t m128_u_validPixelY = vandq_u32(vcleq_f32(m128_f_inputY, m128_f_inputHeight_1), vcgeq_f32(m128_f_inputY, m128_f_zero)); // inputPosition.y() >= 0 && inputPosition.y() <= (inputHeight - 1) ? 0xFFFFFFFF : 0x00000000
3710
3711 const uint32x4_t m128_u_validPixel = vandq_u32(m128_u_validPixelX, m128_u_validPixelY); // is_inside_input_frame(inputPosition) ? 0xFFFFFFFF : 0x00000000
3712
3713
3714 // we can stop here if all pixels are invalid
3715 const uint32x2_t m64_u_validPixel = vorr_u32(vget_low_u32(m128_u_validPixel), vget_high_u32(m128_u_validPixel));
3716 if ((vget_lane_u32(m64_u_validPixel, 0) | vget_lane_u32(m64_u_validPixel, 1)) == 0x00000000u)
3717 {
3718#ifdef OCEAN_DEBUG
3719 OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
3720 vst1q_u32(debugValidPixels, m128_u_validPixel);
3721 ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
3722#endif
3723
3724 outputPixelData[0] = *bColor;
3725 outputPixelData[1] = *bColor;
3726 outputPixelData[2] = *bColor;
3727 outputPixelData[3] = *bColor;
3728
3729 outputPixelData += 4;
3730
3731 continue;
3732 }
3733
3734
3735 // we store the result
3736 vst1q_u32(validPixels, m128_u_validPixel);
3737 ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
3738
3739
3740 // now we determine the left, top, right and bottom pixel used for the interpolation
3741 // left = floor(x); top = floor(y), the conversion truncates which is identical to floor for the non-negative coordinates of valid pixels
3742 const uint32x4_t m128_u_left = vcvtq_u32_f32(m128_f_inputX);
3743 const uint32x4_t m128_u_top = vcvtq_u32_f32(m128_f_inputY);
3744
3745 // right = min(left + 1, width - 1); bottom = min(top + 1, height - 1)
3746 const uint32x4_t m128_u_right = vminq_u32(vaddq_u32(m128_u_left, vdupq_n_u32(1u)), m128_u_inputWidth_1);
3747 const uint32x4_t m128_u_bottom = vminq_u32(vaddq_u32(m128_u_top, vdupq_n_u32(1u)), m128_u_inputHeight_1);
3748
3749 // offset = y * stride + x * channels
3750 const uint32x4_t m128_u_topLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_top, m128_u_inputStrideElements); // topLeftOffset = top * strideElements + left * channels
3751 const uint32x4_t m128_u_topRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_top, m128_u_inputStrideElements); // topRightOffset = top * strideElements + right * channels
3752 const uint32x4_t m128_u_bottomLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_bottom, m128_u_inputStrideElements); // bottomLeftOffset = bottom * strideElements + left * channels
3753 const uint32x4_t m128_u_bottomRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_bottom, m128_u_inputStrideElements); // bottomRightOffset = bottom * strideElements + right * channels
3754
3755 // we store the offsets
3756 vst1q_u32(topLeftOffsetsElements, m128_u_topLeftOffsetElements);
3757 vst1q_u32(topRightOffsetsElements, m128_u_topRightOffsetElements);
3758 vst1q_u32(bottomLeftOffsetsElements, m128_u_bottomLeftOffsetElements);
3759 vst1q_u32(bottomRightOffsetsElements, m128_u_bottomRightOffsetElements);
3760
3761
3762 // now we need to determine the interpolation factors tx, tx_ and ty, ty_: (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
3763
3764 // we determine the fractional portions of the x' and y':
3765 float32x4_t m128_f_tx = vsubq_f32(m128_f_inputX, vcvtq_f32_u32(m128_u_left));
3766 float32x4_t m128_f_ty = vsubq_f32(m128_f_inputY, vcvtq_f32_u32(m128_u_top));
3767
3768 // we use integer interpolation [0.0, 1.0] -> [0, 128]
3769 m128_f_tx = vmulq_f32(m128_f_tx, vdupq_n_f32(128.0f));
3770 m128_f_ty = vmulq_f32(m128_f_ty, vdupq_n_f32(128.0f));
3771
3772 const uint32x4_t m128_u_tx = vcvtq_u32_f32(vaddq_f32(m128_f_tx, vdupq_n_f32(0.5))); // adding 0.5 before the truncating conversion rounds to the nearest integer
3773 const uint32x4_t m128_u_ty = vcvtq_u32_f32(vaddq_f32(m128_f_ty, vdupq_n_f32(0.5)));
3774
3775 if constexpr (tChannels > 4u)
3776 {
3777 // normally we would simply call the function interpolate4Pixels8BitPerChannelNEON() instead of copying its code to this location
3778 // however, if calling the function instead of applying the code here directly
3779 // clang ends with code approx. 20% slower
3780 // thus we make a copy of the code and keep the function for demonstration purposes
3781
3782 //interpolate4Pixels8BitPerChannelNEON<tChannels>(input, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, outputPixelData);
3783 //outputPixelData += 4;
3784
3785 const uint32x4_t m128_u_tx_ = vsubq_u32(vdupq_n_u32(128u), m128_u_tx); // tx_ = 128 - tx
3786 const uint32x4_t m128_u_ty_ = vsubq_u32(vdupq_n_u32(128u), m128_u_ty); // ty_ = 128 - ty
3787
3788 // (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
3789 // == top_left * tx_ty_ + top_right * txty_ + bottom_left * tx_ty + bottom_right * txty
3790 const uint32x4_t m128_u_tx_ty_ = vmulq_u32(m128_u_tx_, m128_u_ty_);
3791 const uint32x4_t m128_u_txty_ = vmulq_u32(m128_u_tx, m128_u_ty_);
3792 const uint32x4_t m128_u_tx_ty = vmulq_u32(m128_u_tx_, m128_u_ty);
3793 const uint32x4_t m128_u_txty = vmulq_u32(m128_u_tx, m128_u_ty);
3794
3795 unsigned int tx_ty_s[4];
3796 unsigned int txty_s[4];
3797 unsigned int tx_tys[4];
3798 unsigned int txtys[4];
3799
3800 // we store the interpolation factors
3801 vst1q_u32(tx_ty_s, m128_u_tx_ty_);
3802 vst1q_u32(txty_s, m128_u_txty_);
3803 vst1q_u32(tx_tys, m128_u_tx_ty);
3804 vst1q_u32(txtys, m128_u_txty);
3805
3806 for (unsigned int i = 0u; i < 4u; ++i)
3807 {
3808 if (validPixels[i])
3809 {
3810 ocean_assert(topLeftOffsetsElements[i] < inputStrideElements * inputHeight);
3811 ocean_assert(topRightOffsetsElements[i] < inputStrideElements * inputHeight);
3812 ocean_assert(bottomLeftOffsetsElements[i] < inputStrideElements * inputHeight);
3813 ocean_assert(bottomRightOffsetsElements[i] < inputStrideElements * inputHeight);
3814
3815 const uint8_t* topLeft = input + topLeftOffsetsElements[i];
3816 const uint8_t* topRight = input + topRightOffsetsElements[i];
3817
3818 const uint8_t* bottomLeft = input + bottomLeftOffsetsElements[i];
3819 const uint8_t* bottomRight = input + bottomRightOffsetsElements[i];
3820
3821 const unsigned int tx_ty_ = tx_ty_s[i];
3822 const unsigned int txty_ = txty_s[i];
3823 const unsigned int tx_ty = tx_tys[i];
3824 const unsigned int txty = txtys[i];
3825
3826 ocean_assert(tx_ty_ + txty_ + tx_ty + txty == 128u * 128u);
3827
3828 for (unsigned int n = 0u; n < tChannels; ++n)
3829 {
3830 ((uint8_t*)outputPixelData)[n] = uint8_t((topLeft[n] * tx_ty_ + topRight[n] * txty_ + bottomLeft[n] * tx_ty + bottomRight[n] * txty + 8192u) >> 14u); // the four factors sum up to 128 * 128 = 2^14, adding 8192 (2^13) before the shift rounds to the nearest integer
3831 }
3832 }
3833 else
3834 {
3835 *outputPixelData = *bColor;
3836 }
3837
3838 outputPixelData++;
3839 }
3840 }
3841 else
3842 {
3843 interpolate4Pixels8BitPerChannelNEON<tChannels>(input, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, outputPixelData); // up to 4 channels are handled by the interpolation function
3844 outputPixelData += 4;
3845 }
3846 }
3847 }
3848}
3849
3850template <>
3851OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<1u>(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const DataType<uint8_t, 1u>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, 1u>::Type* targetPositionPixels)
3852{
3853 ocean_assert(source != nullptr);
3854 ocean_assert(targetPositionPixels != nullptr);
3855
3856 // as we do not initialize the following intermediate data,
3857 // we hopefully will not allocate memory on the stack each time this function is called
3858 DataType<uint8_t, 1u>::Type pixels[16];
3859
3860 // we will store the pixel information in the following pattern:
3861 // F E D C B A 9 8 7 6 5 4 3 2 1 0
3862 // BR3 BL3 TR3 TL3 BR2 BL2 TR2 TL2 BR1 BL1 TR1 TL1 BR0 BL0 TR0 TL0
3863
3864 // we gather the individual source pixel values from the source image,
3865 // based on the calculated pixel locations
3866 for (unsigned int i = 0u; i < 4u; ++i)
3867 {
3868 if (validPixels[i])
3869 {
3870 pixels[i * 4u + 0u] = *((const DataType<uint8_t, 1u>::Type*)(source + offsetsTopLeftElements[i]));
3871 pixels[i * 4u + 1u] = *((const DataType<uint8_t, 1u>::Type*)(source + offsetsTopRightElements[i]));
3872 pixels[i * 4u + 2u] = *((const DataType<uint8_t, 1u>::Type*)(source + offsetsBottomLeftElements[i]));
3873 pixels[i * 4u + 3u] = *((const DataType<uint8_t, 1u>::Type*)(source + offsetsBottomRightElements[i]));
3874 }
3875 else
3876 {
3877 pixels[i * 4u + 0u] = borderColor;
3878 pixels[i * 4u + 1u] = borderColor;
3879 pixels[i * 4u + 2u] = borderColor;
3880 pixels[i * 4u + 3u] = borderColor;
3881 }
3882 }
3883
3884 static_assert(sizeof(uint8x16_t) == sizeof(pixels), "Invalid data type!");
3885
3886 const uint8x16_t m128_pixels = vld1q_u8((const uint8_t*)pixels);
3887
3888
3889 // factorLeft = 128 - factorRight
3890 // factorTop = 128 - factorBottom
3891
3892 const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
3893 const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
3894
3895 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3896 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3897
3898 const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
3899 const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
3900 const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
3901 const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
3902
3903 // pixels stores the four interpolation grascale pixel values (top left, top right, bottom left, bottom right) for 4 (independent) pixels:
3904 // F E D C B A 9 8 7 6 5 4 3 2 1 0
3905 // BR BL TR TL BR BL TR TL BR BL TR TL BR BL TR TL
3906
3907 // factorsTopLeft stores the 32 bit interpolation values for 4 pixels:
3908 // FEDC BA98 7654 3210
3909 // 3 2 1 0 (32 bit interpolation values, fitting into 16 bit)
3910
3911
3912 // we will simply extract each channel from the source pixels,
3913 // each extracted channel will be multiplied by the corresponding interpolation factor
3914 // and all interpolation results will be accumulated afterwards
3915
3916 const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
3917
3918 const uint32x4_t m128_muliplicationA = vmulq_u32(vandq_u32(vreinterpretq_u32_u8(m128_pixels), m128_maskFirstByte), m128_factorsTopLeft);
3919 const uint32x4_t m128_muliplicationB = vmulq_u32(vandq_u32(vshrq_n_u32(vreinterpretq_u32_u8(m128_pixels), 8), m128_maskFirstByte), m128_factorsTopRight);
3920 const uint32x4_t m128_muliplicationC = vmulq_u32(vandq_u32(vshrq_n_u32(vreinterpretq_u32_u8(m128_pixels), 16), m128_maskFirstByte), m128_factorsBottomLeft);
3921 const uint32x4_t m128_muliplicationD = vmulq_u32(vandq_u32(vshrq_n_u32(vreinterpretq_u32_u8(m128_pixels), 24), m128_maskFirstByte), m128_factorsBottomRight);
3922
3923 const uint32x4_t m128_multiplication = vaddq_u32(vaddq_u32(m128_muliplicationA, m128_muliplicationB), vaddq_u32(m128_muliplicationC, m128_muliplicationD));
3924
3925 // we add 8192 and shift by 14 bits
3926
3927 const uint8x16_t m128_interpolation = vreinterpretq_u8_u32(vshrq_n_u32(vaddq_u32(m128_multiplication, vdupq_n_u32(8192u)), 14));
3928
3929 // finally we have the following result:
3930 // ---C ---8 ---4 ---0
3931 // and we need to extract the four pixel values:
3932 //
3933 // NOTE: Because of a possible bug in Clang affecting ARMv7, vget_lane_u32()
3934 // seems to assume 32-bit memory alignment for output location, which cannot
3935 // be guaranteed. This results in bus errors and crashes the application.
3936 // ARM64 is not affected.
3937#if defined(__aarch64__)
3938
3939 const uint8x8_t m64_mask0 = {0, 4, 1, 1, 1, 1, 1, 1};
3940 const uint8x8_t m64_mask1 = {1, 1, 0, 4, 1, 1, 1, 1};
3941
3942 const uint8x8_t m64_interpolation01 = vtbl1_u8(vget_low_u8(m128_interpolation), m64_mask0);
3943 const uint8x8_t m64_interpolation23 = vtbl1_u8(vget_high_u8(m128_interpolation), m64_mask1);
3944
3945 const uint8x8_t m64_interpolation0123 = vorr_u8(m64_interpolation01, m64_interpolation23);
3946
3947 const uint32_t result = vget_lane_u32(vreinterpret_u32_u8(m64_interpolation0123), 0);
3948 memcpy(targetPositionPixels, &result, sizeof(uint32_t));
3949
3950#else
3951
3952 *((uint8_t*)targetPositionPixels + 0) = vgetq_lane_u8(m128_interpolation, 0);
3953 *((uint8_t*)targetPositionPixels + 1) = vgetq_lane_u8(m128_interpolation, 4);
3954 *((uint8_t*)targetPositionPixels + 2) = vgetq_lane_u8(m128_interpolation, 8);
3955 *((uint8_t*)targetPositionPixels + 3) = vgetq_lane_u8(m128_interpolation, 12);
3956
3957#endif
3958}
3959
3960OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate8Pixels1Channel8BitNEON(const uint8x8_t& topLeft_u_8x8, const uint8x8_t& topRight_u_8x8, const uint8x8_t& bottomLeft_u_8x8, const uint8x8_t& bottomRight_u_8x8, const uint8x16_t& factorsRight_factorsBottom_128_u_8x16, uint8_t* targetPositionPixels)
3961{
3962 const uint8x16_t factorsLeft_factorsTop_128_u_8x16 = vsubq_u8(vdupq_n_u8(128u), factorsRight_factorsBottom_128_u_8x16); // factorLeft = 128 - factorRight, factorTop = 128 - factorBottomv
3963
3964 const uint8x8_t factorsRight_u_8x8 = vget_low_u8(factorsRight_factorsBottom_128_u_8x16);
3965 const uint16x8_t factorsBottom_u_16x8 = vmovl_u8(vget_high_u8(factorsRight_factorsBottom_128_u_8x16));
3966
3967 const uint8x8_t factorsLeft_u_8x8 = vget_low_u8(factorsLeft_factorsTop_128_u_8x16);
3968 const uint16x8_t factorsTop_u_16x8 = vmovl_u8(vget_high_u8(factorsLeft_factorsTop_128_u_8x16));
3969
3970 const uint16x8_t intermediateTop_u_16x8 = vmlal_u8(vmull_u8(topLeft_u_8x8, factorsLeft_u_8x8), topRight_u_8x8, factorsRight_u_8x8); // intermediateTop = topLeft * factorLeft + topRight * factorRight
3971 const uint16x8_t intermediateBottom_u_16x8 = vmlal_u8(vmull_u8(bottomLeft_u_8x8, factorsLeft_u_8x8), bottomRight_u_8x8, factorsRight_u_8x8); // intermediateBottom = bottomLeft * factorLeft + bottomRight * factorRight
3972
3973 const uint32x4_t resultA_32x4 = vmlal_u16(vmull_u16(vget_low_u16(intermediateTop_u_16x8), vget_low_u16(factorsTop_u_16x8)), vget_low_u16(intermediateBottom_u_16x8), vget_low_u16(factorsBottom_u_16x8)); // result = intermediateTop * factorTop + intermediateBottom + factorBottom
3974 const uint32x4_t resultB_32x4 = vmlal_u16(vmull_u16(vget_high_u16(intermediateTop_u_16x8), vget_high_u16(factorsTop_u_16x8)), vget_high_u16(intermediateBottom_u_16x8), vget_high_u16(factorsBottom_u_16x8));
3975
3976 const uint16x8_t result_16x8 = vcombine_u16(vrshrn_n_u32(resultA_32x4, 14), vrshrn_n_u32(resultB_32x4, 14)); // round(result / 16384.0)
3977
3978 const uint8x8_t result_8x8 = vmovn_u16(result_16x8);
3979
3980 vst1_u8(targetPositionPixels, result_8x8);
3981}
3982
3983template <>
3984OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<2u>(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const DataType<uint8_t, 2u>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, 2u>::Type* targetPositionPixels)
3985{
3986 ocean_assert(source != nullptr);
3987 ocean_assert(targetPositionPixels != nullptr);
3988
3989 typedef typename DataType<uint8_t, 2u>::Type PixelType;
3990
3991 // as we do not initialize the following intermediate data,
3992 // we hopefully will not allocate memory on the stack each time this function is called
3993 PixelType topPixels[8];
3994 PixelType bottomPixels[8];
3995
3996 // we will store the pixel information in the following pattern (here for YA):
3997 // FE DC BA 98 76 54 32 10
3998 // YA YA YA YA YA YA YA YA
3999 // TR TL TR TL TR TL TR TL
4000
4001 // we gather the individual source pixel values from the source image,
4002 // based on the calculated pixel locations
4003 for (unsigned int i = 0u; i < 4u; ++i)
4004 {
4005 if (validPixels[i])
4006 {
4007 *(topPixels + i * 2u + 0u) = *((const PixelType*)(source + offsetsTopLeftElements[i]));
4008 *(topPixels + i * 2u + 1u) = *((const PixelType*)(source + offsetsTopRightElements[i]));
4009 *(bottomPixels + i * 2u + 0u) = *((const PixelType*)(source + offsetsBottomLeftElements[i]));
4010 *(bottomPixels + i * 2u + 1u) = *((const PixelType*)(source + offsetsBottomRightElements[i]));
4011 }
4012 else
4013 {
4014 *(topPixels + i * 2u + 0u) = borderColor;
4015 *(topPixels + i * 2u + 1u) = borderColor;
4016 *(bottomPixels + i * 2u + 0u) = borderColor;
4017 *(bottomPixels + i * 2u + 1u) = borderColor;
4018 }
4019 }
4020
4021 static_assert(sizeof(uint32x4_t) == sizeof(topPixels), "Invalid data type!");
4022
4023 const uint32x4_t m128_topPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)topPixels));
4024 const uint32x4_t m128_bottomPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)bottomPixels));
4025
4026
4027 // factorLeft = 128 - factorRight
4028 // factorTop = 128 - factorBottom
4029
4030 const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4031 const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
4032
4033 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
4034 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
4035
4036 const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4037 const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4038 const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4039 const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
4040
4041
4042 const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
4043
4044 uint32x4_t m128_muliplicationChannel0 = vmulq_u32(vandq_u32(m128_topPixels, m128_maskFirstByte), m128_factorsTopLeft);
4045 uint32x4_t m128_muliplicationChannel1 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topPixels, 8), m128_maskFirstByte), m128_factorsTopLeft);
4046
4047 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topPixels, 16), m128_maskFirstByte), m128_factorsTopRight));
4048 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topPixels, 24), m128_maskFirstByte), m128_factorsTopRight));
4049
4050 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomPixels, m128_maskFirstByte), m128_factorsBottomLeft));
4051 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomPixels, 8), m128_maskFirstByte), m128_factorsBottomLeft));
4052
4053 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomPixels, 16), m128_maskFirstByte), m128_factorsBottomRight));
4054 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomPixels, 24), m128_maskFirstByte), m128_factorsBottomRight));
4055
4056
4057 // we add 8192 and shift by 14 bits
4058
4059 const uint32x4_t m128_interpolation0 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel0, vdupq_n_u32(8192u)), 14);
4060 const uint32x4_t m128_interpolation1 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel1, vdupq_n_u32(8192u)), 14);
4061
4062 // finaly we blend the interpolation results together to get the following pattern:
4063 // FE DC BA 98 76 54 32 10
4064 // 00 YA 00 YA 00 YA 00 YA
4065
4066 const uint32x4_t m128_interpolation = vorrq_u32(m128_interpolation0, vshlq_n_u32(m128_interpolation1, 8));
4067
4068 // we shuffle the 128 bit register to a 64 bit register:
4069
4070 const uint8x8_t m64_mask0 = {0, 1, 4, 5, 2, 2, 2, 2};
4071 const uint8x8_t m64_mask1 = {2, 2, 2, 2, 0, 1, 4, 5};
4072
4073 const uint8x8_t m64_interpolation_low = vtbl1_u8(vget_low_u8(vreinterpretq_u8_u32(m128_interpolation)), m64_mask0);
4074 const uint8x8_t m64_interpolation_high = vtbl1_u8(vget_high_u8(vreinterpretq_u8_u32(m128_interpolation)), m64_mask1);
4075
4076 const uint8x8_t m64_interpolation = vorr_u8(m64_interpolation_low, m64_interpolation_high);
4077
4078 // no we can store the following pattern as one block:
4079
4080 // 76 54 32 10
4081 // YA YA YA YA
4082
4083 vst1_u8((uint8_t*)targetPositionPixels, m64_interpolation);
4084}
4085
4086template <>
4087OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<3u>(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const DataType<uint8_t, 3u>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, 3u>::Type* targetPositionPixels)
4088{
4089 ocean_assert(source != nullptr);
4090 ocean_assert(targetPositionPixels != nullptr);
4091
4092 // as we do not initialize the following intermediate data,
4093 // we hopefully will not allocate memory on the stack each time this function is called
4094 uint32_t topLeftPixels[4];
4095 uint32_t topRightPixels[4];
4096 uint32_t bottomLeftPixels[4];
4097 uint32_t bottomRightPixels[4];
4098
4099 // we will store the pixel information in the following pattern, note the padding byte after each pixel (here for RGB):
4100 // FEDCBA9876543210
4101 // BGR BGR BGR BGR
4102
4103 // we gather the individual source pixel values from the source image,
4104 // based on the calculated pixel locations
4105 for (unsigned int i = 0u; i < 4u; ++i)
4106 {
4107 if (validPixels[i])
4108 {
4109 memcpy(topLeftPixels + i, source + offsetsTopLeftElements[i], sizeof(uint8_t) * 3);
4110 memcpy(topRightPixels + i, source + offsetsTopRightElements[i], sizeof(uint8_t) * 3);
4111 memcpy(bottomLeftPixels + i, source + offsetsBottomLeftElements[i], sizeof(uint8_t) * 3);
4112 memcpy(bottomRightPixels + i, source + offsetsBottomRightElements[i], sizeof(uint8_t) * 3);
4113 }
4114 else
4115 {
4116 memcpy(topLeftPixels + i, &borderColor, sizeof(uint8_t) * 3);
4117 memcpy(topRightPixels + i, &borderColor, sizeof(uint8_t) * 3);
4118 memcpy(bottomLeftPixels + i, &borderColor, sizeof(uint8_t) * 3);
4119 memcpy(bottomRightPixels + i, &borderColor, sizeof(uint8_t) * 3);
4120 }
4121 }
4122
4123 static_assert(sizeof(uint32x4_t) == sizeof(topLeftPixels), "Invalid data type!");
4124
4125 const uint32x4_t m128_topLeftPixels = vld1q_u32(topLeftPixels);
4126 const uint32x4_t m128_topRightPixels = vld1q_u32(topRightPixels);
4127 const uint32x4_t m128_bottomLeftPixels = vld1q_u32(bottomLeftPixels);
4128 const uint32x4_t m128_bottomRightPixels = vld1q_u32(bottomRightPixels);
4129
4130
4131 // factorLeft = 128 - factorRight
4132 // factorTop = 128 - factorBottom
4133
4134 const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4135 const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
4136
4137 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
4138 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
4139
4140 const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4141 const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4142 const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4143 const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
4144
4145
4146 const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
4147
4148 uint32x4_t m128_muliplicationChannel0 = vmulq_u32(vandq_u32(m128_topLeftPixels, m128_maskFirstByte), m128_factorsTopLeft);
4149 uint32x4_t m128_muliplicationChannel1 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 8), m128_maskFirstByte), m128_factorsTopLeft);
4150 uint32x4_t m128_muliplicationChannel2 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 16), m128_maskFirstByte), m128_factorsTopLeft);
4151
4152 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_topRightPixels, m128_maskFirstByte), m128_factorsTopRight));
4153 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 8), m128_maskFirstByte), m128_factorsTopRight));
4154 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 16), m128_maskFirstByte), m128_factorsTopRight));
4155
4156 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomLeftPixels, m128_maskFirstByte), m128_factorsBottomLeft));
4157 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 8), m128_maskFirstByte), m128_factorsBottomLeft));
4158 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 16), m128_maskFirstByte), m128_factorsBottomLeft));
4159
4160 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomRightPixels, m128_maskFirstByte), m128_factorsBottomRight));
4161 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 8), m128_maskFirstByte), m128_factorsBottomRight));
4162 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 16), m128_maskFirstByte), m128_factorsBottomRight));
4163
4164
4165 // we add 8192 and shift by 14 bits
4166
4167 const uint32x4_t m128_interpolation0 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel0, vdupq_n_u32(8192u)), 14);
4168 const uint32x4_t m128_interpolation1 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel1, vdupq_n_u32(8192u)), 14);
4169 const uint32x4_t m128_interpolation2 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel2, vdupq_n_u32(8192u)), 14);
4170
4171 // finaly we blend the interpolation results together
4172
4173 const uint32x4_t m128_interpolation = vorrq_u32(vorrq_u32(m128_interpolation0, vshlq_n_u32(m128_interpolation1, 8)), vshlq_n_u32(m128_interpolation2, 16));
4174
4175 // we have to extract the get rid of the padding byte:
4176 // FEDCBA9876543210
4177 // BGR BGR BGR BGR
4178
4179 uint32_t intermediateBuffer[4];
4180 vst1q_u32(intermediateBuffer, m128_interpolation);
4181
4182 for (unsigned int i = 0u; i < 4u; ++i)
4183 {
4184 memcpy(targetPositionPixels + i, intermediateBuffer + i, sizeof(uint8_t) * 3);
4185 }
4186}
4187
4188template <>
4189OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<4u>(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const DataType<uint8_t, 4u>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, 4u>::Type* targetPositionPixels)
4190{
4191 ocean_assert(source != nullptr);
4192 ocean_assert(targetPositionPixels != nullptr);
4193
4194 typedef typename DataType<uint8_t, 4u>::Type PixelType;
4195
4196 // as we do not initialize the following intermediate data,
4197 // we hopefully will not allocate memory on the stack each time this function is called
4198 PixelType topLeftPixels[4];
4199 PixelType topRightPixels[4];
4200 PixelType bottomLeftPixels[4];
4201 PixelType bottomRightPixels[4];
4202
4203 // we will store the pixel information in the following pattern (here for RGBA):
4204 // FEDC BA98 7654 3210
4205 // ABGR ABGR ABGR ABGR
4206
4207 // we gather the individual source pixel values from the source image,
4208 // based on the calculated pixel locations
4209 for (unsigned int i = 0u; i < 4u; ++i)
4210 {
4211 if (validPixels[i])
4212 {
4213 *(topLeftPixels + i) = *((const PixelType*)(source + offsetsTopLeftElements[i]));
4214 *(topRightPixels + i) = *((const PixelType*)(source + offsetsTopRightElements[i]));
4215 *(bottomLeftPixels + i) = *((const PixelType*)(source + offsetsBottomLeftElements[i]));
4216 *(bottomRightPixels + i) = *((const PixelType*)(source + offsetsBottomRightElements[i]));
4217 }
4218 else
4219 {
4220 *(topLeftPixels + i) = borderColor;
4221 *(topRightPixels + i) = borderColor;
4222 *(bottomLeftPixels + i) = borderColor;
4223 *(bottomRightPixels + i) = borderColor;
4224 }
4225 }
4226
4227 static_assert(sizeof(uint32x4_t) == sizeof(topLeftPixels), "Invalid data type!");
4228
4229 const uint32x4_t m128_topLeftPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)topLeftPixels));
4230 const uint32x4_t m128_topRightPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)topRightPixels));
4231 const uint32x4_t m128_bottomLeftPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)bottomLeftPixels));
4232 const uint32x4_t m128_bottomRightPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)bottomRightPixels));
4233
4234
4235 // factorLeft = 128 - factorRight
4236 // factorTop = 128 - factorBottom
4237
4238 const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4239 const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
4240
4241 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
4242 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
4243
4244 const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4245 const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4246 const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4247 const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
4248
4249
4250 const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
4251
4252 uint32x4_t m128_muliplicationChannel0 = vmulq_u32(vandq_u32(m128_topLeftPixels, m128_maskFirstByte), m128_factorsTopLeft);
4253 uint32x4_t m128_muliplicationChannel1 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 8), m128_maskFirstByte), m128_factorsTopLeft);
4254 uint32x4_t m128_muliplicationChannel2 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 16), m128_maskFirstByte), m128_factorsTopLeft);
4255 uint32x4_t m128_muliplicationChannel3 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 24), m128_maskFirstByte), m128_factorsTopLeft);
4256
4257 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_topRightPixels, m128_maskFirstByte), m128_factorsTopRight));
4258 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 8), m128_maskFirstByte), m128_factorsTopRight));
4259 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 16), m128_maskFirstByte), m128_factorsTopRight));
4260 m128_muliplicationChannel3 = vaddq_u32(m128_muliplicationChannel3, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 24), m128_maskFirstByte), m128_factorsTopRight));
4261
4262 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomLeftPixels, m128_maskFirstByte), m128_factorsBottomLeft));
4263 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 8), m128_maskFirstByte), m128_factorsBottomLeft));
4264 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 16), m128_maskFirstByte), m128_factorsBottomLeft));
4265 m128_muliplicationChannel3 = vaddq_u32(m128_muliplicationChannel3, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 24), m128_maskFirstByte), m128_factorsBottomLeft));
4266
4267 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomRightPixels, m128_maskFirstByte), m128_factorsBottomRight));
4268 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 8), m128_maskFirstByte), m128_factorsBottomRight));
4269 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 16), m128_maskFirstByte), m128_factorsBottomRight));
4270 m128_muliplicationChannel3 = vaddq_u32(m128_muliplicationChannel3, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 24), m128_maskFirstByte), m128_factorsBottomRight));
4271
4272
4273 // we add 8192 and shift by 14 bits
4274
4275 const uint32x4_t m128_interpolation0 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel0, vdupq_n_u32(8192u)), 14);
4276 const uint32x4_t m128_interpolation1 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel1, vdupq_n_u32(8192u)), 14);
4277 const uint32x4_t m128_interpolation2 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel2, vdupq_n_u32(8192u)), 14);
4278 const uint32x4_t m128_interpolation3 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel3, vdupq_n_u32(8192u)), 14);
4279
4280 // finaly we blend the interpolation results together
4281
4282 const uint32x4_t m128_interpolation = vorrq_u32(vorrq_u32(m128_interpolation0, vshlq_n_u32(m128_interpolation1, 8)), vorrq_u32(vshlq_n_u32(m128_interpolation2, 16), vshlq_n_u32(m128_interpolation3, 24)));
4283
4284 vst1q_u8((uint8_t*)targetPositionPixels, vreinterpretq_u8_u32(m128_interpolation));
4285}
4286
4287template <unsigned int tChannels>
4288OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const typename DataType<uint8_t, tChannels>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, tChannels>::Type* targetPositionPixels)
4289{
4290 ocean_assert(source != nullptr);
4291 ocean_assert(targetPositionPixels != nullptr);
4292
4293 // as we do not initialize the following intermediate data,
4294 // we hopefully will not allocate memory on the stack each time this function is called
4295 unsigned int factorsTopLeft[4];
4296 unsigned int factorsTopRight[4];
4297 unsigned int factorsBottomLeft[4];
4298 unsigned int factorsBottomRight[4];
4299
4300
4301 // factorLeft = 128 - factorRight
4302 // factorTop = 128 - factorBottom
4303
4304 const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4305 const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
4306
4307 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
4308 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
4309
4310 const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4311 const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4312 const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4313 const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
4314
4315
4316 // we store the interpolation factors
4317 vst1q_u32(factorsTopLeft, m128_factorsTopLeft);
4318 vst1q_u32(factorsTopRight, m128_factorsTopRight);
4319 vst1q_u32(factorsBottomLeft, m128_factorsBottomLeft);
4320 vst1q_u32(factorsBottomRight, m128_factorsBottomRight);
4321
4322 for (unsigned int i = 0u; i < 4u; ++i)
4323 {
4324 if (validPixels[i])
4325 {
4326 const uint8_t* topLeft = source + offsetsTopLeftElements[i];
4327 const uint8_t* topRight = source + offsetsTopRightElements[i];
4328
4329 const uint8_t* bottomLeft = source + offsetsBottomLeftElements[i];
4330 const uint8_t* bottomRight = source + offsetsBottomRightElements[i];
4331
4332 const unsigned int& factorTopLeft = factorsTopLeft[i];
4333 const unsigned int& factorTopRight = factorsTopRight[i];
4334 const unsigned int& factorBottomLeft = factorsBottomLeft[i];
4335 const unsigned int& factorBottomRight = factorsBottomRight[i];
4336
4337 for (unsigned int n = 0u; n < tChannels; ++n)
4338 {
4339 ((uint8_t*)targetPositionPixels)[n] = (topLeft[n] * factorTopLeft + topRight[n] * factorTopRight + bottomLeft[n] * factorBottomLeft + bottomRight[n] * factorBottomRight + 8192u) >> 14u;
4340 }
4341 }
4342 else
4343 {
4344 *targetPositionPixels = borderColor;
4345 }
4346
4347 targetPositionPixels++;
4348 }
4349}
4350
4351#endif // OCEAN_HARDWARE_NEON_VERSION
4352
4353template <unsigned int tChannels>
4354inline void FrameInterpolatorBilinear::homographies8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* homographies, const uint8_t* borderColor, uint8_t* output, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
4355{
4356 static_assert(tChannels >= 1u, "Invalid channel number!");
4357
4358 ocean_assert(input && output);
4359 ocean_assert(inputWidth > 0u && inputHeight > 0u);
4360 ocean_assert(outputWidth > 0u && outputHeight > 0u);
4361
4362 ocean_assert(outputQuadrantCenterX >= 0 && outputQuadrantCenterX < Scalar(outputWidth));
4363 ocean_assert(outputQuadrantCenterY >= 0 && outputQuadrantCenterY < Scalar(outputHeight));
4364 ocean_assert(homographies);
4365
4366 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
4367
4368 const Scalar scalarInputWidth_1 = Scalar(inputWidth - 1u);
4369 const Scalar scalarInputHeight_1 = Scalar(inputHeight - 1u);
4370
4371 constexpr uint8_t zeroColor[tChannels] = {uint8_t(0)};
4372 const uint8_t* const bColor = borderColor ? borderColor : zeroColor;
4373
4374 uint8_t* outputData = output + firstOutputRow * outputStrideElements;
4375
4376 const Scalar left = Scalar(outputQuadrantCenterX) * Scalar(0.5);
4377 const Scalar right = (Scalar(outputWidth) + Scalar(outputQuadrantCenterX)) * Scalar(0.5);
4378
4379 const Scalar top = Scalar(outputQuadrantCenterY) * Scalar(0.5);
4380 const Scalar bottom = (Scalar(outputHeight) + Scalar(outputQuadrantCenterY)) * Scalar(0.5);
4381
4382 ocean_assert(right - left > Numeric::eps());
4383 ocean_assert(bottom - top > Numeric::eps());
4384
4385 const Scalar invWidth = Scalar(1) / Scalar(right - left);
4386 const Scalar invHeight = Scalar(1) / Scalar(bottom - top);
4387
4388 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
4389 {
4390 for (unsigned int x = 0; x < outputWidth; ++x)
4391 {
4392 Vector2 outputPosition = Vector2(Scalar(int(x)), Scalar(int(y)));
4393
4394 const Scalar _tx = minmax<Scalar>(0, (outputPosition.x() - left) * invWidth, 1);
4395 const Scalar _ty = minmax<Scalar>(0, (outputPosition.y() - top) * invHeight, 1);
4396
4397 outputPosition += Vector2(Scalar(outputOriginX), Scalar(outputOriginY));
4398
4399 const Scalar tx = 1 - _tx;
4400 const Scalar ty = 1 - _ty;
4401
4402 const Vector2 inputPositionTopLeft(homographies[0] * outputPosition);
4403 const Vector2 inputPositionTopRight(homographies[1] * outputPosition);
4404 const Vector2 inputPositionBottomLeft(homographies[2] * outputPosition);
4405 const Vector2 inputPositionBottomRight(homographies[3] * outputPosition);
4406
4407 const Scalar tTopLeft = tx * ty;
4408 const Scalar tTopRight = _tx * ty;
4409 const Scalar tBottomLeft = tx * _ty;
4410 const Scalar tBottomRight = _tx * _ty;
4411
4412 const Vector2 inputPosition = inputPositionTopLeft * tTopLeft + inputPositionTopRight * tTopRight
4413 + inputPositionBottomLeft * tBottomLeft + inputPositionBottomRight * tBottomRight;
4414
4415 if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4416 {
4417 for (unsigned int c = 0u; c < tChannels; ++c)
4418 {
4419 outputData[c] = bColor[c];
4420 }
4421 }
4422 else
4423 {
4424 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, outputData);
4425 }
4426
4427 outputData += tChannels;
4428 }
4429
4430 outputData += outputPaddingElements;
4431 }
4432}
4433
4434template <unsigned int tChannels>
4435void FrameInterpolatorBilinear::homographyMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, unsigned int firstOutputRow, const unsigned int numberOutputRows)
4436{
4437 static_assert(tChannels >= 1u, "Invalid channel number!");
4438
4439 ocean_assert(input != nullptr && output != nullptr);
4440 ocean_assert(inputWidth > 0u && inputHeight > 0u);
4441 ocean_assert(outputWidth > 0u && outputHeight > 0u);
4442 ocean_assert(input_H_output != nullptr);
4443
4444 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
4445
4446 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
4447 const unsigned int outputMaskStrideElements = outputWidth + outputMaskPaddingElements;
4448
4449 const Scalar scalarInputWidth_1 = Scalar(inputWidth - 1u);
4450 const Scalar scalarInputHeight_1 = Scalar(inputHeight - 1u);
4451
4452 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
4453
4454 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
4455 {
4456 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
4457 uint8_t* outputMaskData = outputMask + y * outputMaskStrideElements;
4458
4459 /*
4460 * We can slightly optimize the 3x3 matrix multiplication:
4461 *
4462 * | X0 Y0 Z0 | | x |
4463 * | X1 Y1 Z1 | * | y |
4464 * | X2 Y2 Z2 | | 1 |
4465 *
4466 * | x' | | X0 * x | | Y0 * y + Z0 |
4467 * | y' | = | X1 * x | + | Y1 * y + Z1 |
4468 * | z' | | X2 * x | | Y2 * y + Z2 |
4469 *
4470 * As y is constant within the inner loop, we can pre-calculate the following terms:
4471 *
4472 * | x' | | (X0 * x + constValue0) / (X2 * x + constValue2) |
4473 * | y' | = | (X1 * x + constValue1) / (X2 * x + constValue2) |
4474 *
4475 * | p | = | (X * x + c) / (X2 * x + constValue2) |
4476 */
4477
4478 const Vector2 X(input_H_output->data() + 0);
4479 const Vector2 c(Vector2(input_H_output->data() + 3) * Scalar(y) + Vector2(input_H_output->data() + 6));
4480
4481 const Scalar X2 = (*input_H_output)(2, 0);
4482 const Scalar constValue2 = (*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2);
4483
4484 for (unsigned int x = 0; x < outputWidth; ++x)
4485 {
4486 const Vector2 inputPosition((X * Scalar(x) + c) / (X2 * Scalar(x) + constValue2));
4487
4488#ifdef OCEAN_DEBUG
4489 const Vector2 debugInputPosition(*input_H_output * Vector2(Scalar(x), Scalar(y)));
4490 ocean_assert(inputPosition.isEqual(debugInputPosition, Scalar(0.01)));
4491#endif
4492
4493 if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4494 {
4495 *outputMaskData = 0xFF - maskValue;
4496 }
4497 else
4498 {
4499 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputData));
4500 *outputMaskData = maskValue;
4501 }
4502
4503 outputData++;
4504 outputMaskData++;
4505 }
4506 }
4507}
4508
4509template <unsigned int tChannels>
4510inline void FrameInterpolatorBilinear::homographiesMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* homographies, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
4511{
4512 static_assert(tChannels >= 1u, "Invalid channel number!");
4513
4514 ocean_assert(input && output);
4515 ocean_assert(inputWidth > 0u && inputHeight > 0u);
4516 ocean_assert(outputWidth > 0u && outputHeight > 0u);
4517
4518 ocean_assert(outputQuadrantCenterX >= 0 && outputQuadrantCenterX < Scalar(outputWidth));
4519 ocean_assert(outputQuadrantCenterY >= 0 && outputQuadrantCenterY < Scalar(outputHeight));
4520 ocean_assert(homographies);
4521
4522 const unsigned int outputStrideElements = tChannels * outputWidth + outputPaddingElements;
4523 const unsigned int outputMaskStrideElements = outputWidth + outputMaskPaddingElements;
4524
4525 const Scalar scalarInputWidth_1 = Scalar(inputWidth - 1u);
4526 const Scalar scalarInputHeight_1 = Scalar(inputHeight - 1u);
4527
4528 uint8_t* outputData = output + firstOutputRow * outputStrideElements;
4529 outputMask += firstOutputRow * outputMaskStrideElements;
4530
4531 const Scalar left = Scalar(outputQuadrantCenterX) * Scalar(0.5);
4532 const Scalar right = (Scalar(outputWidth) + Scalar(outputQuadrantCenterX)) * Scalar(0.5);
4533
4534 const Scalar top = Scalar(outputQuadrantCenterY) * Scalar(0.5);
4535 const Scalar bottom = (Scalar(outputHeight) + Scalar(outputQuadrantCenterY)) * Scalar(0.5);
4536
4537 ocean_assert(right - left > Numeric::eps());
4538 ocean_assert(bottom - top > Numeric::eps());
4539
4540 const Scalar invWidth = Scalar(1) / Scalar(right - left);
4541 const Scalar invHeight = Scalar(1) / Scalar(bottom - top);
4542
4543 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
4544 {
4545 for (unsigned int x = 0u; x < outputWidth; ++x)
4546 {
4547 Vector2 outputPosition = Vector2(Scalar(int(x)), Scalar(int(y)));
4548
4549 const Scalar _tx = minmax<Scalar>(0, (outputPosition.x() - left) * invWidth, 1);
4550 const Scalar _ty = minmax<Scalar>(0, (outputPosition.y() - top) * invHeight, 1);
4551
4552 outputPosition += Vector2(Scalar(outputOriginX), Scalar(outputOriginY));
4553
4554 const Scalar tx = 1 - _tx;
4555 const Scalar ty = 1 - _ty;
4556
4557 const Vector2 inputPositionTopLeft(homographies[0] * outputPosition);
4558 const Vector2 inputPositionTopRight(homographies[1] * outputPosition);
4559 const Vector2 inputPositionBottomLeft(homographies[2] * outputPosition);
4560 const Vector2 inputPositionBottomRight(homographies[3] * outputPosition);
4561
4562 const Scalar tTopLeft = tx * ty;
4563 const Scalar tTopRight = _tx * ty;
4564 const Scalar tBottomLeft = tx * _ty;
4565 const Scalar tBottomRight = _tx * _ty;
4566
4567 const Vector2 inputPosition = inputPositionTopLeft * tTopLeft + inputPositionTopRight * tTopRight
4568 + inputPositionBottomLeft * tBottomLeft + inputPositionBottomRight * tBottomRight;
4569
4570 if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4571 {
4572 *outputMask = 0xFFu - maskValue;
4573 }
4574 else
4575 {
4576 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, outputData);
4577 *outputMask = maskValue;
4578 }
4579
4580 outputData += tChannels;
4581 outputMask++;
4582 }
4583
4584 outputData += outputPaddingElements;
4585 outputMask += outputMaskPaddingElements;
4586 }
4587}
4588
4589template <unsigned int tChannels>
4590void FrameInterpolatorBilinear::homographyWithCamera8BitPerChannelSubset(const PinholeCamera* inputCamera, const PinholeCamera* outputCamera, const PinholeCamera::DistortionLookup* outputCameraDistortionLookup, const uint8_t* input, const SquareMatrix3* normalizedHomography, const bool useDistortionParameters, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4591{
4592 static_assert(tChannels >= 1u, "Invalid channel number!");
4593
4594 ocean_assert(inputCamera && outputCamera && normalizedHomography);
4595 ocean_assert(input && output);
4596
4597 ocean_assert(firstRow + numberRows <= outputCamera->height());
4598
4599 const unsigned int outputStrideElements = tChannels * outputCamera->width() + outputPaddingElements;
4600
4601 const Scalar scalarInputWidth_1 = Scalar(inputCamera->width() - 1u);
4602 const Scalar scalarInputHeight_1 = Scalar(inputCamera->height() - 1u);
4603
4604 const SquareMatrix3 combinedMatrix(*normalizedHomography * outputCamera->invertedIntrinsic());
4605
4606 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
4607
4608 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
4609 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
4610
4611 uint8_t* outputData = output + firstRow * outputStrideElements;
4612
4613 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4614 {
4615 for (unsigned int x = 0; x < outputCamera->width(); ++x)
4616 {
4617 const Vector2 inputPosition(inputCamera->normalizedImagePoint2imagePoint<true>(combinedMatrix * outputCameraDistortionLookup->undistortedImagePoint(Vector2(Scalar(x), Scalar(y))), useDistortionParameters));
4618
4619 if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4620 {
4621 *((PixelType*)outputData) = *bColor;
4622 }
4623 else
4624 {
4625 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputCamera->width(), inputCamera->height(), inputPaddingElements, inputPosition, outputData);
4626 }
4627
4628 outputData += tChannels;
4629 }
4630
4631 outputData += outputPaddingElements;
4632 }
4633}
4634
4635template <unsigned int tChannels>
4636void FrameInterpolatorBilinear::homographyWithCameraMask8BitPerChannelSubset(const PinholeCamera* inputCamera, const PinholeCamera* outputCamera, const PinholeCamera::DistortionLookup* outputCameraDistortionLookup, const uint8_t* input, const unsigned int inputPaddingElements, const SquareMatrix3* normalizedHomography, uint8_t* output, uint8_t* outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows)
4637{
4638 static_assert(tChannels >= 1u, "Invalid channel number!");
4639
4640 ocean_assert(inputCamera != nullptr && outputCamera != nullptr && normalizedHomography != nullptr);
4641 ocean_assert(input != nullptr && output != nullptr);
4642
4643 ocean_assert(firstRow + numberRows <= outputCamera->height());
4644
4645 const unsigned int outputStrideElements = outputCamera->width() * tChannels + outputPaddingElements;
4646 const unsigned int outputMaskStrideElements = outputCamera->width() + outputMaskPaddingElements;
4647
4648 const Scalar scalarInputWidth_1 = Scalar(inputCamera->width() - 1u);
4649 const Scalar scalarInputHeight_1 = Scalar(inputCamera->height() - 1u);
4650
4651 const SquareMatrix3 combinedMatrix(*normalizedHomography * outputCamera->invertedIntrinsic());
4652
4653 uint8_t* outputData = output + firstRow * outputStrideElements;
4654 outputMask += firstRow * outputMaskStrideElements;
4655
4656 constexpr bool useDistortionParameters = true;
4657
4658 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4659 {
4660 for (unsigned int x = 0; x < outputCamera->width(); ++x)
4661 {
4662 const Vector2 inputPosition(inputCamera->normalizedImagePoint2imagePoint<true>(combinedMatrix * outputCameraDistortionLookup->undistortedImagePoint(Vector2(Scalar(x), Scalar(y))), useDistortionParameters));
4663
4664 if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4665 {
4666 *outputMask = 0xFF - maskValue;
4667 }
4668 else
4669 {
4670 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputCamera->width(), inputCamera->height(), inputPaddingElements, inputPosition, outputData);
4671 *outputMask = maskValue;
4672 }
4673
4674 outputData += tChannels;
4675 ++outputMask;
4676 }
4677
4678 outputData += outputPaddingElements;
4679 outputMask += outputMaskPaddingElements;
4680 }
4681}
4682
4683template <unsigned int tChannels>
4684void FrameInterpolatorBilinear::lookup8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4685{
4686 static_assert(tChannels >= 1u, "Invalid channel number!");
4687
4688 ocean_assert(input_LT_output != nullptr);
4689 ocean_assert(input != nullptr && output != nullptr);
4690
4691 ocean_assert(inputWidth != 0u && inputHeight != 0u);
4692 ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
4693
4694 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
4695
4696 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
4697 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
4698
4699 const unsigned int columns = (unsigned int)(input_LT_output->sizeX());
4700
4701 const unsigned int outputStrideElements = tChannels * columns + outputPaddingElements;
4702
4703 static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
4704
4705 const Scalar inputWidth1 = Scalar(inputWidth - 1u);
4706 const Scalar inputHeight1 = Scalar(inputHeight - 1u);
4707
4708 Memory rowLookupMemory = Memory::create<Vector2>(columns);
4709 Vector2* const rowLookupData = rowLookupMemory.data<Vector2>();
4710
4711 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4712 {
4713 input_LT_output->bilinearValues(y, rowLookupData);
4714
4715 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
4716
4717 for (unsigned int x = 0u; x < columns; ++x)
4718 {
4719 const Vector2& lookupValue = rowLookupData[x];
4720
4721 const Vector2 inputPosition = offset ? Vector2(Scalar(x) + lookupValue.x(), Scalar(y) + lookupValue.y()) : lookupValue;
4722
4723 if (inputPosition.x() >= Scalar(0) && inputPosition.y() >= Scalar(0) && inputPosition.x() <= inputWidth1 && inputPosition.y() <= inputHeight1)
4724 {
4725 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputData));
4726 }
4727 else
4728 {
4729 *outputData = *bColor;
4730 }
4731
4732 outputData++;
4733 }
4734 }
4735}
4736
4737template <typename T, unsigned int tChannels>
4738void FrameInterpolatorBilinear::lookupSubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const T* borderColor, T* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4739{
4740 static_assert(tChannels >= 1u, "Invalid channel number!");
4741
4742 ocean_assert((!std::is_same<uint8_t, T>::value));
4743
4744 ocean_assert(input_LT_output != nullptr);
4745 ocean_assert(input != nullptr && output != nullptr);
4746
4747 ocean_assert(inputWidth != 0u && inputHeight != 0u);
4748 ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
4749
4750 typedef typename DataType<T, tChannels>::Type PixelType;
4751
4752 const T zeroColor[tChannels] = {T(0)};
4753 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
4754
4755 const unsigned int columns = (unsigned int)(input_LT_output->sizeX());
4756
4757 const unsigned int outputStrideElements = tChannels * columns + outputPaddingElements;
4758
4759 static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
4760
4761 const Scalar inputWidth1 = Scalar(inputWidth - 1u);
4762 const Scalar inputHeight1 = Scalar(inputHeight - 1u);
4763
4764 Memory rowLookupMemory = Memory::create<Vector2>(columns);
4765 Vector2* const rowLookupData = rowLookupMemory.data<Vector2>();
4766
4767 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4768 {
4769 input_LT_output->bilinearValues(y, rowLookupData);
4770
4771 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
4772
4773 for (unsigned int x = 0u; x < columns; ++x)
4774 {
4775 const Vector2& lookupValue = rowLookupData[x];
4776
4777 const Vector2 inputPosition = offset ? Vector2(Scalar(x) + lookupValue.x(), Scalar(y) + lookupValue.y()) : lookupValue;
4778
4779 if (inputPosition.x() >= Scalar(0) && inputPosition.y() >= Scalar(0) && inputPosition.x() <= inputWidth1 && inputPosition.y() <= inputHeight1)
4780 {
4781 interpolatePixel<T, T, tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (T*)(outputData));
4782 }
4783 else
4784 {
4785 *outputData = *bColor;
4786 }
4787
4788 outputData++;
4789 }
4790 }
4791}
4792
4793#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
4794
// NEON specialization for frames with one channel (8 bit per pixel).
// Applies the lookup table to the output rows [firstRow, firstRow + numberRows) and handles 8 output pixels per inner iteration.
// input: The input frame data, must be valid
// inputWidth, inputHeight: The resolution of the input frame in pixel, with range [1, infinity)
// input_LT_output: The lookup table, its horizontal size defines the output width (asserted to be >= 8), must be valid
// offset: True, if the lookup values are offsets which are added to the output pixel coordinates; False, if they are absolute input positions
// borderColor: The value for output pixels whose input position is not accepted, nullptr to use 0
// output: The output frame data, must be valid
// inputPaddingElements, outputPaddingElements: The number of padding elements at the end of each input/output row
// firstRow, numberRows: The range of output rows to be handled
template <>
inline void FrameInterpolatorBilinear::lookup8BitPerChannelSubsetNEON<1u>(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
{
	ocean_assert(input_LT_output != nullptr);
	ocean_assert(input != nullptr && output != nullptr);

	ocean_assert(inputWidth != 0u && inputHeight != 0u);
	ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());

	typedef uint8_t PixelType;

	// the border color replicated to all 16 lanes, zero if no border color is provided
	const uint8x16_t constantBorderColor_u_8x16 = vdupq_n_u8(borderColor ? *borderColor : 0u);

	const unsigned int outputWidth = (unsigned int)(input_LT_output->sizeX());
	ocean_assert(outputWidth >= 8u);

	static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");

	const unsigned int inputStrideElements = inputWidth + inputPaddingElements;
	const unsigned int outputStrideElements = outputWidth + outputPaddingElements;

	// the lookup values of one output row, stored with 32 bit floating point precision
	Memory rowLookupMemory = Memory::create<VectorF2>(outputWidth);
	VectorF2* const rowLookupData = rowLookupMemory.data<VectorF2>();

	const float32x4_t constantZero_f_32x4 = vdupq_n_f32(0.0f); // [0.0f, 0.0f, 0.0f, 0.0f]
	const float32x4_t constantEight_f_32x4 = vdupq_n_f32(8.0f); // [8.0f, 8.0f, 8.0f, 8.0f]

	// [0.0f, 1.0f, 2.0f, 3.0f, ...]
	const float f_01234567[8] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
	const float32x4_t conststant0123_f_32x4 = vld1q_f32(f_01234567 + 0);
	const float32x4_t conststant4567_f_32x4 = vld1q_f32(f_01234567 + 4);

	const float32x4_t constant128_f_32x4 = vdupq_n_f32(128.0f);

	const uint32x4_t constantOne_u_32x4 = vdupq_n_u32(1u);

	// this specialization handles exactly one channel
	const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(1u);

	const float32x4_t constantInputWidth1_f_32x4 = vdupq_n_f32(float(inputWidth - 1u));
	const float32x4_t constantInputHeight1_f_32x4 = vdupq_n_f32(float(inputHeight - 1u));

	const uint32x4_t constantInputStrideElements_u_32x4 = vdupq_n_u32(inputStrideElements);
	const uint32x4_t constantInputHeight1_u_32x4 = vdupq_n_u32(inputHeight - 1u);

	// scalar copies of the vector results, used for the gather loop below
	unsigned int validPixels[8];

	unsigned int topLeftOffsetsElements[8];
	unsigned int bottomLeftOffsetsElements[8];

	// [0, 15]: interleaved (left, right) pairs of the top row, [16, 31]: interleaved (left, right) pairs of the bottom row
	uint8_t pixels[32];

	for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
	{
		PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);

		input_LT_output->bilinearValues<VectorF2>(y, rowLookupData);

		// the x-coordinates of the 8 output pixels handled in the current iteration (only applied if 'offset' is set)
		float32x4_t additionalInputOffsetX0123_f_32x4 = conststant0123_f_32x4;
		float32x4_t additionalInputOffsetX4567_f_32x4 = conststant4567_f_32x4;

		const float32x4_t additionalInputOffsetY_f_32x4 = vdupq_n_f32(float(y));

		for (unsigned int x = 0u; x < outputWidth; x += 8u)
		{
			if (x + 8u > outputWidth)
			{
				// the last iteration will not fit into the output frame,
				// so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again

				ocean_assert(x >= 8u && outputWidth > 8u);
				const unsigned int newX = outputWidth - 8u;

				ocean_assert(x > newX);
				const unsigned int xOffset = x - newX;

				outputPixelData -= xOffset;

				if (offset)
				{
					additionalInputOffsetX0123_f_32x4 = vsubq_f32(additionalInputOffsetX0123_f_32x4, vdupq_n_f32(float(xOffset)));
					additionalInputOffsetX4567_f_32x4 = vsubq_f32(additionalInputOffsetX4567_f_32x4, vdupq_n_f32(float(xOffset)));
				}

				x = newX;

				// the for loop will stop after this iteration
				ocean_assert(!(x + 8u < outputWidth));
			}

			// de-interleaving load: val[0] holds four x-values, val[1] holds four y-values
			const float32x4x2_t inputPositions0123_f_32x4x2 = vld2q_f32((const float*)(rowLookupData + x + 0u));
			const float32x4x2_t inputPositions4567_f_32x4x2 = vld2q_f32((const float*)(rowLookupData + x + 4u));

			float32x4_t inputPositionsX0123_f_32x4 = inputPositions0123_f_32x4x2.val[0];
			float32x4_t inputPositionsY0123_f_32x4 = inputPositions0123_f_32x4x2.val[1];

			float32x4_t inputPositionsX4567_f_32x4 = inputPositions4567_f_32x4x2.val[0];
			float32x4_t inputPositionsY4567_f_32x4 = inputPositions4567_f_32x4x2.val[1];

			if (offset)
			{
				// the lookup values are offsets, so we add the coordinates of the output pixels
				inputPositionsX0123_f_32x4 = vaddq_f32(inputPositionsX0123_f_32x4, additionalInputOffsetX0123_f_32x4);
				inputPositionsY0123_f_32x4 = vaddq_f32(inputPositionsY0123_f_32x4, additionalInputOffsetY_f_32x4);

				inputPositionsX4567_f_32x4 = vaddq_f32(inputPositionsX4567_f_32x4, additionalInputOffsetX4567_f_32x4);
				inputPositionsY4567_f_32x4 = vaddq_f32(inputPositionsY4567_f_32x4, additionalInputOffsetY_f_32x4);

				// preparing the x-coordinates for the next 8 output pixels
				additionalInputOffsetX0123_f_32x4 = vaddq_f32(additionalInputOffsetX0123_f_32x4, constantEight_f_32x4);
				additionalInputOffsetX4567_f_32x4 = vaddq_f32(additionalInputOffsetX4567_f_32x4, constantEight_f_32x4);
			}

			// now we check whether we are inside the input frame
			// NOTE(review): rows are tested with a strict '<' as well although the bottom row index is clamped below,
			// while the generic NEON implementation accepts positions up to and including (inputHeight - 1) - confirm that this is intended
			const uint32x4_t validPixelsX0123_u_32x4 = vandq_u32(vcltq_f32(inputPositionsX0123_f_32x4, constantInputWidth1_f_32x4), vcgeq_f32(inputPositionsX0123_f_32x4, constantZero_f_32x4)); // inputPosition.x() >= 0 && inputPosition.x() < (inputWidth - 1) ? 0xFFFFFFFF : 0x00000000
			const uint32x4_t validPixelsX4567_u_32x4 = vandq_u32(vcltq_f32(inputPositionsX4567_f_32x4, constantInputWidth1_f_32x4), vcgeq_f32(inputPositionsX4567_f_32x4, constantZero_f_32x4));

			const uint32x4_t validPixelsY0123_u_32x4 = vandq_u32(vcltq_f32(inputPositionsY0123_f_32x4, constantInputHeight1_f_32x4), vcgeq_f32(inputPositionsY0123_f_32x4, constantZero_f_32x4)); // inputPosition.y() >= 0 && inputPosition.y() < (inputHeight - 1) ? 0xFFFFFFFF : 0x00000000
			const uint32x4_t validPixelsY4567_u_32x4 = vandq_u32(vcltq_f32(inputPositionsY4567_f_32x4, constantInputHeight1_f_32x4), vcgeq_f32(inputPositionsY4567_f_32x4, constantZero_f_32x4));

			const uint32x4_t validPixels0123_u_32x4 = vandq_u32(validPixelsX0123_u_32x4, validPixelsY0123_u_32x4); // is_inside_input_frame(inputPosition) ? 0xFFFFFFFF : 0x00000000
			const uint32x4_t validPixels4567_u_32x4 = vandq_u32(validPixelsX4567_u_32x4, validPixelsY4567_u_32x4);

			vst1q_u32(validPixels + 0, validPixels0123_u_32x4);
			vst1q_u32(validPixels + 4, validPixels4567_u_32x4);


			// the integer coordinates of the top left sample of each input position
			const uint32x4_t inputPositionsLeft0123_u_32x4 = vcvtq_u32_f32(inputPositionsX0123_f_32x4);
			const uint32x4_t inputPositionsLeft4567_u_32x4 = vcvtq_u32_f32(inputPositionsX4567_f_32x4);

			const uint32x4_t inputPositionsTop0123_u_32x4 = vcvtq_u32_f32(inputPositionsY0123_f_32x4);
			const uint32x4_t inputPositionsTop4567_u_32x4 = vcvtq_u32_f32(inputPositionsY4567_f_32x4);

			// bottom = min(top + 1, inputHeight - 1)
			const uint32x4_t inputPositionsBottom0123_u_32x4 = vminq_u32(vaddq_u32(inputPositionsTop0123_u_32x4, constantOne_u_32x4), constantInputHeight1_u_32x4);
			const uint32x4_t inputPositionsBottom4567_u_32x4 = vminq_u32(vaddq_u32(inputPositionsTop4567_u_32x4, constantOne_u_32x4), constantInputHeight1_u_32x4);


			const uint32x4_t topLeftOffsetsElements0123_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft0123_u_32x4, constantChannels_u_32x4), inputPositionsTop0123_u_32x4, constantInputStrideElements_u_32x4); // topLeftOffset = top * strideElements + left * channels
			vst1q_u32(topLeftOffsetsElements + 0, topLeftOffsetsElements0123_u_32x4);
			const uint32x4_t topLeftOffsetsElements4567_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft4567_u_32x4, constantChannels_u_32x4), inputPositionsTop4567_u_32x4, constantInputStrideElements_u_32x4);
			vst1q_u32(topLeftOffsetsElements + 4, topLeftOffsetsElements4567_u_32x4);

			const uint32x4_t bottomLeftOffsetsElements0123_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft0123_u_32x4, constantChannels_u_32x4), inputPositionsBottom0123_u_32x4, constantInputStrideElements_u_32x4); // bottomLeftOffset = bottom * strideElements + left * channels
			vst1q_u32(bottomLeftOffsetsElements + 0, bottomLeftOffsetsElements0123_u_32x4);
			const uint32x4_t bottomLeftOffsetsElements4567_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft4567_u_32x4, constantChannels_u_32x4), inputPositionsBottom4567_u_32x4, constantInputStrideElements_u_32x4);
			vst1q_u32(bottomLeftOffsetsElements + 4, bottomLeftOffsetsElements4567_u_32x4);


			// we determine the fractional portions of the x' and y' and [0.0, 1.0] -> [0, 128]
			float32x4_t tx0123_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsX0123_f_32x4, vcvtq_f32_u32(inputPositionsLeft0123_u_32x4)), constant128_f_32x4);
			float32x4_t tx4567_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsX4567_f_32x4, vcvtq_f32_u32(inputPositionsLeft4567_u_32x4)), constant128_f_32x4);

			float32x4_t ty0123_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsY0123_f_32x4, vcvtq_f32_u32(inputPositionsTop0123_u_32x4)), constant128_f_32x4);
			float32x4_t ty4567_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsY4567_f_32x4, vcvtq_f32_u32(inputPositionsTop4567_u_32x4)), constant128_f_32x4);

			// adding 0.5 before the conversion to integers
			const uint32x4_t tx0123_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(tx0123_f_32x4, vdupq_n_f32(0.5)));
			const uint32x4_t tx4567_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(tx4567_f_32x4, vdupq_n_f32(0.5)));

			const uint32x4_t ty0123_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(ty0123_f_32x4, vdupq_n_f32(0.5)));
			const uint32x4_t ty4567_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(ty4567_f_32x4, vdupq_n_f32(0.5)));

			// narrowing the factors 32 bit -> 16 bit -> 8 bit
			const uint16x8_t tx01234567_128_u_16x8 = vcombine_u16(vmovn_u32(tx0123_128_u_32x4), vmovn_u32(tx4567_128_u_32x4));
			const uint16x8_t ty01234567_128_u_16x8 = vcombine_u16(vmovn_u32(ty0123_128_u_32x4), vmovn_u32(ty4567_128_u_32x4));

			// lower 8 lanes: the factors tx, upper 8 lanes: the factors ty
			const uint8x16_t tx_ty_128_u_8x16 = vcombine_u8(vmovn_u16(tx01234567_128_u_16x8), vmovn_u16(ty01234567_128_u_16x8));


			vst1q_u8(pixels + 0, constantBorderColor_u_8x16); // initialize with border color
			vst1q_u8(pixels + 16, constantBorderColor_u_8x16);

			// two horizontally neighboring pixels, copied at once
			struct LeftRightPixel
			{
				uint8_t left;
				uint8_t right;
			};

			static_assert(sizeof(LeftRightPixel) == 2, "Invalid data type!");

			// we gather the individual source pixel values from the source image,
			// based on the calculated pixel locations
			// pixels which are not valid keep the border color for all four samples
			for (unsigned int i = 0u; i < 8u; ++i)
			{
				if (validPixels[i])
				{
					ocean_assert((topLeftOffsetsElements[i] % inputStrideElements) < inputWidth - 1u); // we need to have one additional pixel to the right (as we copy two pixels at once)
					ocean_assert((bottomLeftOffsetsElements[i] % inputStrideElements) < inputWidth - 1u);

					((LeftRightPixel*)pixels)[0u + i] = *(LeftRightPixel*)(input + topLeftOffsetsElements[i]);
					((LeftRightPixel*)pixels)[8u + i] = *(LeftRightPixel*)(input + bottomLeftOffsetsElements[i]);
				}
			}

			// de-interleaving load: val[0] holds the 8 left pixels, val[1] holds the 8 right pixels
			const uint8x8x2_t topLeft_topRight_u_8x8x2 = vld2_u8(pixels);
			const uint8x8x2_t bottomLeft_bottomRight_u_8x8x2 = vld2_u8(pixels + 16);

			interpolate8Pixels1Channel8BitNEON(topLeft_topRight_u_8x8x2.val[0], topLeft_topRight_u_8x8x2.val[1], bottomLeft_bottomRight_u_8x8x2.val[0], bottomLeft_bottomRight_u_8x8x2.val[1], tx_ty_128_u_8x16, outputPixelData);

			outputPixelData += 8;
		}
	}
}
4993
// NEON implementation for frames with tChannels channels and 8 bit per channel.
// Applies the lookup table to the output rows [firstRow, firstRow + numberRows) and handles 4 output pixels per inner iteration.
// input: The input frame data, must be valid
// inputWidth, inputHeight: The resolution of the input frame in pixel, with range [1, infinity)
// input_LT_output: The lookup table, its horizontal size defines the output width (asserted to be >= 4), must be valid
// offset: True, if the lookup values are offsets which are added to the output pixel coordinates; False, if they are absolute input positions
// borderColor: The color (one value per channel) for output pixels with input position outside the input frame, nullptr to use zero for all channels
// output: The output frame data, must be valid
// inputPaddingElements, outputPaddingElements: The number of padding elements at the end of each input/output row
// firstRow, numberRows: The range of output rows to be handled
template <unsigned int tChannels>
void FrameInterpolatorBilinear::lookup8BitPerChannelSubsetNEON(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
{
	ocean_assert(input_LT_output != nullptr);
	ocean_assert(input != nullptr && output != nullptr);

	ocean_assert(inputWidth != 0u && inputHeight != 0u);
	ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());

	// one object of this type covers all channels of one pixel
	typedef typename DataType<uint8_t, tChannels>::Type PixelType;

	// fallback border color with all channels set to zero
	const uint8_t zeroColor[tChannels] = {uint8_t(0)};
	const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;

	const unsigned int outputWidth = (unsigned int)(input_LT_output->sizeX());
	ocean_assert(outputWidth >= 4u);

	static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");

	const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
	const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;

	// the lookup values of one output row, stored with 32 bit floating point precision
	Memory rowLookupMemory = Memory::create<VectorF2>(outputWidth);
	VectorF2* const rowLookupData = rowLookupMemory.data<VectorF2>();

	const float32x4_t constantZero_f_32x4 = vdupq_n_f32(0.0f); // [0.0f, 0.0f, 0.0f, 0.0f]
	const float32x4_t constantFour_f_32x4 = vdupq_n_f32(4.0f); // [4.0f, 4.0f, 4.0f, 4.0f]

	// [0.0f, 1.0f, 2.0f, 3.0f]
	const float f_0123[4] = {0.0f, 1.0f, 2.0f, 3.0f};
	float32x4_t conststant0123_f_32x4 = vld1q_f32(f_0123);

	const uint32x4_t constantOne_u_32x4 = vdupq_n_u32(1u);

	const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(tChannels);

	const float32x4_t constantInputWidth1_f_32x4 = vdupq_n_f32(float(inputWidth - 1u));
	const float32x4_t constantInputHeight1_f_32x4 = vdupq_n_f32(float(inputHeight - 1u));

	const uint32x4_t constantInputStrideElements_u_32x4 = vdupq_n_u32(inputStrideElements);
	const uint32x4_t constantInputWidth1_u_32x4 = vdupq_n_u32(inputWidth - 1u);
	const uint32x4_t constantInputHeight1_u_32x4 = vdupq_n_u32(inputHeight - 1u);

	// scalar copies of the vector results, forwarded to the interpolation function below
	unsigned int validPixels[4];

	unsigned int topLeftOffsetsElements[4];
	unsigned int topRightOffsetsElements[4];
	unsigned int bottomLeftOffsetsElements[4];
	unsigned int bottomRightOffsetsElements[4];

	for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
	{
		PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);

		input_LT_output->bilinearValues<VectorF2>(y, rowLookupData);

		// the x-coordinates of the 4 output pixels handled in the current iteration (only applied if 'offset' is set)
		float32x4_t additionalInputOffsetX_f_32x4 = conststant0123_f_32x4;
		const float32x4_t additionalInputOffsetY_f_32x4 = vdupq_n_f32(float(y));

		for (unsigned int x = 0u; x < outputWidth; x += 4u)
		{
			if (x + 4u > outputWidth)
			{
				// the last iteration will not fit into the output frame,
				// so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again

				ocean_assert(x >= 4u && outputWidth > 4u);
				const unsigned int newX = outputWidth - 4u;

				ocean_assert(x > newX);
				const unsigned int xOffset = x - newX;

				outputPixelData -= xOffset;

				if (offset)
				{
					additionalInputOffsetX_f_32x4 = vsubq_f32(additionalInputOffsetX_f_32x4, vdupq_n_f32(float(xOffset)));
				}

				x = newX;

				// the for loop will stop after this iteration
				ocean_assert(!(x + 4u < outputWidth));
			}

			// de-interleaving load: val[0] holds four x-values, val[1] holds four y-values
			const float32x4x2_t inputPositions_f_32x4x2 = vld2q_f32((const float*)(rowLookupData + x));

			float32x4_t inputPositionsX_f_32x4 = inputPositions_f_32x4x2.val[0];
			float32x4_t inputPositionsY_f_32x4 = inputPositions_f_32x4x2.val[1];

			if (offset)
			{
				// the lookup values are offsets, so we add the coordinates of the output pixels
				inputPositionsX_f_32x4 = vaddq_f32(inputPositionsX_f_32x4, additionalInputOffsetX_f_32x4);
				inputPositionsY_f_32x4 = vaddq_f32(inputPositionsY_f_32x4, additionalInputOffsetY_f_32x4);

				// preparing the x-coordinates for the next 4 output pixels
				additionalInputOffsetX_f_32x4 = vaddq_f32(additionalInputOffsetX_f_32x4, constantFour_f_32x4);
			}

			// now we check whether we are inside the input frame
			const uint32x4_t validPixelsX_u_32x4 = vandq_u32(vcleq_f32(inputPositionsX_f_32x4, constantInputWidth1_f_32x4), vcgeq_f32(inputPositionsX_f_32x4, constantZero_f_32x4)); // inputPosition.x() >= 0 && inputPosition.x() <= (inputWidth - 1) ? 0xFFFFFFFF : 0x00000000
			const uint32x4_t validPixelsY_u_32x4 = vandq_u32(vcleq_f32(inputPositionsY_f_32x4, constantInputHeight1_f_32x4), vcgeq_f32(inputPositionsY_f_32x4, constantZero_f_32x4)); // inputPosition.y() >= 0 && inputPosition.y() <= (inputHeight - 1) ? 0xFFFFFFFF : 0x00000000

			const uint32x4_t validPixels_u_32x4 = vandq_u32(validPixelsX_u_32x4, validPixelsY_u_32x4); // is_inside_input_frame(inputPosition) ? 0xFFFFFFFF : 0x00000000

			vst1q_u32(validPixels, validPixels_u_32x4);

			// the integer coordinates of the top left sample of each input position
			const uint32x4_t inputPositionsLeft_u_32x4 = vcvtq_u32_f32(inputPositionsX_f_32x4);
			const uint32x4_t inputPositionsTop_u_32x4 = vcvtq_u32_f32(inputPositionsY_f_32x4);

			// right = min(left + 1, inputWidth - 1), bottom = min(top + 1, inputHeight - 1)
			const uint32x4_t inputPositionsRight_u_32x4 = vminq_u32(vaddq_u32(inputPositionsLeft_u_32x4, constantOne_u_32x4), constantInputWidth1_u_32x4);
			const uint32x4_t inputPositionsBottom_u_32x4 = vminq_u32(vaddq_u32(inputPositionsTop_u_32x4, constantOne_u_32x4), constantInputHeight1_u_32x4);

			const uint32x4_t topLeftOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft_u_32x4, constantChannels_u_32x4), inputPositionsTop_u_32x4, constantInputStrideElements_u_32x4); // topLeftOffset = top * strideElements + left * channels
			const uint32x4_t topRightOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsRight_u_32x4, constantChannels_u_32x4), inputPositionsTop_u_32x4, constantInputStrideElements_u_32x4); // topRightOffset = top * strideElements + right * channels
			const uint32x4_t bottomLeftOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft_u_32x4, constantChannels_u_32x4), inputPositionsBottom_u_32x4, constantInputStrideElements_u_32x4); // bottomLeftOffset = bottom * strideElements + left * channels
			const uint32x4_t bottomRightOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsRight_u_32x4, constantChannels_u_32x4), inputPositionsBottom_u_32x4, constantInputStrideElements_u_32x4); // bottomRightOffset = bottom * strideElements + right * channels

			vst1q_u32(topLeftOffsetsElements, topLeftOffsetsElements_u_32x4);
			vst1q_u32(topRightOffsetsElements, topRightOffsetsElements_u_32x4);
			vst1q_u32(bottomLeftOffsetsElements, bottomLeftOffsetsElements_u_32x4);
			vst1q_u32(bottomRightOffsetsElements, bottomRightOffsetsElements_u_32x4);

			// we determine the fractional portions of the x' and y':
			float32x4_t tx_f_32x4 = vsubq_f32(inputPositionsX_f_32x4, vcvtq_f32_u32(inputPositionsLeft_u_32x4));
			float32x4_t ty_f_32x4 = vsubq_f32(inputPositionsY_f_32x4, vcvtq_f32_u32(inputPositionsTop_u_32x4));

			// we use integer interpolation [0.0, 1.0] -> [0, 128]
			tx_f_32x4 = vmulq_f32(tx_f_32x4, vdupq_n_f32(128.0f));
			ty_f_32x4 = vmulq_f32(ty_f_32x4, vdupq_n_f32(128.0f));

			// adding 0.5 before the conversion to integers
			const uint32x4_t tx_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(tx_f_32x4, vdupq_n_f32(0.5)));
			const uint32x4_t ty_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(ty_f_32x4, vdupq_n_f32(0.5)));

			// the gathering of the source pixels, the handling of pixels which are not valid, and the interpolation are delegated
			interpolate4Pixels8BitPerChannelNEON<tChannels>(input, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, tx_128_u_32x4, ty_128_u_32x4, outputPixelData);

			outputPixelData += 4;
		}
	}
}
5133
5134#endif // defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5135
5136template <unsigned int tChannels>
5137void FrameInterpolatorBilinear::lookupMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5138{
5139 ocean_assert(input_LT_output != nullptr);
5140 ocean_assert(input != nullptr && output != nullptr);
5141
5142 ocean_assert(inputWidth != 0u && inputHeight != 0u);
5143 ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
5144
5145 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
5146
5147 const unsigned int columns = (unsigned int)(input_LT_output->sizeX());
5148
5149 const unsigned int outputStrideElements = tChannels * columns + outputPaddingElements;
5150 const unsigned int outputMaskStrideElements = columns + outputMaskPaddingElements;
5151
5152 static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
5153
5154 const Scalar inputWidth1 = Scalar(inputWidth - 1u);
5155 const Scalar inputHeight1 = Scalar(inputHeight - 1u);
5156
5157 Memory rowLookupMemory = Memory::create<Vector2>(columns);
5158 Vector2* const rowLookupData = rowLookupMemory.data<Vector2>();
5159
5160 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
5161 {
5162 input_LT_output->bilinearValues(y, rowLookupData);
5163
5164 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
5165 uint8_t* outputMaskData = outputMask + y * outputMaskStrideElements;
5166
5167 for (unsigned int x = 0u; x < columns; ++x)
5168 {
5169 const Vector2& lookupValue = rowLookupData[x];
5170
5171 const Vector2 inputPosition = offset ? Vector2(Scalar(x) + lookupValue.x(), Scalar(y) + lookupValue.y()) : lookupValue;
5172
5173 if (inputPosition.x() >= 0 && inputPosition.y() >= 0 && inputPosition.x() <= inputWidth1 && inputPosition.y() <= inputHeight1)
5174 {
5175 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputData));
5176 *outputMaskData = maskValue;
5177 }
5178 else
5179 {
5180 *outputMaskData = 0xFFu - maskValue;
5181 }
5182
5183 outputData++;
5184 outputMaskData++;
5185 }
5186 }
5187}
5188
5189template <unsigned int tChannels>
5190void FrameInterpolatorBilinear::scale8BitPerChannel(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
5191{
5192 ocean_assert(source != nullptr && target != nullptr);
5193 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
5194 ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
5195 ocean_assert(sourceX_s_targetX > 0.0);
5196 ocean_assert(sourceY_s_targetY > 0.0);
5197
5198 if (sourceWidth == targetWidth && sourceHeight == targetHeight)
5199 {
5200 FrameConverter::subFrame<uint8_t>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, 0u, 0u, 0u, 0u, sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements);
5201 return;
5202 }
5203
5204 if (worker && sourceWidth * tChannels >= 16u && targetWidth >= 8u)
5205 {
5206#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5207 if (sourceWidth <= 65535u && sourceHeight <= 65535u && targetWidth <= 65535u && targetHeight <= 65535u)
5208 {
5209 worker->executeFunction(Worker::Function::createStatic(&scale8BitPerChannelSubset7BitPrecisionNEON, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
5210 return;
5211 }
5212#else
5213 worker->executeFunction(Worker::Function::createStatic(&scale8BitPerChannelSubset<tChannels>, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
5214#endif
5215 }
5216 else
5217 {
5218 if (sourceWidth * tChannels >= 16u && targetWidth >= 8u)
5219 {
5220#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5221 if (sourceWidth <= 65535u && sourceHeight <= 65535u && targetWidth <= 65535u && targetHeight <= 65535u)
5222 {
5223 scale8BitPerChannelSubset7BitPrecisionNEON(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
5224 return;
5225 }
5226#endif
5227 }
5228
5229 scale8BitPerChannelSubset<tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
5230 }
5231}
5232
5233template <unsigned int tChannels>
5234void FrameInterpolatorBilinear::scale8BitPerChannelSubset(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
5235{
5236 ocean_assert(source != nullptr && target != nullptr);
5237 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
5238 ocean_assert_and_suppress_unused(targetWidth >= 1u && targetHeight >= 1u, targetHeight);
5239 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5240
5241 const Scalar sourceX_T_targetX = Scalar(sourceX_s_targetX);
5242 const Scalar sourceY_T_targetY = Scalar(sourceY_s_targetY);
5243
5244 /*
5245 * We determine the sub-pixel accurate source location for each target pixel as follows:
5246 *
5247 * Example with a downsampling by factor 4:
5248 * sourceRow with 12 pixels: | 0 1 2 3 4 5 6 7 8 9 A B |
5249 * targetRow with 3 pixels: | 0 1 2 |
5250 *
5251 * Thus, the source row can be separated into three blocks;
5252 * and we want to extract the color information from the center of the blocks:
5253 * sourceRow with 12 pixels: | 0 1 2 3 | 4 5 6 7 | 8 9 A B |
5254 * targetRow with 3 pixels: | 0 | 1 | 2 | (sourceX_s_targetX = 4)
5255 *
5256 * Thus, we add 0.5 to each target coordinate before converting it to a source location;
5257 * and subtract 0.5 again afterwards:
5258 * sourceX = (targetX + 0.5) * sourceX_s_targetX - 0.5
5259 *
5260 * e.g., (0 + 0.5) * 4 - 0.5 = 1.5
5261 * (1 + 0.5) * 4 - 0.5 = 5.5
5262 *
5263 *
5264 * Example with a downsampling by factor 3:
5265 * sourceRow with 9 pixels: | 0 1 2 3 4 5 6 7 8 |
5266 * targetRow with 3 pixels: | 0 1 2 |
5267 *
5268 * sourceRow with 9 pixels: | 0 1 2 | 3 4 5 | 6 7 8 |
5269 * targetRow with 3 pixels: | 0 | 1 | 2 | (sourceX_s_targetX = 3)
5270 *
5271 * e.g., (0 + 0.5) * 3 - 0.5 = 1
5272 * (1 + 0.5) * 3 - 0.5 = 4
5273 *
5274 *
5275 * Example with a downsampling by factor 2:
5276 * sourceRow with 6 pixels: | 0 1 2 3 4 5 |
5277 * targetRow with 3 pixels: | 0 1 2 |
5278 *
5279 * sourceRow with 6 pixels: | 0 1 | 2 3 | 4 5 |
5280 * targetRow with 3 pixels: | 0 | 1 | 2 | (sourceX_s_targetX = 2)
5281 *
5282 * e.g., (0 + 0.5) * 2 - 0.5 = 0.5
5283 * (1 + 0.5) * 2 - 0.5 = 2.5
5284 *
5285 *
5286 * we can simplify the calculation (as we have a constant term):
5287 * sourceX = (sourceX_s_targetX * targetX) + (sourceX_s_targetX * 0.5 - 0.5)
5288 */
5289
5290 const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
5291
5292 const Scalar sourceX_T_targetXOffset = sourceX_T_targetX * Scalar(0.5) - Scalar(0.5);
5293 const Scalar sourceY_T_targetYOffset = sourceY_T_targetY * Scalar(0.5) - Scalar(0.5);
5294
5295 const Scalar sourceWidth_1 = Scalar(sourceWidth - 1u);
5296 const Scalar sourceHeight_1 = Scalar(sourceHeight - 1u);
5297
5298 target += (targetWidth * tChannels + targetPaddingElements) * firstTargetRow;
5299
5300 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5301 {
5302 const Scalar sy = minmax(Scalar(0), sourceY_T_targetYOffset + sourceY_T_targetY * Scalar(y), sourceHeight_1);
5303 ocean_assert(sy >= Scalar(0) && sy < Scalar(sourceHeight));
5304
5305 const unsigned int sTop = (unsigned int)sy;
5306 ocean_assert(sy >= Scalar(sTop));
5307
5308 const Scalar ty = sy - Scalar(sTop);
5309 ocean_assert(ty >= 0 && ty <= 1);
5310
5311 const unsigned int factorBottom = (unsigned int)(ty * Scalar(128) + Scalar(0.5));
5312 const unsigned int factorTop = 128u - factorBottom;
5313
5314 const uint8_t* const sourceTop = source + sourceStrideElements * sTop;
5315 const uint8_t* const sourceBottom = (sTop + 1u < sourceHeight) ? sourceTop + sourceStrideElements : sourceTop;
5316
5317 for (unsigned int x = 0; x < targetWidth; ++x)
5318 {
5319 const Scalar sx = minmax(Scalar(0), sourceX_T_targetXOffset + sourceX_T_targetX * Scalar(x), sourceWidth_1);
5320 ocean_assert(sx >= Scalar(0) && sx < Scalar(sourceWidth));
5321
5322 const unsigned int sLeft = (unsigned int)sx;
5323 ocean_assert(sx >= Scalar(sLeft));
5324
5325 const Scalar tx = sx - Scalar(sLeft);
5326 ocean_assert(tx >= 0 && tx <= 1);
5327
5328 const unsigned int factorRight = (unsigned int)(tx * Scalar(128) + Scalar(0.5));
5329 const unsigned int factorLeft = 128u - factorRight;
5330
5331 const unsigned int sourceRightOffset = sLeft + 1u < sourceWidth ? tChannels : 0u;
5332
5333 const uint8_t* const sourceTopLeft = sourceTop + sLeft * tChannels;
5334 const uint8_t* const sourceBottomLeft = sourceBottom + sLeft * tChannels;
5335
5336 const unsigned int factorTopLeft = factorTop * factorLeft;
5337 const unsigned int factorTopRight = factorTop * factorRight;
5338 const unsigned int factorBottomLeft = factorBottom * factorLeft;
5339 const unsigned int factorBottomRight = factorBottom * factorRight;
5340
5341 for (unsigned int n = 0u; n < tChannels; ++n)
5342 {
5343 target[n] = (uint8_t)((sourceTopLeft[n] * factorTopLeft + sourceTopLeft[sourceRightOffset + n] * factorTopRight
5344 + sourceBottomLeft[n] * factorBottomLeft + sourceBottomLeft[sourceRightOffset + n] * factorBottomRight + 8192u) >> 14u);
5345 }
5346
5347 target += tChannels;
5348 }
5349
5350 target += targetPaddingElements;
5351 }
5352}
5353
5354template <typename T>
5355void FrameInterpolatorBilinear::interpolateRowVertical(const T* sourceRowTop, const T* sourceRowBottom, T* targetRow, const unsigned int elements, const float factorBottom)
5356{
5357 ocean_assert(sourceRowTop != nullptr);
5358 ocean_assert(sourceRowBottom != nullptr);
5359 ocean_assert(targetRow != nullptr);
5360 ocean_assert(elements >= 1u);
5361 ocean_assert(factorBottom >= 0.0f && factorBottom <= 1.0f);
5362
5363 typedef typename FloatTyper<T>::Type FloatType;
5364
5365 const FloatType internalFactorBottom = FloatType(factorBottom);
5366 const FloatType internalFactorTop = FloatType(1.0f - factorBottom);
5367
5368 for (unsigned int n = 0u; n < elements; ++n)
5369 {
5370 targetRow[n] = T(FloatType(sourceRowTop[n]) * internalFactorTop + FloatType(sourceRowBottom[n]) * internalFactorBottom);
5371 }
5372}
5373
5374template <typename T, unsigned int tChannels>
5375void FrameInterpolatorBilinear::interpolateRowHorizontal(const T* extendedSourceRow, T* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const float* interpolationFactorsRight)
5376{
5377 static_assert(tChannels != 0u, "Invalid channel number!");
5378
5379 ocean_assert(extendedSourceRow != nullptr);
5380 ocean_assert(targetRow != nullptr);
5381 ocean_assert(targetWidth >= 1u);
5382 ocean_assert(interpolationLocations != nullptr);
5383 ocean_assert(interpolationFactorsRight != nullptr);
5384 ocean_assert(channels == tChannels);
5385
5386 typedef typename FloatTyper<T>::Type FloatType;
5387
5388 for (unsigned int x = 0u; x < targetWidth; ++x)
5389 {
5390 const FloatType internalFactorRight = FloatType(interpolationFactorsRight[x]);
5391 ocean_assert(internalFactorRight >= FloatType(0) && internalFactorRight <= FloatType(1));
5392
5393 const FloatType internalFactorLeft = FloatType(1.0f - interpolationFactorsRight[x]);
5394
5395 const unsigned int& leftLocation = interpolationLocations[x];
5396 const unsigned int rightLocation = leftLocation + tChannels; // location is defined in relation to elements, not to pixels
5397
5398 for (unsigned int n = 0u; n < tChannels; ++n)
5399 {
5400 targetRow[x * tChannels + n] = T(FloatType(extendedSourceRow[leftLocation + n]) * internalFactorLeft + FloatType(extendedSourceRow[rightLocation + n]) * internalFactorRight);
5401 }
5402 }
5403}
5404
5405#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5406
5407#ifdef OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION
5408
/**
 * Specialization of scale8BitPerChannelSubset7BitPrecisionNEON() for the template arguments <2u, 8u>.
 * The function resizes a subset of the target rows of a frame with two interleaved 8 bit channels by bilinear interpolation using NEON instructions.
 * Source locations are handled as fixed point numbers with 16 fractional bits, the interpolation factors are reduced to 7 bit precision (range [0, 128]).
 * Eight target pixels are calculated per iteration; the last iteration of a row is shifted to the left if necessary so that it ends exactly at the last target pixel.
 * This implementation is compiled only if OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION is defined.
 * NOTE(review): the second template argument (8u) is presumably the minimal target width (the asserts require targetWidth >= 8) - confirm against the declaration.
 * @param source The source frame data, must be valid
 * @param target The target frame data, must be valid
 * @param sourceWidth The width of the source frame in pixel, asserted to be in range [2, 65535]
 * @param sourceHeight The height of the source frame in pixel, asserted to be in range [1, 65535]
 * @param targetWidth The width of the target frame in pixel, asserted to be in range [8, 65535]
 * @param targetHeight The height of the target frame in pixel, asserted to be in range [1, 65535]; used for the assert only
 * @param sourceX_s_targetX The horizontal factor converting a target location to a source location (sourceX = sourceX_s_targetX * targetX plus a constant half-pixel offset), asserted to be > 0
 * @param sourceY_s_targetY The vertical factor converting a target location to a source location, asserted to be > 0
 * @param sourcePaddingElements The padding elements at the end of each source row, asserted to be 0 (padding is not supported)
 * @param targetPaddingElements The padding elements at the end of each target row, asserted to be 0
 * @param firstTargetRow The first target row to be handled
 * @param numberTargetRows The number of target rows to be handled
 */
5409template <>
5410inline void FrameInterpolatorBilinear::scale8BitPerChannelSubset7BitPrecisionNEON<2u, 8u>(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
5411{
5412 ocean_assert(source != nullptr && target != nullptr);
5413 ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
5414 ocean_assert(sourceHeight >= 1u && sourceHeight <= 65535u);
5415 ocean_assert(targetWidth >= 8u && targetWidth <= 65535u);
5416 ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
5417 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5418
5419 ocean_assert(sourcePaddingElements == 0u); // not supported
5420 ocean_assert(targetPaddingElements == 0u);
5421
5422 typedef typename DataType<uint8_t, 2u>::Type PixelType;
5423
 // from here on, rows are addressed in pixels (PixelType) and not in elements; this relies on the zero padding asserted above
5424 PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
5425 const PixelType* const sourcePixelData = (const PixelType*)source;
5426
5427 // our offset values for the eight left pixels in relation to the first pixel of the row (defined in pixels)
5428 unsigned int leftOffsets[8];
5429
5430 // this function uses fixed point numbers with 16 bit for the calculation of the interpolation positions and factors:
5431 // fixedPointLocation = floatLocation * 2^16
5432 //
5433 // [FEDCBA98, 76543210]
5434 // [pixel , subpixel]
5435 //
5436 // fixedPointLocation = pixel + subpixel / 2^16
5437 //
5438 // Thus, the upper 16 bit represent the location of e.g., the left pixel (for the linear interpolation)
5439 // while the lower 16 bit represent one of both interpolation factors (and 2^16 - subpixel represents the second interpolation factor)
5440
5441 const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
5442 const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
5443
 // the constant offset (scale * 0.5 - 0.5) of the target-to-source mapping, as fixed point number; the offset can be negative and is therefore signed
5444 const int targetOffsetX_fixed16 = (int)(double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
5445 const int targetOffsetY_fixed16 = (int)(double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
5446
5447 // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
5448 const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
5449
5450 // we store 4 integers: [targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16]
5451 const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
5452
5453 // we store 4 integers: [sourceWidth - 2, sourceWidth - 2, sourceWidth - 2, sourceWidth - 2]
 // the left pixel is clamped to sourceWidth - 2, as the right pixel (left + 1) is always loaded as well
5454 const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
5455
5456 // we store 4 integers: [0, 0, 0, 0]
5457 const int32x4_t m128_s_zero = vdupq_n_s32(0);
5458
5459 const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
5460 const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
5461
5462 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5463 {
 // the vertical source location as fixed point number, clamped to the range [0, sourceHeight - 1]
5464 const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 + int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
5465
5466 const unsigned int sourceRowTop = sourceY_fixed16 >> 16u; // we must not round here
5467 const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
5468 const unsigned int factorBottom = factorBottom_fixed16 >> 9u; // 16 bit -> 7 bit precision (truncated, not rounded)
5469
5470 const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
5471 // factorTop = 128 - factorBottom
5472 const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
5473
 // for the last source row, the bottom row is identical to the top row
5474 const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
5475
5476 const PixelType* const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
5477 const PixelType* const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
5478
5479 for (unsigned int x = 0; x < targetWidth; x += 8u)
5480 {
5481 if (x + 8u > targetWidth)
5482 {
5483 // the last iteration will not fit into the output frame,
5484 // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
5485
5486 ocean_assert(x >= 8u && targetWidth > 8u);
5487 const unsigned int newX = targetWidth - 8u;
5488
5489 ocean_assert(x > newX);
5490 targetPixelData -= x - newX;
5491
5492 x = newX;
5493
5494 // the for loop will stop after this iteration
5495 ocean_assert(!(x + 8u < targetWidth));
5496 }
5497
5498
5499 // we need eight successive x coordinates (unsigned integers), stored in two registers:
5500 // [x + 3, x + 2, x + 1, x + 0] and [x + 7, x + 6, x + 5, x + 4]
5501 const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
5502 const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
5503
5504 // we calculate the eight source locations (as fixed point numbers, clamped to >= 0) for our eight target locations
5505 const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
5506 const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
5507
5508 const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
5509 const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
5510
5511 // now we determine the pixel/integer accurate source locations
5512 // m128_u_left = min(sourceX_fixed16 >> 16, sourceWidth - 2)
5513 const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2); // not vrshrq_n_u32 as we must not round here
5514 const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
5515
5516 // we store the offsets we have calculated
5517 vst1q_u32(leftOffsets + 0, m128_u_left_0123);
5518 vst1q_u32(leftOffsets + 4, m128_u_left_4567);
5519
5520
5521
5522 // we load the individual pixels to our four (de-interleaved) 8x8 bit registers (we do this for the top-left and top-right pixels)
5523 // note: loading of each pixel individually is significantly slower than loading two neighboring pixels within one iteration
 // the registers are intentionally not initialized, as all eight lanes are filled by the following lane-wise loads
5524
5525 uint8x8x2_t topLeftPixels;
5526 uint8x8x2_t topRightPixels;
5527
5528 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 0), topLeftPixels, 0);
5529 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 1), topRightPixels, 0);
5530
5531 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 0), topLeftPixels, 1);
5532 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 1), topRightPixels, 1);
5533
5534 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 0), topLeftPixels, 2);
5535 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 1), topRightPixels, 2);
5536
5537 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 0), topLeftPixels, 3);
5538 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 1), topRightPixels, 3);
5539
5540 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 0), topLeftPixels, 4);
5541 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 1), topRightPixels, 4);
5542
5543 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 0), topLeftPixels, 5);
5544 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 1), topRightPixels, 5);
5545
5546 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 0), topLeftPixels, 6);
5547 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 1), topRightPixels, 6);
5548
5549 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 0), topLeftPixels, 7);
5550 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 1), topRightPixels, 7);
5551
5552
5553 // we load the individual pixels to our four (de-interleaved) 8x8 bit registers (we do this for the bottom-left and bottom-right pixels)
5554
5555 uint8x8x2_t bottomLeftPixels;
5556 uint8x8x2_t bottomRightPixels;
5557
5558 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 0), bottomLeftPixels, 0);
5559 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 1), bottomRightPixels, 0);
5560
5561 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 0), bottomLeftPixels, 1);
5562 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 1), bottomRightPixels, 1);
5563
5564 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 0), bottomLeftPixels, 2);
5565 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 1), bottomRightPixels, 2);
5566
5567 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 0), bottomLeftPixels, 3);
5568 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 1), bottomRightPixels, 3);
5569
5570 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 0), bottomLeftPixels, 4);
5571 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 1), bottomRightPixels, 4);
5572
5573 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 0), bottomLeftPixels, 5);
5574 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 1), bottomRightPixels, 5);
5575
5576 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 0), bottomLeftPixels, 6);
5577 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 1), bottomRightPixels, 6);
5578
5579 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 0), bottomLeftPixels, 7);
5580 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 1), bottomRightPixels, 7);
5581
5582
5583
5584 // we determine the multiplication factors for the right pixels - which are already stored in the lower 16 bits
5585 // we need an accuracy of 7 bits (values between 0 and 128), so we apply a rounding right shift by 9 bits:
5586 // 76 54 32 10
5587 // [F3 F2 F1 F0]
5588 const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5589 const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5590
5591 // as we have the pixel information de-interleaved, we can store all 8 interpolation factors together into one 8x8 bit register:
5592 const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
5593 const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
 // factorLeft = 128 - factorRight
5594 const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
5595
5596
5597
5598 // we determine the intermediate interpolation results for the top row (and we narrow down the 16 bit results to 8 bit results)
5599 uint16x8_t m128_muliplicationChannel_0 = vmull_u8(topLeftPixels.val[0], m64_u_factorsLeft);
5600 uint16x8_t m128_muliplicationChannel_1 = vmull_u8(topLeftPixels.val[1], m64_u_factorsLeft);
5601
5602 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, topRightPixels.val[0], m64_u_factorsRight);
5603 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, topRightPixels.val[1], m64_u_factorsRight);
5604
 // rounding right shift by 7 bits, as both factors sum up to 128
5605 uint8x8_t m64_topRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
5606 uint8x8_t m64_topRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
5607
5608
5609
5610 // we determine the intermediate interpolation results for the bottom row (and we narrow down the 16 bit results to 8 bit results)
5611 m128_muliplicationChannel_0 = vmull_u8(bottomLeftPixels.val[0], m64_u_factorsLeft);
5612 m128_muliplicationChannel_1 = vmull_u8(bottomLeftPixels.val[1], m64_u_factorsLeft);
5613
5614 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, bottomRightPixels.val[0], m64_u_factorsRight);
5615 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, bottomRightPixels.val[1], m64_u_factorsRight);
5616
5617 uint8x8_t m64_bottomRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
5618 uint8x8_t m64_bottomRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
5619
5620
5621
5622 // finally we determine the interpolation result between top and bottom row
5623 m128_muliplicationChannel_0 = vmull_u8(m64_topRowChannel_0, m64_u_factorsTop);
5624 m128_muliplicationChannel_1 = vmull_u8(m64_topRowChannel_1, m64_u_factorsTop);
5625
5626 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, m64_bottomRowChannel_0, m64_u_factorsBottom);
5627 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, m64_bottomRowChannel_1, m64_u_factorsBottom);
5628
5629
5630 // we narrow down the interpolation results and we store them
5631 uint8x8x2_t result;
5632 result.val[0] = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
5633 result.val[1] = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
5634
5635 // we write back the results and interleave them automatically
5636 vst2_u8((uint8_t*)targetPixelData, result);
5637
5638 targetPixelData += 8;
5639 }
5640
5641 // we need to process the last pixel again, as this pixel may have received wrong interpolation factors as we always load two successive pixels into our NEON registers
5642 // **TODO** this is just a temporary solution, check how we can avoid this additional step
5643
 // the first target column (rounded down) whose source location reaches the last source column (sourceWidth - 1);
 // for such columns the NEON code above clamps the left pixel to sourceWidth - 2 but keeps the sub-pixel factor, so that they are re-calculated below
5644 const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
5645
5646 for (unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
5647 {
5648 const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 + int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
5649
5650 const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
5651 ocean_assert(lastSourcePixelLeft < sourceWidth);
5652 const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
5653
5654 const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
5655
5656 const unsigned int factorRight = factorRight_fixed16 >> 9u; // 16 bit -> 7 bit precision (truncated, not rounded)
5657 const unsigned int factorLeft = 128u - factorRight;
5658
5659 for (unsigned int c = 0u; c < 2u; ++c)
5660 {
 // targetPixelData points behind the last pixel of the current target row, so that the pixel is addressed relative to the end of the row;
 // scalar bilinear interpolation: both 7 bit factors are multiplied, the 14 bit result is rounded (+ 8192) and shifted back
5661 ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * (128u - factorBottom)
5662 + (((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
5663 }
5664 }
5665 }
5666}
5667
5668#endif // OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION
5669
5670#ifdef OCEAN_WE_KEEP_ALSO_THIS_SLOW_IMPLEMENTATION_SHOWING_A_SLIGHTLY_DIFFERENT_APPROACH
5671
5672template <>
5673inline void FrameInterpolatorBilinear::scale8BitPerChannelSubset7BitPrecisionNEON<2u, 8u>(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
5674{
5675 ocean_assert(source != nullptr && target != nullptr);
5676 ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
5677 ocean_assert(sourceHeight >= 0u && sourceHeight <= 65535u);
5678 ocean_assert(targetWidth >= 8u && targetWidth <= 65535u)
5679 ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
5680 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5681
5682 ocean_assert(sourcePaddingElements == 0u); // not supported
5683 ocean_assert(targetPaddingElements == 0u);
5684
5685 typedef typename DataType<uint8_t, 2u>::Type PixelType;
5686
5687 PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
5688 const PixelType* const sourcePixelData = (const PixelType*)source;
5689
5690 // our offset values for the four left pixels in relation to the first pixel of the row
5691 unsigned int leftOffsets[8];
5692
5693 // our color values of the eight top and bottom pixels (32 bit = 16 bit left and 16 bit right)
5694 unsigned int topPixels[8];
5695 unsigned int bottomPixels[8];
5696
5697 // this function uses fixed point numbers with 16 bit for the calculation of the interpolation positions and factors:
5698 // fixedPointLocation = floatLocation * 2^16
5699 //
5700 // [FEDCBA98, 76543210]
5701 // [pixel , subpixel]
5702 //
5703 // fixedPointLocation = pixel + subpixel / 2^16
5704 //
5705 // Thus, the upper 16 bit represent the location of e.g., the left pixel (for the linear interpolation)
5706 // while the lower 16 bit represent one of both interpolation factors (and 2^16 - subpixel represents the second interpolation factor)
5707
5708 const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
5709 const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
5710
5711 const int targetOffsetX_fixed16 = (int)(double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
5712 const int targetOffsetY_fixed16 = (int)(double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
5713
5714 // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
5715 const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
5716
5717 // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
5718 const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
5719
5720 // we store 4 integers: [sourceWidth - 2, sourceWidth - 2, sourceWidth - 2, sourceWidth - 2]
5721 const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
5722
5723 // we store 4 integers: [0, 0, 0, 0]
5724 const int32x4_t m128_s_zero = vdupq_n_s32(0);
5725
5726 const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
5727 const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
5728
5729 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5730 {
5731 const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 + int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
5732
5733 const unsigned int sourceRowTop = sourceY_fixed16 >> 16u; // we must not round here
5734 const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
5735 const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
5736
5737 const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
5738 // factorTop = 128 - factorBottom
5739 const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
5740
5741 const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
5742
5743 const PixelType* const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
5744 const PixelType* const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
5745
5746 for (unsigned int x = 0; x < targetWidth; x += 8u)
5747 {
5748 if (x + 8u > targetWidth)
5749 {
5750 // the last iteration will not fit into the output frame,
5751 // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
5752
5753 ocean_assert(x >= 8u && targetWidth > 8u);
5754 const unsigned int newX = targetWidth - 8u;
5755
5756 ocean_assert(x > newX);
5757 targetPixelData -= x - newX;
5758
5759 x = newX;
5760
5761 // the for loop will stop after this iteration
5762 ocean_assert(!(x + 8u < targetWidth));
5763 }
5764
5765
5766 // we need four successive x coordinate floats:
5767 // [x + 3, x + 2, x + 1; x + 0]
5768 const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
5769 const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
5770
5771 // we calculate the four source locations for our four target locations
5772 const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
5773 const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
5774
5775 const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
5776 const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
5777
5778 // now we determine the pixel/integer accurate source locations
5779 // m128_u_left = min(floor(m128_f_sourceX), sourceWidth - 2)
5780 const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2); // not vrshrq_n_u32 as we must not round here
5781 const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
5782
5783 // we store the offsets we have calculated
5784 vst1q_u32(leftOffsets + 0, m128_u_left_0123);
5785 vst1q_u32(leftOffsets + 4, m128_u_left_4567);
5786
5787
5788
5789 // we load the left and the right pixels into an intermediate buffer
5790 // with following pattern (with top-left TL, and top-right TR):
5791 // F E D C B A 9 8 7 6 5 4 3 2 1 0
5792 // [TR3 TR3 TL3 TL3 TR2 TR2 TL2 TL2 TR1 TR1 TL1 TL1 TR0 TR0 TL0 TL0]
5793 // [TR7 TR7 TL7 TL7 TR6 TR6 TL6 TL6 TR5 TR5 TL5 TL5 TR4 TR4 TL4 TL4]
5794
5795 for (unsigned int n = 0u; n < 8u; ++n)
5796 {
5797 topPixels[n] = *(unsigned int*)(sourceTopRowPixelData + leftOffsets[n]);
5798 }
5799
5800 const uint16x8_t m128_topPixels_0123 = vreinterpretq_u16_u32(vld1q_u32(topPixels + 0));
5801 const uint16x8_t m128_topPixels_4567 = vreinterpretq_u16_u32(vld1q_u32(topPixels + 4));
5802
5803 for (unsigned int n = 0u; n < 8u; ++n)
5804 {
5805 bottomPixels[n] = *(unsigned int*)(sourceBottomRowPixelData + leftOffsets[n]);
5806 }
5807
5808 const uint16x8_t m128_bottomPixels_0123 = vreinterpretq_u16_u32(vld1q_u32(bottomPixels + 0));
5809 const uint16x8_t m128_bottomPixels_4567 = vreinterpretq_u16_u32(vld1q_u32(bottomPixels + 4));
5810
5811
5812 // we determine the multiplication factors for the right pixels - which are already stored in the lower 16 bits
5813 // we need an accuracy of 7 bits (values between 0 and 128):
5814 // 76 54 32 10
5815 // [F3 F2 F1 F0]
5816 const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5817 const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5818
5819 // as we will have the pixel information de-interleaved, we can store all 8 interpolation factors together into one 8x8 bit register:
5820 const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
5821 const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
5822
5823 // nw we have the interpolation factors for 8 left and 8 right pixels:
5824 // 7 6 5 4 3 2 1 0
5825 // [F7 F6 F5 F4 F3 F2 F1 F0]
5826 const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
5827
5828
5829 // we de-interleave the top pixels to left and right pixels:
5830 // F E D C B A 9 8 7 6 5 4 3 2 1 0
5831 // [TL7 TL7 TL6 TL6 TL5 TL5 TL4 TL4 TL3 TL3 TL2 TL2 TL1 TL1 TL0 TL0]
5832 // [TR7 TR7 TR6 TR6 TR5 TR5 TR4 TR4 TR3 TR3 TR2 TR2 TR1 TR1 TR0 TR0]
5833 const uint16x8x2_t m2_128_topPixelsLeftRight = vuzpq_u16(m128_topPixels_0123, m128_topPixels_4567);
5834
5835 // we de-interleave the pixels again to separate channel 0 and channel 1:
5836 // 7 6 5 4 3 2 1 0
5837 // channel 0: [TL7 TL6 TL5 TL4 TL3 TL2 TL1 TL0]
5838 // channel 1: [TL7 TL6 TL5 TL4 TL3 TL2 TL1 TL0]
5839 const uint8x8x2_t m2_64_topPixelsLeft_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[0])), vget_high_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[0])));
5840 const uint8x8x2_t m2_64_topPixelsRight_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[1])), vget_high_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[1])));
5841
5842 const uint8x8_t& m64_topPixelsLeft_channel_0 = m2_64_topPixelsLeft_channels_01.val[0];
5843 const uint8x8_t& m64_topPixelsLeft_channel_1 = m2_64_topPixelsLeft_channels_01.val[1];
5844
5845 const uint8x8_t& m64_topPixelsRight_channel_0 = m2_64_topPixelsRight_channels_01.val[0];
5846 const uint8x8_t& m64_topPixelsRight_channel_1 = m2_64_topPixelsRight_channels_01.val[1];
5847
5848
5849 // we determine the intermediate interpolation results for the top row (and we narrow down the 16 bit results 8 bit results)
5850 uint16x8_t m128_muliplication_channel_0 = vmull_u8(m64_topPixelsLeft_channel_0, m64_u_factorsLeft);
5851 uint16x8_t m128_muliplication_channel_1 = vmull_u8(m64_topPixelsLeft_channel_1, m64_u_factorsLeft);
5852
5853 m128_muliplication_channel_0 = vmlal_u8(m128_muliplication_channel_0, m64_topPixelsRight_channel_0, m64_u_factorsRight);
5854 m128_muliplication_channel_1 = vmlal_u8(m128_muliplication_channel_1, m64_topPixelsRight_channel_1, m64_u_factorsRight);
5855
5856 const uint8x8_t m64_topRow_channel_0 = vrshrn_n_u16(m128_muliplication_channel_0, 7);
5857 const uint8x8_t m64_topRow_channel_1 = vrshrn_n_u16(m128_muliplication_channel_1, 7);
5858
5859
5860 // we proceed with the bottom pixels (as we did with the top pixels)
5861 const uint16x8x2_t m2_128_bottomPixelsLeftRight = vuzpq_u16(m128_bottomPixels_0123, m128_bottomPixels_4567);
5862
5863 const uint8x8x2_t m2_64_bottomPixelsLeft_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[0])), vget_high_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[0])));
5864 const uint8x8x2_t m2_64_bottomPixelsRight_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[1])), vget_high_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[1])));
5865
5866 const uint8x8_t& m64_bottomPixelsLeft_channel_0 = m2_64_bottomPixelsLeft_channels_01.val[0];
5867 const uint8x8_t& m64_bottomPixelsLeft_channel_1 = m2_64_bottomPixelsLeft_channels_01.val[1];
5868
5869 const uint8x8_t& m64_bottomPixelsRight_channel_0 = m2_64_bottomPixelsRight_channels_01.val[0];
5870 const uint8x8_t& m64_bottomPixelsRight_channel_1 = m2_64_bottomPixelsRight_channels_01.val[1];
5871
5872
5873 // we determine the intermediate interpolation results for the bottom row (and we narrow down the 16 bit results 8 bit results)
5874 m128_muliplication_channel_0 = vmull_u8(m64_bottomPixelsLeft_channel_0, m64_u_factorsLeft);
5875 m128_muliplication_channel_1 = vmull_u8(m64_bottomPixelsLeft_channel_1, m64_u_factorsLeft);
5876
5877 m128_muliplication_channel_0 = vmlal_u8(m128_muliplication_channel_0, m64_bottomPixelsRight_channel_0, m64_u_factorsRight);
5878 m128_muliplication_channel_1 = vmlal_u8(m128_muliplication_channel_1, m64_bottomPixelsRight_channel_1, m64_u_factorsRight);
5879
5880 const uint8x8_t m64_bottomRow_channel_0 = vrshrn_n_u16(m128_muliplication_channel_0, 7);
5881 const uint8x8_t m64_bottomRow_channel_1 = vrshrn_n_u16(m128_muliplication_channel_1, 7);
5882
5883
5884 // finnally we determine the interpolation result between top and bottom row
5885 m128_muliplication_channel_0 = vmull_u8(m64_topRow_channel_0, m64_u_factorsTop);
5886 m128_muliplication_channel_1 = vmull_u8(m64_topRow_channel_1, m64_u_factorsTop);
5887
5888 m128_muliplication_channel_0 = vmlal_u8(m128_muliplication_channel_0, m64_bottomRow_channel_0, m64_u_factorsBottom);
5889 m128_muliplication_channel_1 = vmlal_u8(m128_muliplication_channel_1, m64_bottomRow_channel_1, m64_u_factorsBottom);
5890
5891
5892 // we narrow down the interpolation results and we store them
5893 uint8x8x2_t m2_64_result;
5894 m2_64_result.val[0] = vrshrn_n_u16(m128_muliplication_channel_0, 7);
5895 m2_64_result.val[1] = vrshrn_n_u16(m128_muliplication_channel_1, 7);
5896
5897 // we write back the results and interleave them automatically
5898 vst2_u8((uint8_t*)targetPixelData, m2_64_result);
5899
5900 targetPixelData += 8;
5901 }
5902
5903 // we need to process the last pixel again, as this pixel may have received wrong interpolation factors as we always load two successive pixels into our NEON registers
5904 // **TODO** this is just a temporary solution, check how we can avoid this additional step
5905
5906 const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
5907
5908 for (unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
5909 {
5910 const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 + int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
5911
5912 const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
5913 ocean_assert(lastSourcePixelLeft < sourceWidth);
5914 const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
5915
5916 const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
5917
5918 const unsigned int factorRight = factorRight_fixed16 >> 9u;
5919 const unsigned int factorLeft = 128u - factorRight;
5920
5921 for (unsigned int c = 0u; c < 2u; ++c)
5922 {
5923 ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * (128u - factorBottom)
5924 + (((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
5925 }
5926 }
5927 }
5928}
5929
5930#endif // OCEAN_WE_KEEP_ALSO_THIS_SLOW_IMPLEMENTATION_SHOWING_A_SLIGHTLY_DIFFERENT_APPROACH
5931
5932#ifdef OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION
5933
/**
 * Specialization of scale8BitPerChannelSubset7BitPrecisionNEON() for the template arguments <3u, 8u>.
 * The function bilinearly interpolates the target rows [firstTargetRow, firstTargetRow + numberTargetRows) of an interleaved 3-channel uint8_t frame, eight target pixels per NEON iteration.
 * Source locations are determined with fixed-point arithmetic (16 bit pixel, 16 bit sub-pixel), the interpolation factors are reduced to 7 bit precision (values in [0, 128]).
 * The intermediate results of the top and bottom row are rounded to 8 bit before they are blended vertically.
 * Target pixels whose source location reaches the last source column are re-calculated with scalar code at the end of each row.
 * This implementation is compiled only if OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION is defined.
 * NOTE(review): the clamping is based on signed 32 bit integers (minmax<int>, vmaxq_s32) while `(sourceWidth - 1u) << 16u` and `(sourceHeight - 1u) << 16u` exceed INT_MAX for dimensions larger than 32768, although the asserts allow dimensions up to 65535 - confirm the supported range.
 * @param source The source frame data, must be valid
 * @param target The target frame data receiving the interpolated pixels, must be valid
 * @param sourceWidth Width of the source frame in pixel, with range [2, 65535]
 * @param sourceHeight Height of the source frame in pixel, with range [1, 65535]
 * @param targetWidth Width of the target frame in pixel, with range [8, 65535]
 * @param targetHeight Height of the target frame in pixel, with range [1, 65535], only used in an assert within this function
 * @param sourceX_s_targetX The horizontal scale factor converting a target x-location to a source x-location, with range (0, infinity)
 * @param sourceY_s_targetY The vertical scale factor converting a target y-location to a source y-location, with range (0, infinity)
 * @param sourcePaddingElements Padding elements at the end of each source row, must be 0 as padding is not supported
 * @param targetPaddingElements Padding elements at the end of each target row, must be 0
 * @param firstTargetRow The first target row to be handled
 * @param numberTargetRows The number of target rows to be handled
 */
5934 template <>
5935 inline void FrameInterpolatorBilinear::scale8BitPerChannelSubset7BitPrecisionNEON<3u, 8u>(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
5936 {
5937 ocean_assert(source != nullptr && target != nullptr);
5938 ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
5939 ocean_assert(sourceHeight >= 1u && sourceHeight <= 65535u);
5940 ocean_assert(targetWidth >= 8u && targetWidth <= 65535u);
5941 ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
5942 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5943
5944 ocean_assert(sourcePaddingElements == 0u); // not supported
5945 ocean_assert(targetPaddingElements == 0u);
5946
5947 typedef typename DataType<uint8_t, 3u>::Type PixelType;
5948
// both pointers are pixel pointers, thus all following pointer arithmetic is done in units of PixelType
5949 PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
5950 const PixelType* const sourcePixelData = (const PixelType*)source;
5951
5952 // our offset values (in pixels) for the eight left pixels in relation to the first pixel of the row
5953 unsigned int leftOffsets[8];
5954
5955 // this function uses fixed point numbers with 16 bit for the calculation of the interpolation positions and factors:
5956 // fixedPointLocation = floatLocation * 2^16
5957 //
5958 // [FEDCBA98, 76543210]
5959 // [pixel , subpixel]
5960 //
5961 // fixedPointLocation = pixel + subpixel / 2^16
5962 //
5963 // Thus, the upper 16 bit represent the location of e.g., the left pixel (for the linear interpolation)
5964 // while the lower 16 bit represent one of both interpolation factors (and 2^16 - subpixel represents the second interpolation factor)
5965
// the scale factors as fixed point numbers (rounded to the nearest integer)
5966 const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
5967 const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
5968
// the source location of target location 0 as fixed point number: scale * 0.5 - 0.5
5969 const int targetOffsetX_fixed16 = (int)(double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
5970 const int targetOffsetY_fixed16 = (int)(double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
5971
5972 // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
5973 const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
5974
5975 // we store 4 integers: [targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16]
5976 const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
5977
5978 // we store 4 integers: [sourceWidth - 2, sourceWidth - 2, sourceWidth - 2, sourceWidth - 2]
5979 const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
5980
5981 // we store 4 integers: [0, 0, 0, 0]
5982 const int32x4_t m128_s_zero = vdupq_n_s32(0);
5983
5984 const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
5985 const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
5986
5987 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5988 {
// the vertical source location of this target row, clamped to the range [0, sourceHeight - 1]
5989 const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 + int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
5990
5991 const unsigned int sourceRowTop = sourceY_fixed16 >> 16u; // we must not round here
5992 const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
5993 const unsigned int factorBottom = factorBottom_fixed16 >> 9u; // 16 bit -> 7 bit precision (truncated)
5994
5995 const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
5996 // factorTop = 128 - factorBottom
5997 const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
5998
5999 const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
6000
6001 const PixelType* const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
6002 const PixelType* const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
6003
6004 for (unsigned int x = 0; x < targetWidth; x += 8u)
6005 {
6006 if (x + 8u > targetWidth)
6007 {
6008 // the last iteration will not fit into the output frame,
6009 // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
6010
6011 ocean_assert(x >= 8u && targetWidth > 8u);
6012 const unsigned int newX = targetWidth - 8u;
6013
6014 ocean_assert(x > newX);
6015 targetPixelData -= x - newX;
6016
6017 x = newX;
6018
6019 // the for loop will stop after this iteration
6020 ocean_assert(!(x + 8u < targetWidth));
6021 }
6022
6023
6024 // we need eight successive target x coordinates, stored as unsigned integers in two registers with four lanes each:
6025 // [x + 3, x + 2, x + 1; x + 0]
6026 const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
6027 const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
6028
6029 // we calculate the four source locations for our four target locations: max(0, targetOffsetX + sourceX_T_targetX * x)
6030 const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
6031 const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
6032
6033 const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
6034 const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
6035
6036 // now we determine the pixel/integer accurate source locations
6037 // m128_u_left = min(floor(sourceX), sourceWidth - 2), so that 'left + 1' is always a pixel inside the source row
6038 const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2); // not vrshrq_n_u32 as we must not round here
6039 const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
6040
6041 // we store the offsets we have calculated
6042 vst1q_u32(leftOffsets + 0, m128_u_left_0123);
6043 vst1q_u32(leftOffsets + 4, m128_u_left_4567);
6044
6045
6046
6047 // we load the individual pixels to our three (de-interleaved) 8x8 bit registers, one register per channel (we do this for the top-left and top-right pixels)
6048 // note: loading of each pixel individually is significantly slower than loading two neighboring pixels within one iteration
6049
// the registers are not initialized explicitly, each of the eight lanes is written by one of the lane loads below
6050 uint8x8x3_t topLeftPixels;
6051 uint8x8x3_t topRightPixels;
6052
6053 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 0), topLeftPixels, 0);
6054 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 1), topRightPixels, 0);
6055
6056 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 0), topLeftPixels, 1);
6057 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 1), topRightPixels, 1);
6058
6059 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 0), topLeftPixels, 2);
6060 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 1), topRightPixels, 2);
6061
6062 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 0), topLeftPixels, 3);
6063 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 1), topRightPixels, 3);
6064
6065 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 0), topLeftPixels, 4);
6066 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 1), topRightPixels, 4);
6067
6068 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 0), topLeftPixels, 5);
6069 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 1), topRightPixels, 5);
6070
6071 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 0), topLeftPixels, 6);
6072 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 1), topRightPixels, 6);
6073
6074 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 0), topLeftPixels, 7);
6075 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 1), topRightPixels, 7);
6076
6077
6078 // we load the individual pixels to our three (de-interleaved) 8x8 bit registers, one register per channel (we do this for the bottom-left and bottom-right pixels)
6079
6080 uint8x8x3_t bottomLeftPixels;
6081 uint8x8x3_t bottomRightPixels;
6082
6083 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 0), bottomLeftPixels, 0);
6084 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 1), bottomRightPixels, 0);
6085
6086 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 0), bottomLeftPixels, 1);
6087 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 1), bottomRightPixels, 1);
6088
6089 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 0), bottomLeftPixels, 2);
6090 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 1), bottomRightPixels, 2);
6091
6092 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 0), bottomLeftPixels, 3);
6093 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 1), bottomRightPixels, 3);
6094
6095 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 0), bottomLeftPixels, 4);
6096 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 1), bottomRightPixels, 4);
6097
6098 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 0), bottomLeftPixels, 5);
6099 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 1), bottomRightPixels, 5);
6100
6101 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 0), bottomLeftPixels, 6);
6102 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 1), bottomRightPixels, 6);
6103
6104 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 0), bottomLeftPixels, 7);
6105 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 1), bottomRightPixels, 7);
6106
6107
6108
6109 // we determine the multiplication factors for the right pixels - which are already stored in the lower 16 bits
6110 // we need an accuracy of 7 bits (values between 0 and 128), the narrowing shift rounds:
6111 // 76 54 32 10
6112 // [F3 F2 F1 F0]
6113 const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6114 const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6115
6116 // as we have the pixel information de-interleaved, we can store all 8 interpolation factors together into one 8x8 bit register:
6117 const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
6118 const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
6119 const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
6120
6121
6122
6123 // we determine the intermediate interpolation results for the top row (and we narrow down the 16 bit results to 8 bit results)
6124 uint16x8_t m128_muliplicationChannel_0 = vmull_u8(topLeftPixels.val[0], m64_u_factorsLeft);
6125 uint16x8_t m128_muliplicationChannel_1 = vmull_u8(topLeftPixels.val[1], m64_u_factorsLeft);
6126 uint16x8_t m128_muliplicationChannel_2 = vmull_u8(topLeftPixels.val[2], m64_u_factorsLeft);
6127
6128 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, topRightPixels.val[0], m64_u_factorsRight);
6129 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, topRightPixels.val[1], m64_u_factorsRight);
6130 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, topRightPixels.val[2], m64_u_factorsRight);
6131
6132 uint8x8_t m64_topRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6133 uint8x8_t m64_topRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6134 uint8x8_t m64_topRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6135
6136
6137
6138 // we determine the intermediate interpolation results for the bottom row (and we narrow down the 16 bit results to 8 bit results)
6139 m128_muliplicationChannel_0 = vmull_u8(bottomLeftPixels.val[0], m64_u_factorsLeft);
6140 m128_muliplicationChannel_1 = vmull_u8(bottomLeftPixels.val[1], m64_u_factorsLeft);
6141 m128_muliplicationChannel_2 = vmull_u8(bottomLeftPixels.val[2], m64_u_factorsLeft);
6142
6143 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, bottomRightPixels.val[0], m64_u_factorsRight);
6144 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, bottomRightPixels.val[1], m64_u_factorsRight);
6145 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, bottomRightPixels.val[2], m64_u_factorsRight);
6146
6147 uint8x8_t m64_bottomRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6148 uint8x8_t m64_bottomRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6149 uint8x8_t m64_bottomRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6150
6151
6152
6153 // finally we determine the interpolation result between top and bottom row
6154 m128_muliplicationChannel_0 = vmull_u8(m64_topRowChannel_0, m64_u_factorsTop);
6155 m128_muliplicationChannel_1 = vmull_u8(m64_topRowChannel_1, m64_u_factorsTop);
6156 m128_muliplicationChannel_2 = vmull_u8(m64_topRowChannel_2, m64_u_factorsTop);
6157
6158 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, m64_bottomRowChannel_0, m64_u_factorsBottom);
6159 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, m64_bottomRowChannel_1, m64_u_factorsBottom);
6160 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, m64_bottomRowChannel_2, m64_u_factorsBottom);
6161
6162
6163 // we narrow down the interpolation results and we store them
6164 uint8x8x3_t result;
6165 result.val[0] = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6166 result.val[1] = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6167 result.val[2] = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6168
6169 // we write back the results and interleave them automatically
6170 vst3_u8((uint8_t*)targetPixelData, result);
6171
6172 targetPixelData += 8;
6173 }
6174
6175 // we need to process the last pixel again, as this pixel may have received wrong interpolation factors as we always load two successive pixels into our NEON registers
6176 // (the NEON code above clamps the left pixel to 'sourceWidth - 2' but still uses the interpolation factor of the unclamped source location)
6177 // **TODO** this is just a temporary solution, check how we can avoid this additional step
6178
// the target x-location (rounded down by the integer division) at which the source location reaches the last source column
6178 const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
6179
6180 for (unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
6181 {
6182 const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 + int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
6183
6184 const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
6185 ocean_assert(lastSourcePixelLeft < sourceWidth);
6186 const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
6187
6188 const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
6189
// note: the factor is truncated here, while the NEON code above applies a rounding shift (vrshrn_n_u32)
6190 const unsigned int factorRight = factorRight_fixed16 >> 9u;
6191 const unsigned int factorLeft = 128u - factorRight;
6192
6193 for (unsigned int c = 0u; c < 3u; ++c)
6194 {
// targetPixelData points behind the last pixel of the current target row, so that 'targetPixelData - (targetWidth - x)' is pixel x of this row
// both 7 bit interpolation steps are normalized at once with one rounding: (... + 2^13) >> 14
6195 ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * (128u - factorBottom)
6196 + (((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
6197 }
6198 }
6199 }
6200 }
6201
6202#endif // OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION
6203
6204#ifdef OCEAN_WE_KEEP_ALSO_THIS_SLOW_IMPLEMENTATION_SHOWING_A_MORE_GENERIC_APPROACH
6205
6206/// \cond DOXYGEN_DO_NOT_DOCUMENT
6207
6208template <>
6209inline void FrameInterpolatorBilinear::resize8BitPerChannelSubset7BitPrecisionNEON<4u, 8u>(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
6210{
6211 ocean_assert(source != nullptr && target != nullptr);
6212 ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
6213 ocean_assert(sourceHeight >= 1u && sourceHeight <= 65535u);
6214 ocean_assert(targetWidth >= 8u && targetWidth <= 65535u);
6215 ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
6216 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
6217
6218 ocean_assert(sourcePaddingElements == 0u); // not supported
6219 ocean_assert(targetPaddingElements == 0u);
6220
6221 typedef typename DataType<uint8_t, 4u>::Type PixelType;
6222
6223 PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
6224 const PixelType* const sourcePixelData = (const PixelType*)source;
6225
6226 // our offset values for the eight left pixels in relation to the first pixel of the row
6227 unsigned int leftOffsets[8];
6228
6229 // this function uses fixed point numbers with 16 bit for the calculation of const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
6230 const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
6231
6232 // this function uses fixed point numbers with 16 bit for the calculation of the interpolation positions and factors:
6233 // fixedPointLocation = floatLocation * 2^16
6234 //
6235 // [FEDCBA98, 76543210]
6236 // [pixel , subpixel]
6237 //
6238 // fixedPointLocation = pixel + subpixel / 2^16
6239 //
6240 // Thus, the upper 16 bit represent the location of e.g., the left pixel (for the linear interpolation)
6241 // while the lower 16 bit represent one of both interpolation factors (and 2^16 - subpixel represents the second interpolation factor)
6242
6243 const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
6244 const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
6245
6246 const int targetOffsetX_fixed16 = (int)(double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
6247 const int targetOffsetY_fixed16 = (int)(double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
6248
6249 // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
6250 const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
6251
6252 // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
6253 const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
6254
6255 // we store 4 integers: [sourceWidth - 2, sourceWidth - 2, sourceWidth - 2, sourceWidth - 2]
6256 const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
6257
6258 // we store 4 integers: [0, 0, 0, 0]
6259 const int32x4_t m128_s_zero = vdupq_n_s32(0);
6260
6261 const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
6262 const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
6263
6264 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6265 {
6266 const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 + int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
6267
6268 const unsigned int sourceRowTop = sourceY_fixed16 >> 16u; // we must not round here
6269 const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
6270 const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
6271
6272 const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
6273 // factorTop = 128 - factorBottom
6274 const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
6275
6276 const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
6277
6278 const PixelType* const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
6279 const PixelType* const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
6280
6281 for (unsigned int x = 0; x < targetWidth; x += 8u)
6282 {
6283 if (x + 8u > targetWidth)
6284 {
6285 // the last iteration will not fit into the output frame,
6286 // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
6287
6288 ocean_assert(x >= 8u && targetWidth > 8u);
6289 const unsigned int newX = targetWidth - 8u;
6290
6291 ocean_assert(x > newX);
6292 targetPixelData -= x - newX;
6293
6294 x = newX;
6295
6296 // the for loop will stop after this iteration
6297 ocean_assert(!(x + 8u < targetWidth));
6298 }
6299
6300
6301 // we need four successive x coordinate floats:
6302 // [x + 3, x + 2, x + 1; x + 0]
6303 const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
6304 const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
6305
6306 // we calculate the four source locations for our four target locations
6307 const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
6308 const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
6309
6310 const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
6311 const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
6312
6313 // now we determine the pixel/integer accurate source locations
6314 // m128_u_left = min(floor(m128_f_sourceX), sourceWidth - 2)
6315 const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2); // not vrshrq_n_u32 as we must not round here
6316 const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
6317
6318 // we store the offsets we have calculated
6319 vst1q_u32(leftOffsets + 0, m128_u_left_0123);
6320 vst1q_u32(leftOffsets + 4, m128_u_left_4567);
6321
6322
6323
6324 // we load the individal pixels to our four (de-interleaved) 8x8 bit registers (we do this for the top-left and top-right pixels)
6325 // note: loading of each pixel individually is significantly slower than loading two neighboring pixels within one iteration
6326
6327 uint8x8x4_t topLeftPixels;
6328 uint8x8x4_t topRightPixels;
6329
6330 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 0), topLeftPixels, 0);
6331 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 1), topRightPixels, 0);
6332
6333 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 0), topLeftPixels, 1);
6334 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 1), topRightPixels, 1);
6335
6336 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 0), topLeftPixels, 2);
6337 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 1), topRightPixels, 2);
6338
6339 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 0), topLeftPixels, 3);
6340 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 1), topRightPixels, 3);
6341
6342 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 0), topLeftPixels, 4);
6343 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 1), topRightPixels, 4);
6344
6345 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 0), topLeftPixels, 5);
6346 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 1), topRightPixels, 5);
6347
6348 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 0), topLeftPixels, 6);
6349 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 1), topRightPixels, 6);
6350
6351 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 0), topLeftPixels, 7);
6352 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 1), topRightPixels, 7);
6353
6354
6355 // we load the individal pixels to our four (de-interleaved) 8x8 bit registers (we do this for the bottom-left and bottom-right pixels)
6356
6357 uint8x8x4_t bottomLeftPixels;
6358 uint8x8x4_t bottomRightPixels;
6359
6360 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 0), bottomLeftPixels, 0);
6361 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 1), bottomRightPixels, 0);
6362
6363 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 0), bottomLeftPixels, 1);
6364 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 1), bottomRightPixels, 1);
6365
6366 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 0), bottomLeftPixels, 2);
6367 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 1), bottomRightPixels, 2);
6368
6369 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 0), bottomLeftPixels, 3);
6370 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 1), bottomRightPixels, 3);
6371
6372 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 0), bottomLeftPixels, 4);
6373 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 1), bottomRightPixels, 4);
6374
6375 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 0), bottomLeftPixels, 5);
6376 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 1), bottomRightPixels, 5);
6377
6378 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 0), bottomLeftPixels, 6);
6379 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 1), bottomRightPixels, 6);
6380
6381 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 0), bottomLeftPixels, 7);
6382 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 1), bottomRightPixels, 7);
6383
6384
6385
6386 // we determine the multiplication factors for the right pixels - which are already stored in the lower 16 bits
6387 // we need an accuracy of 7 bits (values between 0 and 128):
6388 // 76 54 32 10
6389 // [F3 F2 F1 F0]
6390 const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6391 const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6392
6393 // as we have the pixel information de-interleaved, we can store all 8 interpolation factors together into one 8x8 bit register:
6394 const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
6395 const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
6396 const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
6397
6398
6399
6400 // we determine the intermediate interpolation results for the top row (and we narrow down the 16 bit results to 8 bit results)
6401 uint16x8_t m128_muliplicationChannel_0 = vmull_u8(topLeftPixels.val[0], m64_u_factorsLeft);
6402 uint16x8_t m128_muliplicationChannel_1 = vmull_u8(topLeftPixels.val[1], m64_u_factorsLeft);
6403 uint16x8_t m128_muliplicationChannel_2 = vmull_u8(topLeftPixels.val[2], m64_u_factorsLeft);
6404 uint16x8_t m128_muliplicationChannel_3 = vmull_u8(topLeftPixels.val[3], m64_u_factorsLeft);
6405
6406 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, topRightPixels.val[0], m64_u_factorsRight);
6407 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, topRightPixels.val[1], m64_u_factorsRight);
6408 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, topRightPixels.val[2], m64_u_factorsRight);
6409 m128_muliplicationChannel_3 = vmlal_u8(m128_muliplicationChannel_3, topRightPixels.val[3], m64_u_factorsRight);
6410
6411 uint8x8_t m64_topRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6412 uint8x8_t m64_topRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6413 uint8x8_t m64_topRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6414 uint8x8_t m64_topRowChannel_3 = vrshrn_n_u16(m128_muliplicationChannel_3, 7);
6415
6416
6417
6418 // we determine the intermediate interpolation results for the bottom row (and we narrow down the 16 bit results to 8 bit results)
6419 m128_muliplicationChannel_0 = vmull_u8(bottomLeftPixels.val[0], m64_u_factorsLeft);
6420 m128_muliplicationChannel_1 = vmull_u8(bottomLeftPixels.val[1], m64_u_factorsLeft);
6421 m128_muliplicationChannel_2 = vmull_u8(bottomLeftPixels.val[2], m64_u_factorsLeft);
6422 m128_muliplicationChannel_3 = vmull_u8(bottomLeftPixels.val[3], m64_u_factorsLeft);
6423
6424 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, bottomRightPixels.val[0], m64_u_factorsRight);
6425 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, bottomRightPixels.val[1], m64_u_factorsRight);
6426 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, bottomRightPixels.val[2], m64_u_factorsRight);
6427 m128_muliplicationChannel_3 = vmlal_u8(m128_muliplicationChannel_3, bottomRightPixels.val[3], m64_u_factorsRight);
6428
6429 uint8x8_t m64_bottomRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6430 uint8x8_t m64_bottomRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6431 uint8x8_t m64_bottomRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6432 uint8x8_t m64_bottomRowChannel_3 = vrshrn_n_u16(m128_muliplicationChannel_3, 7);
6433
6434
6435
6436 // finally we determine the interpolation result between top and bottom row
6437 m128_muliplicationChannel_0 = vmull_u8(m64_topRowChannel_0, m64_u_factorsTop);
6438 m128_muliplicationChannel_1 = vmull_u8(m64_topRowChannel_1, m64_u_factorsTop);
6439 m128_muliplicationChannel_2 = vmull_u8(m64_topRowChannel_2, m64_u_factorsTop);
6440 m128_muliplicationChannel_3 = vmull_u8(m64_topRowChannel_3, m64_u_factorsTop);
6441
6442 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, m64_bottomRowChannel_0, m64_u_factorsBottom);
6443 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, m64_bottomRowChannel_1, m64_u_factorsBottom);
6444 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, m64_bottomRowChannel_2, m64_u_factorsBottom);
6445 m128_muliplicationChannel_3 = vmlal_u8(m128_muliplicationChannel_3, m64_bottomRowChannel_3, m64_u_factorsBottom);
6446
6447
6448 // we narrow down the interpolation results and we store them
6449 uint8x8x4_t result;
6450 result.val[0] = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6451 result.val[1] = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6452 result.val[2] = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6453 result.val[3] = vrshrn_n_u16(m128_muliplicationChannel_3, 7);
6454
6455 // we write back the results and interleave them automatically
6456 vst4_u8((uint8_t*)targetPixelData, result);
6457
6458 targetPixelData += 8;
6459 }
6460
6461 // we need to process the last pixel again, as this pixel may have received wrong interpolation factors as we always load two successive pixels into our NEON registers
6462 // **TODO** this is just a temporary solution, check how we can avoid this additional step
6463
6464 const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
6465
6466 for (unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
6467 {
6468 const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 + int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
6469
6470 const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
6471 ocean_assert(lastSourcePixelLeft < sourceWidth);
6472 const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
6473
6474 const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
6475
6476 const unsigned int factorRight = factorRight_fixed16 >> 9u;
6477 const unsigned int factorLeft = 128u - factorRight;
6478
6479 for (unsigned int c = 0u; c < 4u; ++c)
6480 {
6481 ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorTop
6482 + (((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
6483 }
6484 }
6485 }
6486}
6487
6488/// \endcond
6489
6490#endif // OCEAN_WE_KEEP_ALSO_THIS_SLOW_IMPLEMENTATION_SHOWING_A_MORE_GENERIC_APPROACH
6491
6492template <>
6493inline void FrameInterpolatorBilinear::interpolateRowVerticalNEON<float>(const float* sourceRowTop, const float* sourceRowBottom, float* targetRow, const unsigned int elements, const float factorBottom)
6494{
6495 ocean_assert(sourceRowTop != nullptr);
6496 ocean_assert(sourceRowBottom != nullptr);
6497 ocean_assert(targetRow != nullptr);
6498 ocean_assert(elements >= 16u);
6499 ocean_assert(factorBottom >= 0.0f && factorBottom <= 1.0f);
6500
6501 // [1.0f, 1.0f, 1.0f, 1.0f]
6502 const float32x4_t constant_1_f_32x4 = vdupq_n_f32(1.0f);
6503
6504 const float32x4_t factorsBottom_f_32x4 = vdupq_n_f32(factorBottom);
6505 const float32x4_t factorsTop_f_32x4 = vsubq_f32(constant_1_f_32x4, factorsBottom_f_32x4); // factorTop = 1 - factorBottom
6506
6507 for (unsigned int n = 0u; n < elements; n += 16u)
6508 {
6509 if (n + 16u > elements)
6510 {
6511 // the last iteration will not fit into the output frame,
6512 // so we simply shift x left by some elements (at most 15) and we will calculate some elements again
6513
6514 ocean_assert(n >= 16u && elements > 16u);
6515 const unsigned int offset = n - (elements - 16u);
6516 ocean_assert(offset < 16u);
6517
6518 sourceRowTop -= offset;
6519 sourceRowBottom -= offset;
6520 targetRow -= offset;
6521
6522 // the for loop will stop after this iteration
6523 ocean_assert(!(n + 16u < elements));
6524 }
6525
6526 // loading the next four 32 bit values from the top and bottom row
6527 const float32x4_t top_03_32x4 = vld1q_f32(sourceRowTop + 0);
6528 const float32x4_t top_47_32x4 = vld1q_f32(sourceRowTop + 4);
6529 const float32x4_t top_8B_32x4 = vld1q_f32(sourceRowTop + 8);
6530 const float32x4_t top_CF_32x4 = vld1q_f32(sourceRowTop + 12);
6531
6532 const float32x4_t bottom_03_32x4 = vld1q_f32(sourceRowBottom + 0);
6533 const float32x4_t bottom_47_32x4 = vld1q_f32(sourceRowBottom + 4);
6534 const float32x4_t bottom_8B_32x4 = vld1q_f32(sourceRowBottom + 8);
6535 const float32x4_t bottom_CF_32x4 = vld1q_f32(sourceRowBottom + 12);
6536
6537 // interpolatedRow_32x4 = top_32x4 * factorsTop + bottom_32x4 * factorsBottom
6538 float32x4_t interpolatedRow_03_32x4 = vmulq_f32(top_03_32x4, factorsTop_f_32x4);
6539 float32x4_t interpolatedRow_47_32x4 = vmulq_f32(top_47_32x4, factorsTop_f_32x4);
6540 float32x4_t interpolatedRow_8B_32x4 = vmulq_f32(top_8B_32x4, factorsTop_f_32x4);
6541 float32x4_t interpolatedRow_CF_32x4 = vmulq_f32(top_CF_32x4, factorsTop_f_32x4);
6542
6543 interpolatedRow_03_32x4 = vmlaq_f32(interpolatedRow_03_32x4, bottom_03_32x4, factorsBottom_f_32x4);
6544 interpolatedRow_47_32x4 = vmlaq_f32(interpolatedRow_47_32x4, bottom_47_32x4, factorsBottom_f_32x4);
6545 interpolatedRow_8B_32x4 = vmlaq_f32(interpolatedRow_8B_32x4, bottom_8B_32x4, factorsBottom_f_32x4);
6546 interpolatedRow_CF_32x4 = vmlaq_f32(interpolatedRow_CF_32x4, bottom_CF_32x4, factorsBottom_f_32x4);
6547
6548 // writing back the four interpolated 32 bit results
6549 vst1q_f32(targetRow + 0, interpolatedRow_03_32x4);
6550 vst1q_f32(targetRow + 4, interpolatedRow_47_32x4);
6551 vst1q_f32(targetRow + 8, interpolatedRow_8B_32x4);
6552 vst1q_f32(targetRow + 12, interpolatedRow_CF_32x4);
6553
6554 sourceRowTop += 16;
6555 sourceRowBottom += 16;
6556 targetRow += 16;
6557 }
6558}
6559
6560template <>
6561inline void FrameInterpolatorBilinear::interpolateRowHorizontalNEON<float, 1u>(const float* extendedSourceRow, float* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const float* interpolationFactorsRight)
6562{
6563 ocean_assert(extendedSourceRow != nullptr);
6564 ocean_assert(targetRow != nullptr);
6565 ocean_assert(targetWidth >= 8u);
6566 ocean_assert(interpolationLocations != nullptr);
6567 ocean_assert(interpolationFactorsRight != nullptr);
6568
6569 ocean_assert(channels == 1u);
6570
6571 // [1.0f, 1.0f, 1.0f, 1.0f]
6572 const float32x4_t constant_1_f_32x4 = vdupq_n_f32(1.0f);
6573
6574 for (unsigned int x = 0; x < targetWidth; x += 8u)
6575 {
6576 if (x + 8u > targetWidth)
6577 {
6578 // the last iteration will not fit into the output frame,
6579 // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
6580
6581 ocean_assert(x >= 8u && targetWidth > 8u);
6582 const unsigned int newX = targetWidth - 8u;
6583
6584 ocean_assert(x > newX);
6585 const unsigned int offset = x - newX;
6586
6587 targetRow -= offset;
6588 interpolationLocations -= offset;
6589 interpolationFactorsRight -= offset;
6590
6591 x = newX;
6592
6593 // the for loop will stop after this iteration
6594 ocean_assert(!(x + 8u < targetWidth));
6595 }
6596
6597 // we load the left and the right pixels (for four resulting target pixels)
6598
6599 const float32x2_t pixel_0_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[0]);
6600 const float32x2_t pixel_1_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[1]);
6601 const float32x4_t pixel_01_f_32x4 = vcombine_f32(pixel_0_f_32x2, pixel_1_f_32x2);
6602
6603 const float32x2_t pixel_2_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[2]);
6604 const float32x2_t pixel_3_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[3]);
6605 const float32x4_t pixel_23_f_32x4 = vcombine_f32(pixel_2_f_32x2, pixel_3_f_32x2);
6606
6607 const float32x2_t pixel_4_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[4]);
6608 const float32x2_t pixel_5_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[5]);
6609 const float32x4_t pixel_45_f_32x4 = vcombine_f32(pixel_4_f_32x2, pixel_5_f_32x2);
6610
6611 const float32x2_t pixel_6_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[6]);
6612 const float32x2_t pixel_7_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[7]);
6613 const float32x4_t pixel_67_f_32x4 = vcombine_f32(pixel_6_f_32x2, pixel_7_f_32x2);
6614
6615 const float32x4_t factorsRight_0123_f_32x4 = vld1q_f32(interpolationFactorsRight + 0);
6616 const float32x4_t factorsLeft_0123_f_32x4 = vsubq_f32(constant_1_f_32x4, factorsRight_0123_f_32x4);
6617 const float32x4x2_t factorsLeftRight_0123_f_32x4_2 = vzipq_f32(factorsLeft_0123_f_32x4, factorsRight_0123_f_32x4);
6618
6619 const float32x4_t factorsRight_4567_f_32x4 = vld1q_f32(interpolationFactorsRight + 4);
6620 const float32x4_t factorsLeft_4567_f_32x4 = vsubq_f32(constant_1_f_32x4, factorsRight_4567_f_32x4);
6621 const float32x4x2_t factorsLeftRight_4567_f_32x4_2 = vzipq_f32(factorsLeft_4567_f_32x4, factorsRight_4567_f_32x4);
6622
6623 const float32x4_t multiplied_01_f_32x4 = vmulq_f32(pixel_01_f_32x4, factorsLeftRight_0123_f_32x4_2.val[0]);
6624 const float32x4_t multiplied_23_f_32x4 = vmulq_f32(pixel_23_f_32x4, factorsLeftRight_0123_f_32x4_2.val[1]);
6625
6626 const float32x4_t multiplied_45_f_32x4 = vmulq_f32(pixel_45_f_32x4, factorsLeftRight_4567_f_32x4_2.val[0]);
6627 const float32x4_t multiplied_67_f_32x4 = vmulq_f32(pixel_67_f_32x4, factorsLeftRight_4567_f_32x4_2.val[1]);
6628
6629 const float32x2_t result_01_f_32x2 = vpadd_f32(vget_low_f32(multiplied_01_f_32x4), vget_high_f32(multiplied_01_f_32x4));
6630 const float32x2_t result_23_f_32x2 = vpadd_f32(vget_low_f32(multiplied_23_f_32x4), vget_high_f32(multiplied_23_f_32x4));
6631
6632 const float32x2_t result_45_f_32x2 = vpadd_f32(vget_low_f32(multiplied_45_f_32x4), vget_high_f32(multiplied_45_f_32x4));
6633 const float32x2_t result_67_f_32x2 = vpadd_f32(vget_low_f32(multiplied_67_f_32x4), vget_high_f32(multiplied_67_f_32x4));
6634
6635 const float32x4_t result_0123_f_32x4 = vcombine_f32(result_01_f_32x2, result_23_f_32x2);
6636 const float32x4_t result_4567_f_32x4 = vcombine_f32(result_45_f_32x2, result_67_f_32x2);
6637
6638 vst1q_f32(targetRow + 0, result_0123_f_32x4);
6639 vst1q_f32(targetRow + 4, result_4567_f_32x4);
6640
6641 targetRow += 8;
6642 interpolationLocations += 8;
6643 interpolationFactorsRight += 8;
6644 }
6645}
6646
6647template <>
6648inline void FrameInterpolatorBilinear::scaleSubset<float, float, 1u>(const float* source, float* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
6649{
6650 ocean_assert(source != nullptr && target != nullptr);
6651 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
6652 ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
6653 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
6654
6655 ocean_assert(sourceWidth != targetWidth || sourceHeight != targetHeight);
6656
6657 const unsigned int sourceStrideElements = sourceWidth * 1u + sourcePaddingElements;
6658 const unsigned int targetStrideElements = targetWidth * 1u + targetPaddingElements;
6659
6660 typedef void (*InterpolateRowVerticalFunction)(const float*, const float*, float*, const unsigned int, const float);
6661 typedef void (*InterpolateRowHorizontalFunction)(const float*, float*, const unsigned int, const unsigned int, const unsigned int*, const float*);
6662
6663 InterpolateRowVerticalFunction interpolateRowVerticalFunction = interpolateRowVertical<float>;
6664 InterpolateRowHorizontalFunction interpolateRowHorizontalFunction = interpolateRowHorizontal<float, 1u>;
6665
6666 if (sourceWidth * 1u >= 16u)
6667 {
6668 interpolateRowVerticalFunction = interpolateRowVerticalNEON<float>;
6669 }
6670
6671 if (targetWidth >= 8u)
6672 {
6673 interpolateRowHorizontalFunction = interpolateRowHorizontalNEON<float, 1u>;
6674 }
6675
6676 target += targetStrideElements * firstTargetRow;
6677
6678 const float sourceX_T_targetX = float(sourceX_s_targetX);
6679 const float sourceY_T_targetY = float(sourceY_s_targetY);
6680
6681 // See the generic template function for a detailed documentation regarding interpolation factors.
6682
6683 Memory memoryIntermediateExtendedRow;
6684 Memory memoryHorizontalInterpolationLocations;
6685 Memory memoryHorizontalInterpolationFactorsRight;
6686
6687 if (sourceWidth != targetWidth)
6688 {
6689 // in case we are scaling the width of the frame, we use an intermediate buffer and pre-calculated interpolation locations and factors
6690
6691 memoryIntermediateExtendedRow = Memory::create<float>(sourceWidth + 1u); // one additional pixel
6692
6693 memoryHorizontalInterpolationLocations = Memory::create<unsigned int>(targetWidth); // one offset for each target pixel
6694
6695 memoryHorizontalInterpolationFactorsRight = Memory::create<float>(targetWidth); // one factors (right) for each target pixel
6696 }
6697
6698 if (memoryHorizontalInterpolationLocations)
6699 {
6700 ocean_assert(memoryHorizontalInterpolationFactorsRight);
6701
6702 if (targetWidth >= 4u)
6703 {
6704 const float32x4_t sourceX_T_targetX_f_32x4 = vdupq_n_f32(sourceX_T_targetX);
6705 const float32x4_t targetOffsetX_f_32x4 = vdupq_n_f32(sourceX_T_targetX * 0.5f - 0.5f);
6706
6707 // [0.0f, 0.0f, 0.0f, 0.0f]
6708 const float32x4_t constant_0_f_32x4 = vdupq_n_f32(0);
6709
6710 // [4.0f, 4.0f, 4.0f, 4.0f]
6711 const float32x4_t constant_4_f_32x4 = vdupq_n_f32(4.0f);
6712
6713 // we store 4 integers: [sourceWidth - 1, sourceWidth - 1, sourceWidth - 1, sourceWidth - 1]
6714 const uint32x4_t sourceWidth_1_u_32x4 = vdupq_n_u32(sourceWidth - 1u);
6715
6716 // [0.0f, 1.0f, 2.0f, 3.0f]
6717 const float f_0123[4] = {0.0f, 1.0f, 2.0f, 3.0f};
6718 float32x4_t x_0123_f_32x4 = vld1q_f32(f_0123);
6719
6720 // we pre-calculate the interpolation factors and pixel locations in horizontal direction
6721
6722 for (unsigned int x = 0u; x < targetWidth; x += 4u)
6723 {
6724 if (x + 4u > targetWidth)
6725 {
6726 // the last iteration will not fit into the output frame,
6727 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
6728
6729 ocean_assert(x >= 4u && targetWidth > 4u);
6730 const unsigned int newX = targetWidth - 4u;
6731
6732 ocean_assert(x > newX);
6733 const unsigned int offset = x - newX;
6734
6735 x = newX;
6736
6737 x_0123_f_32x4 = vsubq_f32(x_0123_f_32x4, vdupq_n_f32(float(offset)));
6738
6739 // the for loop will stop after this iteration
6740 ocean_assert(!(x + 4u < targetWidth));
6741 }
6742
6743 // we calculate the four source locations for our four target locations
6744 const float32x4_t sourceX_0123_f_32x4 = vmaxq_f32(constant_0_f_32x4, vaddq_f32(targetOffsetX_f_32x4, vmulq_f32(sourceX_T_targetX_f_32x4, x_0123_f_32x4)));
6745
6746 // now we determine the pixel/integer accurate source locations
6747 // left = min(floor(sourceX), sourceWidth - 1)
6748 uint32x4_t left_0123_u_32x4 = vminq_u32(vcvtq_u32_f32(sourceX_0123_f_32x4), sourceWidth_1_u_32x4); // no rounding here
6749
6750 // we store the offsets we have calculated
6751 vst1q_u32(memoryHorizontalInterpolationLocations.data<unsigned int>() + x, left_0123_u_32x4);
6752
6753 // factorRight = sourcceX - float(left)
6754 const float32x4_t factorsRight_f_32x4 = vsubq_f32(sourceX_0123_f_32x4, vcvtq_f32_u32(left_0123_u_32x4));
6755
6756 vst1q_f32(memoryHorizontalInterpolationFactorsRight.data<float>() + x, factorsRight_f_32x4);
6757
6758 // [x + 0, x + 1, x + 2, x + 3] + [4, 4, 4, 4]
6759 x_0123_f_32x4 = vaddq_f32(x_0123_f_32x4, constant_4_f_32x4);
6760 }
6761 }
6762 else
6763 {
6764 const float targetOffsetX = sourceX_T_targetX * 0.5f - 0.5f;
6765
6766 // we pre-calculate the interpolation factors and pixel locations in horizontal direction
6767
6768 for (unsigned int x = 0u; x < targetWidth; ++x)
6769 {
6770 const float sourceX = max(0.0f, targetOffsetX + float(x) * sourceX_T_targetX);
6771
6772 const unsigned int left = min((unsigned int)sourceX, sourceWidth - 1u); // no rounding here
6773
6774 memoryHorizontalInterpolationLocations.data<unsigned int>()[x] = left;
6775
6776 const float factorRight = sourceX - float(left);
6777 ocean_assert(factorRight >= 0.0f && factorRight <= 1.0f);
6778
6779 memoryHorizontalInterpolationFactorsRight.data<float>()[x] = factorRight;
6780 }
6781 }
6782 }
6783
6784 const float targetOffsetY = sourceY_T_targetY * 0.5f - 0.5f;
6785
6786 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6787 {
6788 const float sourceY = minmax<float>(0.0f, targetOffsetY + sourceY_T_targetY * float(y), float(sourceHeight) - 1.0f);
6789
6790 const unsigned int sourceRowTop = (unsigned int)sourceY; // we must not round here
6791 const float factorBottom = sourceY - float(sourceRowTop);
6792 ocean_assert(factorBottom >= 0.0f && factorBottom <= 1.0f);
6793
6794 const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
6795
6796 const float* const sourceTopRow = source + sourceStrideElements * sourceRowTop;
6797 const float* const sourceBottomRow = source + sourceStrideElements * sourceRowBottom;
6798
6799 float* targetRow = nullptr;
6800
6801 if (sourceHeight == targetHeight)
6802 {
6803 ocean_assert(sourceWidth != targetWidth);
6804 ocean_assert(memoryIntermediateExtendedRow);
6805
6806 // we do not need to interpolate two lines, thus we simply need to copy the row (as we need an additional pixel at the end)
6807 memcpy(memoryIntermediateExtendedRow.data<float>(), sourceTopRow, sourceWidth * sizeof(float));
6808 }
6809 else
6810 {
6811 // in case we do not scale the width of the frame, we can write the result to the target frame directly
6812 targetRow = memoryIntermediateExtendedRow.isNull() ? target : memoryIntermediateExtendedRow.data<float>();
6813
6814 ocean_assert(targetRow != nullptr);
6815 ocean_assert(interpolateRowVerticalFunction != nullptr);
6816 interpolateRowVerticalFunction(sourceTopRow, sourceBottomRow, targetRow, sourceWidth * 1u, factorBottom);
6817 }
6818
6819 if (memoryIntermediateExtendedRow) // sourceWidth != targetWidth
6820 {
6821 // we use an extended row (with one additional pixel at the end - equal to the last pixel)
6822 // so we have to copy the last pixel
6823 memoryIntermediateExtendedRow.data<float>()[sourceWidth] = memoryIntermediateExtendedRow.data<float>()[sourceWidth - 1u];
6824
6825 interpolateRowHorizontalFunction(memoryIntermediateExtendedRow.data<float>(), target, targetWidth, 1u, memoryHorizontalInterpolationLocations.data<unsigned int>(), memoryHorizontalInterpolationFactorsRight.data<float>());
6826 }
6827
6828 target += targetStrideElements;
6829 }
6830}
6831
6832#endif // OCEAN_HARDWARE_NEON_VERSION >= 10
6833
6834template <typename T, typename TScale, unsigned int tChannels>
6835void FrameInterpolatorBilinear::scaleSubset(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
6836{
6837 static_assert((std::is_same<float, TScale>::value || std::is_same<double, TScale>::value), "Invalid TScale type");
6838
6839 ocean_assert(source != nullptr && target != nullptr);
6840 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
6841 ocean_assert_and_suppress_unused(targetWidth >= 1u && targetHeight >= 1u, targetHeight);
6842 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
6843
6844 const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
6845 const unsigned int targetStrideElements = targetWidth * tChannels + targetPaddingElements;
6846
6847 const TScale sourceX_T_targetX = TScale(sourceX_s_targetX);
6848 const TScale sourceY_T_targetY = TScale(sourceY_s_targetY);
6849
6850 /*
6851 * We determine the sub-pixel accurate source location for each target pixel as follows:
6852 *
6853 * Example with a downsampling by factor 4:
6854 * sourceRow with 12 pixels: | 0 1 2 3 4 5 6 7 8 9 A B |
6855 * targetRow with 3 pixels: | 0 1 2 |
6856 *
6857 * Thus, the source row can be separated into three blocks;
6858 * and we want to extract the color information from the center of the blocks:
6859 * sourceRow with 12 pixels: | 0 1 2 3 | 4 5 6 7 | 8 9 A B |
6860 * targetRow with 3 pixels: | 0 | 1 | 2 | (targetTSourceX = 4)
6861 *
6862 * Thus, we add 0.5 to each target coordinate before converting it to a source location;
6863 * and subtract 0.5 again afterwards:
6864 * sourceX = (targetX + 0.5) * targetTSourceX - 0.5
6865 *
6866 * e.g., (0 + 0.5) * 4 - 0.5 = 1.5
6867 * (1 + 0.5) * 4 - 0.5 = 5.5
6868 *
6869 *
6870 * Example with a downsampling by factor 3:
6871 * sourceRow with 9 pixels: | 0 1 2 3 4 5 6 7 8 |
6872 * targetRow with 3 pixels: | 0 1 2 |
6873 *
6874 * sourceRow with 9 pixels: | 0 1 2 | 3 4 5 | 6 7 8 |
6875 * targetRow with 3 pixels: | 0 | 1 | 2 | (targetTSourceX = 3)
6876 *
6877 * e.g., (0 + 0.5) * 3 - 0.5 = 1
6878 * (1 + 0.5) * 3 - 0.5 = 4
6879 *
6880 *
6881 * Example with a downsampling by factor 2:
6882 * sourceRow with 6 pixels: | 0 1 2 3 4 5 |
6883 * targetRow with 3 pixels: | 0 1 2 |
6884 *
6885 * sourceRow with 6 pixels: | 0 1 | 2 3 | 4 5 |
6886 * targetRow with 3 pixels: | 0 | 1 | 2 | (targetTSourceX = 2)
6887 *
6888 * e.g., (0 + 0.5) * 2 - 0.5 = 0.5
6889 * (1 + 0.5) * 2 - 0.5 = 2.5
6890 *
6891 *
6892 * we can simplify the calculation (as we have a constant term):
6893 * sourceX = (targetX * targetTSourceX) + (0.5 * targetTSourceX - 0.5)
6894 */
6895
6896 const TScale sourceX_T_targetXOffset = sourceX_T_targetX * TScale(0.5) - TScale(0.5);
6897 const TScale sourceY_T_targetYOffset = sourceY_T_targetY * TScale(0.5) - TScale(0.5);
6898
6899 const TScale sourceWidth_1 = TScale(sourceWidth - 1u);
6900 const TScale sourceHeight_1 = TScale(sourceHeight - 1u);
6901
6902 target += targetStrideElements * firstTargetRow;
6903
6904 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6905 {
6906 const TScale sy = minmax(TScale(0), sourceY_T_targetYOffset + sourceY_T_targetY * TScale(y), sourceHeight_1);
6907 ocean_assert(sy >= TScale(0) && sy < TScale(sourceHeight));
6908
6909 const unsigned int sTop = (unsigned int)sy;
6910 ocean_assert(sy >= TScale(sTop));
6911
6912 const TScale factorBottom = sy - TScale(sTop);
6913 ocean_assert(factorBottom >= TScale(0) && factorBottom <= TScale(1));
6914
6915 const TScale factorTop = TScale(1) - factorBottom;
6916 ocean_assert(factorTop >= TScale(0) && factorTop <= TScale(1));
6917
6918 const T* const sourceTop = source + sTop * sourceStrideElements;
6919 const T* const sourceBottom = (sTop + 1u < sourceHeight) ? sourceTop + sourceStrideElements : sourceTop;
6920
6921 for (unsigned int x = 0; x < targetWidth; ++x)
6922 {
6923 const TScale sx = minmax(TScale(0), sourceX_T_targetXOffset + sourceX_T_targetX * TScale(x), sourceWidth_1);
6924 ocean_assert(sx >= TScale(0) && sx < TScale(sourceWidth));
6925
6926 const unsigned int sLeft = (unsigned int)sx;
6927 ocean_assert(sx >= TScale(sLeft));
6928
6929 const TScale factorRight = sx - TScale(sLeft);
6930 ocean_assert(factorRight >= TScale(0) && factorRight <= TScale(1));
6931
6932 const TScale factorLeft = TScale(1) - factorRight;
6933 ocean_assert(factorLeft >= TScale(0) && factorLeft <= TScale(1));
6934
6935 const unsigned int sourceRightOffset = sLeft + 1u < sourceWidth ? tChannels : 0u;
6936
6937 const T* const sourceTopLeft = sourceTop + sLeft * tChannels;
6938 const T* const sourceBottomLeft = sourceBottom + sLeft * tChannels;
6939
6940 const TScale factorTopLeft = factorTop * factorLeft;
6941 const TScale factorTopRight = factorTop * factorRight;
6942 const TScale factorBottomLeft = factorBottom * factorLeft;
6943 const TScale factorBottomRight = factorBottom * factorRight;
6944
6945 for (unsigned int n = 0u; n < tChannels; ++n)
6946 {
6947 target[n] = T(TScale(sourceTopLeft[n]) * factorTopLeft + TScale(sourceTopLeft[sourceRightOffset + n]) * factorTopRight
6948 + TScale(sourceBottomLeft[n]) * factorBottomLeft + TScale(sourceBottomLeft[sourceRightOffset + n]) * factorBottomRight);
6949 }
6950
6951 target += tChannels;
6952 }
6953
6954 target += targetPaddingElements;
6955 }
6956}
6957
6958template <unsigned int tChannels>
6959void FrameInterpolatorBilinear::rotate8BitPerChannelSubset(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const uint8_t* borderColor, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
6960{
6961 static_assert(tChannels != 0u, "Invalid channel number!");
6962
6963 ocean_assert(firstTargetRow + numberTargetRows <= height);
6964
6965 typedef typename DataType<uint8_t, tChannels>::Type PixelType;
6966
6967 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
6968
6969 uint8_t zeroColor[tChannels] = {uint8_t(0)};
6970 const PixelType bColor = borderColor ? *(const PixelType*)borderColor : *(const PixelType*)zeroColor;
6971
6972 const SquareMatrix3 rotationMatrix3(Rotation(0, 0, 1, angle));
6973 const SquareMatrix2 rotationMatrix2(rotationMatrix3(0, 0), rotationMatrix3(1, 0), rotationMatrix3(0, 1), rotationMatrix3(1, 1));
6974
6975 const Scalar width_1 = Scalar(width - 1u);
6976 const Scalar height_1 = Scalar(height - 1u);
6977 const Vector2 anchorPosition(horizontalAnchorPosition, verticalAnchorPosition);
6978
6979 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6980 {
6981 PixelType* targetPixel = (PixelType*)(target + y * targetStrideElements);
6982
6983 const Scalar floatY = Scalar(y);
6984
6985 for (unsigned int x = 0; x < width; ++x)
6986 {
6987 const Vector2 sourceLocation(anchorPosition + rotationMatrix2 * (Vector2(Scalar(x), floatY) - anchorPosition));
6988
6989 if (sourceLocation.x() >= 0 && sourceLocation.y() >= 0 && sourceLocation.x() <= width_1 && sourceLocation.y() <= height_1)
6990 {
6991 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(source, width, height, sourcePaddingElements, sourceLocation, (uint8_t*)(targetPixel));
6992 }
6993 else
6994 {
6995 *targetPixel = bColor;
6996 }
6997
6998 ++targetPixel;
6999 }
7000 }
7001}
7002
7003} // namespace CV
7004
7005} // namespace Ocean
7006
7007#endif // META_OCEAN_CV_FRAME_INTERPOLATOR_BILINEAR_H
This class implements the abstract base class for all AnyCamera objects.
Definition AnyCamera.h:130
virtual VectorT3< T > vector(const VectorT2< T > &distortedImagePoint, const bool makeUnitVector=true) const =0
Returns a vector starting at the camera's center and intersecting a given 2D point in the image.
virtual unsigned int width() const =0
Returns the width of the camera image.
virtual unsigned int height() const =0
Returns the height of the camera image.
virtual VectorT2< T > projectToImageIF(const VectorT3< T > &objectPoint) const =0
Projects a 3D object point into the camera frame.
virtual bool isValid() const =0
Returns whether this camera is valid.
Helper class allowing to determine the offset that is necessary to access the alpha channel.
Definition FrameBlender.h:60
static constexpr unsigned int data()
Returns the offset that is applied to access the first data channel.
Definition FrameBlender.h:1160
The following comfort class provides comfortable functions simplifying prototyping applications but a...
Definition FrameInterpolatorBilinear.h:60
static bool homographies(const Frame &input, Frame &output, const SquareMatrix3 homographies[4], const Vector2 &outputQuadrantCenter, const uint8_t *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
static bool zoom(const Frame &source, Frame &target, const Scalar zoomFactor, Worker *worker=nullptr)
Zooms into a given input frame (or zooms out) and stores the zoomed image content in an output frame.
static bool homographyMask(const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 &input_H_output, Worker *worker=nullptr, const uint8_t maskValue=0xFF, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
static bool lookupMask(const Frame &input, Frame &output, Frame &outputMask, const LookupTable &input_LT_output, const bool offset, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given input frame into an output frame by application of an interpolation lookup table a...
static bool homographyWithCameraMask(const AnyCamera &inputCamera, const AnyCamera &outputCamera, const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 &homography, Worker *worker=nullptr, const uint8_t maskValue=0xFFu)
Transforms a given input frame into an output frame by application of a homography.
static bool rotate(const Frame &source, Frame &target, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, Worker *worker=nullptr, const uint8_t *borderColor=nullptr)
Rotates a given frame by a bilinear interpolation.
static bool resize(const Frame &source, Frame &target, Worker *worker=nullptr)
Resizes/rescales a given frame by application of a bilinear interpolation.
static bool homographiesMask(const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 *homographies, const Vector2 &outputQuadrantCenter, Worker *worker=nullptr, const uint8_t maskValue=0xFF, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
static bool interpolatePixel(const TSource *frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2< TScalar > &position, TTarget *result, const TIntermediate &resultBias=TIntermediate(0))
Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data ty...
Definition FrameInterpolatorBilinear.h:1524
static bool resampleCameraImage(const Frame &sourceFrame, const AnyCamera &sourceCamera, const SquareMatrix3 &source_R_target, const AnyCamera &targetCamera, Frame &targetFrame, LookupCorner2< Vector2 > *source_OLT_target=nullptr, Worker *worker=nullptr, const unsigned int binSizeInPixel=8u, const void *borderColor=nullptr)
Re-samples a camera image which has been captured with a camera profile as if the image would have be...
static bool homographyWithCamera(const PinholeCamera &inputCamera, const PinholeCamera &outputCamera, const Frame &input, Frame &output, const SquareMatrix3 &homography, const bool useDistortionParameters, const uint8_t *borderColor=nullptr, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of a homography.
static bool lookup(const Frame &input, Frame &output, const LookupTable &input_LT_output, const bool offset, const void *borderColor, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
static bool affine(const Frame &source, Frame &target, const SquareMatrix3 &source_A_target, const uint8_t *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &targetOrigin=PixelPositionI(0, 0))
Applies an affine transformation to an image.
static bool interpolatePixel8BitPerChannel(const uint8_t *frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2< TScalar > &position, uint8_t *result)
Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
Definition FrameInterpolatorBilinear.h:1437
static bool homography(const Frame &input, Frame &output, const SquareMatrix3 &input_H_output, const void *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
This class implements highly optimized interpolation functions with fixed properties.
Definition FrameInterpolatorBilinear.h:341
static void resize400x400To256x256_8BitPerChannel(const uint8_t *const source, uint8_t *const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 256x256 ...
static void resize400x400To224x224_8BitPerChannel(const uint8_t *const source, uint8_t *const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 224x224 ...
This class implements bilinear frame interpolator functions.
Definition FrameInterpolatorBilinear.h:44
static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelNEON(const uint8_t *source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const typename DataType< uint8_t, tChannels >::Type &borderColor, const uint32x4_t &m128_factorsRight, const uint32x4_t &m128_factorsBottom, typename DataType< uint8_t, tChannels >::Type *targetPositionPixels)
Interpolates 4 independent pixels concurrently based on already known locations (top-left,...
Definition FrameInterpolatorBilinear.h:4288
static void homography(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, const T *borderColor, T *output, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms an input frame with (almost) arbitrary pixel format into an output frame by application of ...
Definition FrameInterpolatorBilinear.h:1736
static void resampleCameraImage(const T *sourceFrame, const AnyCamera &sourceCamera, const SquareMatrix3 &source_R_target, const AnyCamera &targetCamera, T *targetFrame, const unsigned int sourceFramePaddingElements, const unsigned int targetFramePaddingElements, LookupCorner2< Vector2 > *source_OLT_target=nullptr, Worker *worker=nullptr, const unsigned int binSizeInPixel=8u, const T *borderColor=nullptr)
Re-samples a camera image which has been captured with a camera profile as if the image would have be...
Definition FrameInterpolatorBilinear.h:1896
static void interpolateRowVerticalNEON(const T *sourceRowTop, const T *sourceRowBottom, T *targetRow, const unsigned int elements, const float factorBottom)
Applies a (vertical) linear interpolation between two rows with arbitrary data types.
static void homographyWithCamera8BitPerChannel(const PinholeCamera &inputCamera, const PinholeCamera &outputCamera, const uint8_t *input, const SquareMatrix3 &homography, const bool useDistortionParameters, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition FrameInterpolatorBilinear.h:1802
static void lookup(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable &input_LT_output, const bool offset, const T *borderColor, T *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
Definition FrameInterpolatorBilinear.h:1836
static void interpolatePixel8BitPerChannel(const uint8_t *frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2< TScalar > &position, uint8_t *result)
Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
Definition FrameInterpolatorBilinear.h:1963
static void affine8BitPerChannelSSESubset(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 *source_A_target, const uint8_t *borderColor, uint8_t *target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using SSE).
Definition FrameInterpolatorBilinear.h:2467
static Scalar patchIntensitySum1Channel(const uint32_t *linedIntegralFrame, const unsigned int frameWidth, const unsigned int frameHeight, const unsigned int lineIntegralFramePaddingElements, const Vector2 &center, const CV::PixelCenter pixelCenter, const unsigned int patchWidth, const unsigned int patchHeight)
Interpolates the sum of intensity values of an image patch in a frame, while the frame is provided as ...
static void homographyWithCameraMask8BitPerChannel(const PinholeCamera &inputCamera, const PinholeCamera &outputCamera, const uint8_t *input, const unsigned int inputPaddingElements, const SquareMatrix3 &homography, uint8_t *output, uint8_t *outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition FrameInterpolatorBilinear.h:1819
static void homographiesMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *homographies, uint8_t *output, uint8_t *outputMask, const uint8_t maskValue, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:4510
static void homographiesMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], uint8_t *output, uint8_t *outputMask, const Vector2 &outputQuadrantCenter, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given 8 bit per channel input frame into an output frame by application of four homograp...
Definition FrameInterpolatorBilinear.h:1789
static void interpolateRowHorizontal8BitPerChannel7BitPrecision(const uint8_t *extendedSourceRow, uint8_t *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const uint8_t *interpolationFactors)
Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
static void homography8BitPerChannelNEONSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:3580
static void interpolateRowVertical8BitPerChannel7BitPrecisionNEON(const uint8_t *sourceRowTop, const uint8_t *sourceRowBottom, uint8_t *targetRow, const unsigned int elements, const unsigned int factorBottom)
Applies a (vertical) linear interpolation between two rows with 8 bit per channel.
static void homographySubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const T *borderColor, T *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms a frame with (almost) arbitrary pixel format using the given homography.
Definition FrameInterpolatorBilinear.h:2383
static void homographyMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, uint8_t *output, uint8_t *outputMask, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition FrameInterpolatorBilinear.h:1773
static OCEAN_FORCE_INLINE void interpolate8Pixels1Channel8BitNEON(const uint8x8_t &topLeft_u_8x8, const uint8x8_t &topRight_u_8x8, const uint8x8_t &bottomLeft_u_8x8, const uint8x8_t &bottomRight_u_8x8, const uint8x16_t &factorsRight_factorsBottom_128_u_8x16, uint8_t *targetPositionPixels)
Interpolates 8 independent pixels concurrently of a 1 channel frame, the source pixel locations must ...
Definition FrameInterpolatorBilinear.h:3960
static void homographies8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *homographies, const uint8_t *borderColor, uint8_t *output, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homographies.
Definition FrameInterpolatorBilinear.h:4354
static void homographyWithCamera8BitPerChannelSubset(const PinholeCamera *inputCamera, const PinholeCamera *outputCamera, const PinholeCamera::DistortionLookup *outputCameraDistortionLookup, const uint8_t *input, const SquareMatrix3 *normalizedHomography, const bool useDistortionParameters, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:4590
static void affine8BitPerChannel(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 &source_A_target, const uint8_t *borderColor, uint8_t *target, const PixelPositionI &targetOrigin, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Applies an affine transform to an N-channel, 8-bit frame. The target frame must have the same pixel form...
Definition FrameInterpolatorBilinear.h:1660
static void homographyWithCameraMask8BitPerChannelSubset(const PinholeCamera *inputCamera, const PinholeCamera *outputCamera, const PinholeCamera::DistortionLookup *outputCameraDistortionLookup, const uint8_t *input, const unsigned int inputPaddingElements, const SquareMatrix3 *normalizedHomography, uint8_t *output, uint8_t *outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:4636
static void affine8BitPerChannelNEONSubset(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 *source_A_target, const uint8_t *borderColor, uint8_t *target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using NEON).
Definition FrameInterpolatorBilinear.h:3330
static void lookup8BitPerChannelSubsetNEON(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame into an output frame by application of an interpolation lo...
Definition FrameInterpolatorBilinear.h:4995
static void interpolateRowHorizontalNEON(const T *extendedSourceRow, T *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const float *interpolationFactorsRight)
Applies a (horizontal) linear interpolation for one row with arbitrary data type.
static void scale8BitPerChannelSubset7BitPrecisionNEON(const uint8_t *source, uint8_t *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int channels, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Rescales a subset of a given frame with 8 bit per channel by a bilinear interpolation.
Definition FrameInterpolatorBilinear.h:5410
LookupCorner2< Vector2 > LookupTable
Definition of a lookup table for 2D vectors.
Definition FrameInterpolatorBilinear.h:50
static OCEAN_FORCE_INLINE __m128i interpolate4Pixels8BitPerChannelSSE(const __m128i &m128_sourcesTopLeft, const __m128i &m128_sourcesTopRight, const __m128i &m128_sourcesBottomLeft, const __m128i &m128_sourcesBottomRight, const __m128i &m128_factorsTopLeft, const __m128i &m128_factorsTopRight, const __m128i &m128_factorsBottomLeft, const __m128i &m128_factorsBottomRight)
Interpolates 4 independent pixels concurrently based on already known locations (top-left,...
static void interpolateRowHorizontal(const T *extendedSourceRow, T *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const float *interpolationFactorsRight)
Applies a (horizontal) linear interpolation for one row with arbitrary data type.
Definition FrameInterpolatorBilinear.h:5375
static void rotate8BitPerChannelSubset(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const uint8_t *borderColor, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Rotates a subset of a given frame by a bilinear interpolation.
Definition FrameInterpolatorBilinear.h:6959
static void lookupMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable &input_LT_output, const bool offset, uint8_t *output, uint8_t *outputMask, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
Definition FrameInterpolatorBilinear.h:1883
static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelSSE(const uint8_t *source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const typename DataType< uint8_t, tChannels >::Type &borderColor, const __m128i &m128_factorsRight, const __m128i &m128_factorsBottom, typename DataType< uint8_t, tChannels >::Type *targetPositionPixels)
Interpolates 4 independent pixels concurrently based on already known locations (top-left,...
Definition FrameInterpolatorBilinear.h:3262
static void homographies8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], const uint8_t *borderColor, uint8_t *output, const Vector2 &outputQuadrantCenter, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of four homograp...
Definition FrameInterpolatorBilinear.h:1760
static void lookup8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame with uint8_t as element type into an output frame by appli...
Definition FrameInterpolatorBilinear.h:4684
static void resize(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Resizes a given frame with (almost) arbitrary data type (e.g., float, double, int) by using a bilinea...
Definition FrameInterpolatorBilinear.h:1611
static bool coversHomographyInputFrame(const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int outputWidth, const unsigned int outputHeight, const SquareMatrix3 &input_H_output, const int outputOriginX=0, const int outputOriginY=0)
Checks whether the application of a given homography for a specified input frame and output frame cov...
static void scale(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rescales a given frame with arbitrary data type (e.g., float, double, int) by using a bilinear interp...
Definition FrameInterpolatorBilinear.h:1624
static void lookupSubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, const T *borderColor, T *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame with arbitrary element type into an output frame by applic...
Definition FrameInterpolatorBilinear.h:4738
static void scale8BitPerChannelSubset(const uint8_t *source, uint8_t *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Resizes a subset of a given frame with 8 bit per channel by a bilinear interpolation.
Definition FrameInterpolatorBilinear.h:5234
static void rotate8BitPerChannel(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr, const uint8_t *borderColor=nullptr)
Rotates a given frame by a bilinear interpolation.
Definition FrameInterpolatorBilinear.h:1945
static void interpolateRowVertical(const T *sourceRowTop, const T *sourceRowBottom, T *targetRow, const unsigned int elements, const float factorBottom)
Applies a (vertical) linear interpolation between two rows with arbitrary data types.
Definition FrameInterpolatorBilinear.h:5355
static void homography8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, const uint8_t *borderColor, uint8_t *output, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition FrameInterpolatorBilinear.h:1698
static void interpolate1PixelFullAlphaBorder8BitPerChannel(const uint8_t *frame, const unsigned int width, const unsigned int height, const Vector2 &position, uint8_t *result, const unsigned int framePaddingElements)
Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame wit...
Definition FrameInterpolatorBilinear.h:2142
static void lookupMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, uint8_t *output, uint8_t *outputMask, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
Definition FrameInterpolatorBilinear.h:5137
static void affine8BitPerChannelSubset(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 *source_A_target, const uint8_t *borderColor, uint8_t *target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Subset function to apply an affine transform to an N-channel, 8-bit unsigned image.
Definition FrameInterpolatorBilinear.h:2231
static void homography8BitPerChannelSSESubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:2652
static void interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON(const uint8_t *extendedSourceRow, uint8_t *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const uint8_t *interpolationFactors)
Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
static void scale8BitPerChannel(const uint8_t *source, uint8_t *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rescales a given frame with 8 bit per data channel by using a bilinear interpolation with user-define...
Definition FrameInterpolatorBilinear.h:5190
static void homographyMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, uint8_t *output, uint8_t *outputMask, const uint8_t maskValue, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:4435
static void scaleSubset(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Resizes a subset of a given frame with arbitrary data type by a bilinear interpolation.
Definition FrameInterpolatorBilinear.h:6835
static void homography8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:2306
static void interpolatePixel(const TSource *frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2< TScalar > &position, TTarget *result, const TIntermediate &resultBias=TIntermediate(0))
Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data ty...
Definition FrameInterpolatorBilinear.h:2056
This class implements a 2D pixel position with pixel precision.
Definition PixelPosition.h:65
T y() const
Returns the vertical coordinate position of this object.
Definition PixelPosition.h:470
T x() const
Returns the horizontal coordinate position of this object.
Definition PixelPosition.h:458
static __m128i set128i(const unsigned long long high64, const unsigned long long low64)
Sets a 128i value by two 64 bit values.
Definition SSE.h:3770
static Caller< void > createStatic(typename StaticFunctionPointerMaker< void, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass >::Type function)
Creates a new caller container for a static function with no function parameter.
Definition Caller.h:2876
Template class allowing to define an array of data types.
Definition DataType.h:27
This class implements Ocean's image class.
Definition Frame.h:1808
void setRelativeTimestamp(const Timestamp &relative)
Sets the relative timestamp of this frame.
Definition Frame.h:4233
bool isValid() const
Returns whether this frame is valid.
Definition Frame.h:4528
void setTimestamp(const Timestamp &timestamp)
Sets the timestamp of this frame.
Definition Frame.h:4228
const Timestamp & timestamp() const
Returns the timestamp of this frame.
Definition Frame.h:4218
const Timestamp & relativeTimestamp() const
Returns the relative timestamp of this frame.
Definition Frame.h:4223
Definition of a frame type composed by the frame dimension, pixel format and pixel origin.
Definition Frame.h:30
size_t sizeY() const
Returns the vertical dimension of this lookup object.
Definition Lookup2.h:947
size_t sizeX() const
Returns the horizontal dimension of this lookup object.
Definition Lookup2.h:941
size_t binsY() const
Returns the number of vertical bins of this lookup object.
Definition Lookup2.h:959
size_t binsX() const
Returns the number of horizontal bins of this lookup object.
Definition Lookup2.h:953
This class implements a 2D lookup object with values at the bins' corners defining the individual loo...
Definition Lookup2.h:636
Vector2 binTopLeftCornerPosition(const size_t binX, const size_t binY) const
Returns the corner position (the top left corner) of a specific bin in relation to the dimension of t...
Definition Lookup2.h:1786
void setBinTopLeftCornerValue(const size_t binX, const size_t binY, const T &value)
Sets the value of one specific lookup bin's top left corner.
Definition Lookup2.h:2128
void bilinearValues(const size_t y, TTarget *values) const
Applies a lookup for an entire row in this lookup object.
Definition Lookup2.h:1864
This class implements an object able to allocate memory.
Definition base/Memory.h:22
bool isNull() const
Returns whether this object holds no memory.
Definition base/Memory.h:401
void * data()
Returns the pointer to the writable memory which is allocated by this object.
Definition base/Memory.h:303
This class provides basic numeric functionalities.
Definition Numeric.h:57
static constexpr T eps()
Returns a small epsilon.
static T floor(const T value)
Returns the largest integer value that is not greater than the given value.
Definition Numeric.h:2026
static constexpr bool isEqualEps(const T value)
Returns whether a value is smaller than or equal to a small epsilon.
Definition Numeric.h:2087
static constexpr bool isNotEqualEps(const T value)
Returns whether a value is not smaller than or equal to a small epsilon.
Definition Numeric.h:2237
unsigned int width() const
Returns the width of the camera image.
Definition PinholeCamera.h:1300
const SquareMatrixT3< T > & invertedIntrinsic() const
Returns the inverted intrinsic camera matrix.
Definition PinholeCamera.h:1263
const SquareMatrixT3< T > & intrinsic() const
Returns the intrinsic camera matrix.
Definition PinholeCamera.h:1257
unsigned int height() const
Returns the height of the camera image.
Definition PinholeCamera.h:1306
VectorT2< T > normalizedImagePoint2imagePoint(const VectorT2< T > &normalizedImagePoint, const bool distortImagePoint) const
Calculates the image point corresponding to a given normalized image point.
Definition PinholeCamera.h:1602
This class implements a 2x2 square matrix.
Definition SquareMatrix2.h:73
bool isNull() const
Returns whether this matrix is a zero matrix.
Definition SquareMatrix3.h:1333
const T * data() const
Returns a pointer to the internal values.
Definition SquareMatrix3.h:1046
bool isOrthonormal(const T epsilon=NumericT< T >::eps()) const
Returns whether this matrix is an orthonormal matrix.
Definition SquareMatrix3.h:1365
const T & x() const noexcept
Returns the x value.
Definition Vector2.h:710
const T & y() const noexcept
Returns the y value.
Definition Vector2.h:722
bool isEqual(const VectorT2< T > &vector, const T eps) const
Returns whether two vectors are equal up to a specified epsilon.
Definition Vector2.h:758
const T & y() const noexcept
Returns the y value.
Definition Vector3.h:824
const T & x() const noexcept
Returns the x value.
Definition Vector3.h:812
const T & z() const noexcept
Returns the z value.
Definition Vector3.h:836
This class implements a worker able to distribute function calls over different threads.
Definition Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
T minmax(const T &lowerBoundary, const T &value, const T &upperBoundary)
This function fits a given parameter into a specified value range.
Definition base/Utilities.h:903
PixelCenter
Definition of individual centers of pixels.
Definition CV.h:117
@ PC_TOP_LEFT
The center of a pixel is in the upper-left corner of each pixel's square.
Definition CV.h:133
@ PC_CENTER
The center of a pixel is located in the center of each pixel's square (with an offset of 0....
Definition CV.h:150
SquareMatrixT3< Scalar > SquareMatrix3
Definition of the SquareMatrix3 object, depending on the OCEAN_MATH_USE_SINGLE_PRECISION flag either with ...
Definition SquareMatrix3.h:42
RotationT< Scalar > Rotation
Definition of the Rotation object, depending on the OCEAN_MATH_USE_SINGLE_PRECISION flag either with ...
Definition Rotation.h:38
float Scalar
Definition of a scalar type.
Definition Math.h:129
VectorT3< Scalar > Vector3
Definition of a 3D vector.
Definition Vector3.h:29
VectorT2< Scalar > Vector2
Definition of a 2D vector.
Definition Vector2.h:28
The namespace covering the entire Ocean framework.
Definition Accessor.h:15
Default definition of a type with tBytes bytes.
Definition DataType.h:32
float Type
The 32 bit floating point data type for any data type T but 'double'.
Definition DataType.h:373