Ocean
Loading...
Searching...
No Matches
FrameInterpolatorBilinear.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_CV_FRAME_INTERPOLATOR_BILINEAR_H
9#define META_OCEAN_CV_FRAME_INTERPOLATOR_BILINEAR_H
10
11#include "ocean/cv/CV.h"
13#include "ocean/cv/NEON.h"
15#include "ocean/cv/SSE.h"
16
17#include "ocean/base/DataType.h"
18#include "ocean/base/Frame.h"
19#include "ocean/base/Memory.h"
20#include "ocean/base/Worker.h"
21
23
27#include "ocean/math/Lookup2.h"
32#include "ocean/math/Vector2.h"
33
34namespace Ocean
35{
36
37namespace CV
38{
39
40/**
41 * This class implements bilinear frame interpolator functions.
42 * @ingroup cv
43 */
44class OCEAN_CV_EXPORT FrameInterpolatorBilinear
45{
46 public:
47
48 /**
49 * Definition of a lookup table for 2D vectors.
50 */
52
53 public:
54
55 /**
56 * The following comfort class provides comfortable functions simplifying prototyping applications but also increasing binary size of the resulting applications.
57 * Best practice is to avoid using these functions if binary size matters,<br>
58 * as for every comfort function a corresponding function exists with specialized functionality not increasing binary size significantly.<br>
59 */
60 class OCEAN_CV_EXPORT Comfort
61 {
62 public:
63
64 /**
65 * Resizes/rescales a given frame by application of a bilinear interpolation.
66 * @param source The source frame to resize, must be valid
67 * @param target Resulting target frame with identical frame pixel format and pixel origin as the source frame, must be valid
68 * @param worker Optional worker object used for load distribution
69 * @return True, if the frame could be resized
70 */
71 static bool resize(const Frame& source, Frame& target, Worker* worker = nullptr);
72
73 /**
74 * Resizes/rescales a given frame by application of a bilinear interpolation.
75 * @param frame The frame to resize, must be valid
76 * @param width The width of the resized frame in pixel, with range [1, infinity)
77 * @param height The height of the resized frame in pixel, with range [1, infinity)
78 * @param worker Optional worker object used for load distribution
79 * @return True, if the frame could be resized
80 */
81 static inline bool resize(Frame& frame, const unsigned int width, const unsigned int height, Worker* worker = nullptr);
82
83 /**
84 * Zooms into a given input frame (or zooms out) and stores the zoomed image content in an output frame.
85 * The resulting zoomed image will have the same frame type (frame resolution, pixel format, pixel origin) as the input image.<br>
86 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
87 * @param source The source frame for which the zoomed image content will be created, must be valid
88 * @param target The resulting target frame which will receive the zoomed image, will be set to the same frame type as the source frame, can be invalid
89 * @param zoomFactor The zoom factor to be applied, a factor < 1 will zoom out, a factor > 1 will zoom in, with range (0, infinity)
90 * @param worker Optional worker object to distribute the computation to several CPU cores
91 * @return True, if succeeded
92 */
93 static bool zoom(const Frame& source, Frame& target, const Scalar zoomFactor, Worker* worker = nullptr);
94
95 /**
96 * Transforms a given input frame into an output frame (with arbitrary frame dimension) by application of a homography.
97 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
98 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
99 * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).<br>
100 * Information: This function is the equivalent to OpenCV's cv::warpPerspective().
101 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
102 * @param input The input frame that will be transformed, must be valid
103 * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must be valid
104 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
105 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels and the data type of the pixel elements, nullptr to assign 0 to each channel
106 * @param worker Optional worker object to distribute the computational load
107 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
108 * @return True, if succeeded
109 */
110 static bool homography(const Frame& input, Frame& output, const SquareMatrix3& input_H_output, const void* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
111
112 /**
113 * Transforms a given input frame into an output frame (with arbitrary frame dimension) by application of four homographies.
114 * For each quadrant of the output frame an individual homography is applied while the final result is interpolated between the four homographies.<br>
115 * The quadrant order of the homographies is as follows: top left, top right, bottom left, bottom right.<br>
116 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
117 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
118 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
119 * @param input The input frame that will be transformed
120 * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must have a valid dimension
121 * @param homographies Four homographies used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
122 * @param outputQuadrantCenter The center position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, output.width())x[0, output.height())
123 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
124 * @param worker Optional worker object to distribute the computational load
125 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
126 * @return True, if succeeded
127 */
128 static bool homographies(const Frame& input, Frame& output, const SquareMatrix3 homographies[4], const Vector2& outputQuadrantCenter, const uint8_t* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
129
130 /**
131 * Transforms a given input frame into an output frame (with arbitrary frame dimension) by application of a homography.
132 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
133 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
134 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
135 * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).
136 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
137 * @param input The input frame that will be transformed, must be valid
138 * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must have a valid dimension
139 * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame, must be valid and must have the same frame dimension as the output frame
140 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
141 * @param worker Optional worker object to distribute the computational load
142 * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
143 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
144 * @return True, if succeeded
145 * @see coversHomographyInputFrame().
146 */
147 static bool homographyMask(const Frame& input, Frame& output, Frame& outputMask, const SquareMatrix3& input_H_output, Worker* worker = nullptr, const uint8_t maskValue = 0xFF, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
148
149 /**
150 * Transforms a given input frame into an output frame (with arbitrary frame dimension) by application of four homographies.
151 * For each quadrant of the output frame an individual homography is applied while the final result is interpolated between the four homographies.<br>
152 * The quadrant order of the homographies is as follows: top left, top right, bottom left, bottom right.<br>
153 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
154 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
155 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
156 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
157 * @param input The input frame that will be transformed, must be valid
158 * @param output The output frame resulting by application of the given homography, with same pixel format and pixel origin as the input frame, must have a valid dimension
159 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
160 * @param homographies Four homographies used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
161 * @param outputQuadrantCenter The center position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, output.width())x[0, output.height())
162 * @param worker Optional worker object to distribute the computational load
163 * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
164 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
165 * @return True, if succeeded
166 * @see coversHomographyInputFrame().
167 */
168 static bool homographiesMask(const Frame& input, Frame& output, Frame& outputMask, const SquareMatrix3* homographies, const Vector2& outputQuadrantCenter, Worker* worker = nullptr, const uint8_t maskValue = 0xFF, const PixelPositionI& outputOrigin = PixelPositionI(0, 0));
169
170 /**
171 * Transforms a given input frame into an output frame by application of a homography.
172 * This function also uses a camera profile to improve the interpolation accuracy.<br>
173 * The given homography is transformed into a homography for normalized image coordinates.<br>
174 * Thus, also distortion parameters of the camera profile can be applied.<br>
175 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
176 * @param inputCamera The pinhole camera profile to be applied for the input frame
177 * @param outputCamera The pinhole camera profile to be applied for the output frame
178 * @param input The input frame that will be transformed, the frame dimension must match the dimension of the input camera (inputCamera)
179 * @param output The output frame resulting by application of the given homography, the frame dimension must match the dimension of the output camera (outputCamera)
180 * @param homography The homography used to transform the given input frame, which includes both camera profiles: H = Ki * H' * Ko^-1
181 * @param useDistortionParameters True, to apply the distortion parameters of the camera profile
182 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
183 * @param worker Optional worker object to distribute the computational load
184 * @return True, if succeeded
185 * @see homographyWithCameraMask(), homography().
186 */
187 static bool homographyWithCamera(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const Frame& input, Frame& output, const SquareMatrix3& homography, const bool useDistortionParameters, const uint8_t* borderColor = nullptr, Worker* worker = nullptr);
188
189 /**
190 * Transforms a given input frame into an output frame by application of a homography.
191 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
192 * This function also uses a camera profile to improve the interpolation accuracy.<br>
193 * The given homography is transformed into a homography for normalized image coordinates.<br>
194 * Thus, also distortion parameters of the camera profile can be applied.<br>
195 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
196 * @param inputCamera The camera profile to be applied for the input frame
197 * @param outputCamera The camera profile to be applied for the output frame
198 * @param input The input frame that will be transformed, the frame dimension must match the dimension of the input camera (inputCamera)
199 * @param output The output frame resulting by application of the given homography, the frame dimension must match the dimension of the output camera (outputCamera)
200 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
201 * @param homography The homography used to transform the given input frame, which includes both camera profiles: H = Ki * H' * Ko^-1
202 * @param worker Optional worker object to distribute the computational load
203 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
204 * @return True, if succeeded
205 * @see homographyWithCamera(), homography().
206 */
207 static bool homographyWithCameraMask(const AnyCamera& inputCamera, const AnyCamera& outputCamera, const Frame& input, Frame& output, Frame& outputMask, const SquareMatrix3& homography, Worker* worker = nullptr, const uint8_t maskValue = 0xFFu);
208
209 /**
210 * Transforms a given input frame into an output frame by application of an interpolation lookup table.
211 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
212 * Information: This function is the equivalent to OpenCV's cv::remap().
213 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
214 * @param input The input frame that will be transformed
215 * @param output Resulting output frame, the dimension will be set to match the size of the lookup table, pixel format and pixel origin will be set to match the given input frame
216 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
217 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
218 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
219 * @param worker Optional worker object to distribute the computation
220 * @return True, if succeeded
221 */
222 static bool lookup(const Frame& input, Frame& output, const LookupTable& input_LT_output, const bool offset, const void* borderColor, Worker* worker = nullptr);
223
224 /**
225 * Transforms a given input frame into an output frame by application of an interpolation lookup table and creates an additional mask as output.
226 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
227 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, further these pixels are untouched in the output frame.<br>
228 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
229 * @param input The input frame which will be transformed
230 * @param output Resulting output frame, the frame dimension will be set to match the size of the lookup table, pixel format and pixel origin will be set to match the given input frame
231 * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
232 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
233 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
234 * @param worker Optional worker object to distribute the computation
235 * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
236 * @return True, if succeeded
237 */
238 static bool lookupMask(const Frame& input, Frame& output, Frame& outputMask, const LookupTable& input_LT_output, const bool offset, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
239
240 /**
241 * Applies an affine transformation to an image.
242 * The target frame must have the same pixel format and pixel origin as the source frame, however the dimension (and position) of the target frame can be arbitrary.
243 * This function allows the creation of a target frame fully covering the source frame (if the position and dimension of the target frame covers the transformation of the affine transformation).
244 * The multiplication of the affine transformation with pixel location in the target image yield their location in the source image, i.e., sourcePoint = source_A_target * targetPoint.
245 * The parameter 'targetOrigin' applies an additional translation to the provided affine transformation i.e., source_A_target * create_translation_matrix3x3(targetOrigin.x(), targetOrigin.y()).
246 * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
247 * <pre>
248 * a c e
249 * b d f
250 * 0 0 1
251 * </pre>
252 * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
253 * Information: This function is the equivalent to OpenCV's cv::warpAffine().
254 * Note: For applications running on mobile devices, in order to keep the impact on binary size to a minimum please prefer a specialized transformation function (those that work on image pointers instead of Frame instances).
255 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
256 * @param source The source frame that will be transformed, must be valid
257 * @param target The resulting frame after applying the affine transformation to the source frame; pixel format and pixel origin must be identical to source frame; memory of target frame must be allocated by the caller
258 * @param source_A_target Affine transform used to transform the given source frame, transforming points defined in the target frame into points defined in the source frame
259 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
260 * @param worker Optional worker object to distribute the computational load
261 * @param targetOrigin The origin of the target frame defining the global position of the target frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
262 * @return True, if succeeded
263 */
264 static bool affine(const Frame& source, Frame& target, const SquareMatrix3& source_A_target, const uint8_t* borderColor = nullptr, Worker* worker = nullptr, const PixelPositionI& targetOrigin = PixelPositionI(0, 0));
265
266 /**
267 * Rotates a given frame by a bilinear interpolation.
268 * The frame will be rotated around a specified anchor position (inside or outside the frame).<br>
269 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).
270 * @param source The source frame to be rotated, must be valid
271 * @param target The target frame which will receive the rotated image, will be set to the same frame type as the source frame, can be invalid
272 * @param horizontalAnchorPosition Position of the rotation anchor in the horizontal direction, with range (-infinity, infinity)
273 * @param verticalAnchorPosition Position of the rotation anchor in the vertical direction, with range (-infinity, infinity)
274 * @param angle The counter clockwise rotation angle in radian, with range [0, 2PI)
275 * @param worker Optional worker object to distribute the computation to several CPU cores
276 * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use 0x00 for each channel
277 * @return True, if succeeded
278 */
279 static bool rotate(const Frame& source, Frame& target, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, Worker* worker = nullptr, const uint8_t* borderColor = nullptr);
280
281 /**
282 * Re-samples a camera image which has been captured with a camera profile as if the image would have been captured with a second camera profile.
283 * The function can be used e.g., to rectify a fisheye camera image into a pinhole camera image.
284 * @param sourceFrame The source image captured with the source camera profile, must be valid
285 * @param sourceCamera The source camera profile which has been used to capture the source image, with resolution sourceFrame.width() x sourceFrame.height(), must be valid
286 * @param source_R_target The rotation transforming 3D points defined in the coordinate system of the target camera image to 3D points defined in the coordinate system of the source camera image, must be valid
287 * @param targetCamera The camera profile of the target frame, must be valid
288 * @param targetFrame The resulting target image, with resolution targetCamera.width() x targetCamera.height(), must be valid
289 * @param source_OLT_target Optional resulting offset lookup table between target image points and source image points
290 * @param worker Optional worker object to distribute the computational load
291 * @param binSizeInPixel The size in pixel of the interpolation bins used for building the lookup table, with range [1, infinity)
292 * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use ElementType(0) for each channel
293 * @return True, if succeeded
294 * @see resampleCameraImageImage8BitPerChannel().
295 */
296 static bool resampleCameraImage(const Frame& sourceFrame, const AnyCamera& sourceCamera, const SquareMatrix3& source_R_target, const AnyCamera& targetCamera, Frame& targetFrame, LookupCorner2<Vector2>* source_OLT_target = nullptr, Worker* worker = nullptr, const unsigned int binSizeInPixel = 8u, const void* borderColor = nullptr);
297
298 /**
299 * Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
300 * This function uses an integer interpolation with a precision of 1/128.
301 * @param frame The frame to determine the pixel values from, must be valid
302 * @param channels Number of channels of the given frame, with range [1, 8]
303 * @param width The width of the frame in pixel, with range [1, infinity)
304 * @param height The height of the frame in pixel, with range [1, infinity)
305 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
306 * @param pixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
307 * @param position The position for which the interpolated pixel will be determined, with ranges [0, width - 1]x[0, height - 1] for PC_TOP_LEFT, [0, width]x[0, height] for PC_CENTER
308 * @param result Resulting pixel values, must be valid
309 * @return True, if succeeded
310 * @tparam TScalar The scalar data type of the sub-pixel position
311 */
312 template <typename TScalar = Scalar>
313 static bool interpolatePixel8BitPerChannel(const uint8_t* frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2<TScalar>& position, uint8_t* result);
314
315 /**
316 * Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data type.
317 * This function uses floating point precision during interpolation.
318 * @param frame The frame to determine the pixel values from, must be valid
319 * @param channels Number of channels of the given frame, with range [1, 8]
320 * @param width The width of the frame in pixel, with range [1, infinity)
321 * @param height The height of the frame in pixel, with range [1, infinity)
322 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
323 * @param pixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
324 * @param position The position to determine the interpolated pixel values for, with range [0, width)x[0, height)
325 * @param result Resulting interpolated pixel value(s), must be valid
326 * @param resultBias Optional bias value which will be added to the interpolation result e.g. to handle rounding, with range (-infinity, infinity), default is zero
327 * @return True, if succeeded
328 * @tparam TSource The data type of the provided pixel values in the (source) frame
329 * @tparam TTarget The data type of the resulting interpolated value(s)
330 * @tparam TScalar The data type of each coordinate of the provided interpolation location, should be either Scalar, float, or double
331 * @tparam TIntermediate The data type of the intermediate interpolation result before assigning the result
332 */
333 template <typename TSource, typename TTarget, typename TScalar = Scalar, typename TIntermediate = TScalar>
334 static bool interpolatePixel(const TSource* frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2<TScalar>& position, TTarget* result, const TIntermediate& resultBias = TIntermediate(0));
335 };
336
337 /**
338 * This class implements highly optimized interpolation functions with fixed properties.
339 * The functions can be significantly faster as these functions are tailored to the specific properties.
340 */
341 class OCEAN_CV_EXPORT SpecialCases
342 {
343 public:
344
345 /**
346 * Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 224x224 by using a bilinear interpolation.
347 * This function exploits the fact that lookup locations and interpolation factors repeat after 25 pixels (14 pixels in the target resolution, as 224 / 400 == 14 / 25).
348 * @param source The source frame buffer with resolution 400x400, must be valid
349 * @param target The target frame buffer receiving the resized image information, with resolution 224x224, must be valid
350 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
351 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
352 * @see FrameInterpolatorBilinear::resize<T, tChannels>().
353 */
354 static void resize400x400To224x224_8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
355
356 /**
357 * Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 256x256 by using a bilinear interpolation.
358 * This function exploits the fact that lookup locations and interpolation factors repeat after 25 pixels (16 pixels in the target resolution, as 256 / 400 == 16 / 25).
359 * @param source The source frame buffer with resolution 400x400, must be valid
360 * @param target The target frame buffer receiving the resized image information, with resolution 256x256, must be valid
361 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
362 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
363 * @see FrameInterpolatorBilinear::resize<T, tChannels>().
364 */
365 static void resize400x400To256x256_8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
366 };
367
368 /**
369 * Resizes a given frame with (almost) arbitrary data type (e.g., float, double, int) by using a bilinear interpolation.
370 * This function is actually a wrapper for scale(); source and target frame share the same element type T and the same number of channels tChannels.
371 * @param source The source frame buffer providing the image information to be resized, must be valid
372 * @param target The target frame buffer receiving the resized image information, must be valid
373 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
374 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
375 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
376 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
377 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
378 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
379 * @param worker Optional worker object to distribute the computation to several CPU cores, nullptr by default
380 * @tparam T Data type of each pixel channel, e.g., float, double, int
381 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
382 * @see scale<T, tChannels>().
383 */
384 template <typename T, unsigned int tChannels>
385 static inline void resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
386
387 /**
388 * Rescales a given frame with arbitrary data type (e.g., float, double, int) by using a bilinear interpolation with user-defined scaling factors.
389 * Beware: This function is not optimized for performance but supports arbitrary data types.<br>
390 * Try to use scale8BitPerChannel() if possible.
391 * @param source The source frame buffer providing the image information to be rescaled, must be valid
392 * @param target The target frame buffer receiving the rescaled image information, must be valid
393 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
394 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
395 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
396 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
397 * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
398 * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
399 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
400 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
401 * @param worker Optional worker object to distribute the computation to several CPU cores, nullptr by default
402 * @tparam T Data type of each pixel channel, e.g., float, double, int
403 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
404 * @see resize<T, tChannels>().
405 */
406 template <typename T, unsigned int tChannels>
407 static inline void scale(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
408
409 /**
410 * Rotates a given frame by a bilinear interpolation.
411 * The frame will be rotated around a specified anchor position (inside or outside the frame).
412 * @param source The source frame to be rotated, must be valid
413 * @param target The target frame which will receive the rotated image, with same frame type as the source frame, must be valid
414 * @param width The width of the source and target frame in pixel, with range [1, infinity)
415 * @param height The height of the source and target frame in pixel, with range [1, infinity)
416 * @param horizontalAnchorPosition Position of the rotation anchor in the horizontal direction, with range (-infinity, infinity)
417 * @param verticalAnchorPosition Position of the rotation anchor in the vertical direction, with range (-infinity, infinity)
418 * @param angle The counter clockwise rotation angle in radian, with range [0, 2PI)
419 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
420 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
421 * @param worker Optional worker object to distribute the computation to several CPU cores
422 * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use 0x00 for each channel
423 * @tparam tChannels The number of channels both frames have, with range [1, infinity)
424 */
425 template <unsigned int tChannels>
426 static inline void rotate8BitPerChannel(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr, const uint8_t* borderColor = nullptr);
427
428 /**
429 * Applies an affine transformation to an N-channel, 8-bit frame.
430 * The target frame must have the same pixel format and pixel origin as the source frame, however the dimension (and position) of the target frame can be arbitrary.<br>
431 * This function allows the creation of a target frame fully covering the source frame (if the position and dimension of the target frame cover the result of the affine transformation).<br>
432 * The 'targetOrigin' parameter simply applies an additional translation onto the provided affine transformation i.e., affine * create_translation_matrix3x3(targetOrigin.x(), targetOrigin.y()).
433 * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
434 * <pre>
435 * a c e
436 * b d f
437 * 0 0 1
438 * </pre>
439 * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
440 * @param source Input frame that will be transformed, must be valid
441 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
442 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
443 * @param source_A_target Affine transformation, such that: sourcePoint = source_A_target * targetPoint
444 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
445 * @param target The target frame using the given affine transform, must be valid
446 * @param targetOrigin The origin of the target frame defining the global position of the target frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
447 * @param targetWidth The width of the target image in pixel, with range [1, infinity)
448 * @param targetHeight The height of the target image in pixel, with range [1, infinity)
449 * @param sourcePaddingElements The number of padding elements at the end of each source frame row, in elements, with range [0, infinity)
450 * @param targetPaddingElements The number of padding elements at the end of each target frame row, in elements, with range [0, infinity)
451 * @param worker Optional worker object to distribute the computational load
452 * @tparam tChannels Number of channels of the frame
453 * @see homographyMask8BitPerChannel(), homographyWithCamera8BitPerChannel(), homography().
454 */
455 template <unsigned int tChannels>
456 static inline void affine8BitPerChannel(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3& source_A_target, const uint8_t* borderColor, uint8_t* target, const PixelPositionI& targetOrigin, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
457
458 /**
459 * Transforms an input frame with (almost) arbitrary pixel format into an output frame by application of a homography.
460 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
461 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
462 * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).
463 * @param input The input frame that will be transformed, must be valid
464 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
465 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
466 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
467 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
468 * @param output The output frame using the given homography, must be valid
469 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
470 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
471 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
472 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
473 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
474 * @param worker Optional worker object to distribute the computational load
475 * @tparam T Data type of each pixel channel, e.g., float, double, int
476 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
477 * @see homographyMask8BitPerChannel(), homographyWithCamera8BitPerChannel().
478 */
479 template <typename T, unsigned int tChannels>
480 static inline void homography(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const T* borderColor, T* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
481
482 /**
483 * Transforms a given 8 bit per channel input frame into an output frame by application of four homographies.
484 * For each quadrant of the output frame an individual homography is applied while the final result is interpolated between the four homographies.<br>
485 * The quadrant order of the homographies is as follows: top left, top right, bottom left, bottom right.<br>
486 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
487 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
488 * @param input The input frame that will be transformed
489 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
490 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
491 * @param homographies Four homographies used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
492 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
493 * @param output The output frame using the given homography
494 * @param outputQuadrantCenter The center position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputWidth)x[0, outputHeight)
495 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
496 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
497 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
498 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
499 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
500 * @param worker Optional worker object to distribute the computational load
501 * @tparam tChannels Number of channels of the frame
502 * @see homographyMask8BitPerChannel(), homographyWithCamera8BitPerChannel().
503 */
504 template <unsigned int tChannels>
505 static inline void homographies8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], const uint8_t* borderColor, uint8_t* output, const Vector2& outputQuadrantCenter, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
506
507 /**
508 * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
509 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
510 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
511 * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).
512 * @param input The input frame that will be transformed, must be valid
513 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
514 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
515 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
516 * @param output The output frame using the given homography, must be valid
517 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame, must be valid
518 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
519 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
520 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
521 * @param maskValue 8 bit mask values for output pixels with a corresponding pixel lying inside the input frame, output pixels whose corresponding pixel lies outside the input frame will be assigned with (0xFF - maskValue)
522 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
523 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
524 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
525 * @param worker Optional worker object to distribute the computational load
526 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
527 * @see homography(), homographyWithCamera8BitPerChannel().
528 */
529 template <unsigned int tChannels>
530 static inline void homographyMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, uint8_t* output, uint8_t* outputMask, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const uint8_t maskValue /* = 0xFF*/, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr);
531
532 /**
533 * Transforms a given 8 bit per channel input frame into an output frame by application of four homographies.
534 * For each quadrant of the output frame an individual homography is applied while the final result is interpolated between the four homographies.<br>
535 * The quadrant order of the homographies is as follows: top left, top right, bottom left, bottom right.<br>
536 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
537 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
538 * @param input The input frame that will be transformed
539 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
540 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
541 * @param homographies Four homographies used to transform the given input frame, transforming points defined in the output frame into points defined in the input frame
542 * @param output The output frame using the given homography
543 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
544 * @param outputQuadrantCenter The center position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputWidth)x[0, outputHeight)
545 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
546 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
547 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
548 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
549 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
550 * @param outputMaskPaddingElements The number of padding elements at the end of each row of the output mask, in elements, with range [0, infinity)
551 * @param worker Optional worker object to distribute the computational load
552 * @param maskValue 8 bit mask values for output pixels with a corresponding pixel lying inside the input frame, output pixels whose corresponding pixel lies outside the input frame will be assigned with (0xFF - maskValue)
553 * @tparam tChannels Number of channels of the frame
554 * @see homography(), homographyWithCamera8BitPerChannel().
555 */
556 template <unsigned int tChannels>
557 static inline void homographiesMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], uint8_t* output, uint8_t* outputMask, const Vector2& outputQuadrantCenter, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
558
559 /**
560 * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
561 * This function also uses a camera profile to improve the interpolation accuracy.<br>
562 * The given homography is transformed into a homography for normalized image coordinates.<br>
563 * Thus, the distortion parameters of the camera profile can be applied as well.<br>
564 * @param inputCamera The pinhole camera profile to be applied for the input frame
565 * @param outputCamera The pinhole camera profile to be applied for the output frame
566 * @param input The input frame that will be transformed
567 * @param homography The homography used to transform the given input frame by following equation: inputPoint = homography * outputPoint
568 * @param useDistortionParameters True, to apply the distortion parameters of the camera profile
569 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
570 * @param output The output frame using the given homography
571 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
572 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
573 * @param worker Optional worker object to distribute the computational load
574 * @tparam tChannels Number of channels of the frame
575 * @see homography().
576 */
577 template <unsigned int tChannels>
578 static inline void homographyWithCamera8BitPerChannel(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const uint8_t* input, const SquareMatrix3& homography, const bool useDistortionParameters, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
579
580 /**
581 * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
582 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame.<br>
583 * This function also uses a camera profile to improve the interpolation accuracy.<br>
584 * The given homography is transformed into a homography for normalized image coordinates.<br>
585 * Thus, the distortion parameters of the camera profile can be applied as well.
586 * @param inputCamera The pinhole camera profile to be applied for the input frame, must be valid
587 * @param outputCamera The pinhole camera profile to be applied for the output frame, must be valid
588 * @param input The input frame that will be transformed, must be valid
589 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
590 * @param homography The homography used to transform the given input frame by following equation: inputPoint = homography * outputPoint
591 * @param output The output frame using the given homography
592 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
593 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
594 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
595 * @param worker Optional worker object to distribute the computational load
596 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
597 * @tparam tChannels Number of channels of the frame
598 */
599 template <unsigned int tChannels>
600 static inline void homographyWithCameraMask8BitPerChannel(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const uint8_t* input, const unsigned int inputPaddingElements, const SquareMatrix3& homography, uint8_t* output, uint8_t* outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
601
602 /**
603 * Transforms a given input frame into an output frame by application of an interpolation lookup table.
604 * The frame must have a 1-plane pixel format.<br>
605 * The output frame must have the same pixel format and pixel origin as the input frame.
606 * @param input The input frame which will be transformed, must be valid
607 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
608 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
609 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
610 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
611 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign T(0) to each channel
612 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
613 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
614 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
615 * @param worker Optional worker object to distribute the computation
616 * @tparam T Data type of each pixel channel, e.g., float, double, int
617 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
618 */
619 template <typename T, unsigned int tChannels>
620 static inline void lookup(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& input_LT_output, const bool offset, const T* borderColor, T* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
621
622 /**
623 * Transforms a given input frame into an output frame by application of an interpolation lookup table.
624 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).<br>
625 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
626 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame, furthermore these pixels are left untouched in the output frame.<br>
627 * @param input The input frame which will be transformed
628 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
629 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
630 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
631 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
632 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table
633 * @param outputMask Resulting mask frame with 8 bits per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
634 * @param inputPaddingElements The number of padding elements at the end of each row of `input`, in elements, with range [0, infinity)
635 * @param outputPaddingElements The number of padding elements at the end of each row of `output`, in elements, with range [0, infinity)
636 * @param outputMaskPaddingElements The number of padding elements at the end of each row of `outputMask`, in elements, with range [0, infinity)
637 * @param worker Optional worker object to distribute the computation
638 * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
639 * @tparam tChannels Number of channels of the frame
640 */
641 template <unsigned int tChannels>
642 static inline void lookupMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& input_LT_output, const bool offset, uint8_t* output, uint8_t* outputMask, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker = nullptr, const uint8_t maskValue = 0xFF);
643
644 /**
645 * Re-samples a camera image which has been captured with a camera profile as if the image would have been captured with a second camera profile.
646 * The function can be used e.g., to rectify a fisheye camera image into a pinhole camera image.
647 * @param sourceFrame The source image captured with the source camera profile, must be valid
648 * @param sourceCamera The source camera profile which has been used to capture the source image, with resolution sourceFrame.width() x sourceFrame.height(), must be valid
649 * @param source_R_target The rotation transforming 3D points defined in the coordinate system of the target camera image to 3D points defined in the coordinate system of the source camera image, must be valid
650 * @param targetCamera The camera profile of the target frame, must be valid
651 * @param targetFrame The resulting target image, with resolution targetCamera.width() x targetCamera.height(), must be valid
652 * @param sourceFramePaddingElements The number of padding elements at the end of each source frame row, in elements, with range [0, infinity)
653 * @param targetFramePaddingElements The number of padding elements at the end of each target frame row, in elements, with range [0, infinity)
654 * @param source_OLT_target Optional resulting offset lookup table between target image points and source image points
655 * @param worker Optional worker object to distribute the computational load
656 * @param binSizeInPixel The size in pixel of the interpolation bins used for building the lookup table, with range [1, infinity)
657 * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use T(0) for each channel
658 * @tparam T Data type of each pixel channel, e.g., uint8_t, int16_t, float, double
659 * @tparam tChannels The number of frame channels, with range [1, infinity)
660 * @see Comfort::resampleCameraImage().
661 */
662 template <typename T, unsigned int tChannels>
663 static void resampleCameraImage(const T* sourceFrame, const AnyCamera& sourceCamera, const SquareMatrix3& source_R_target, const AnyCamera& targetCamera, T* targetFrame, const unsigned int sourceFramePaddingElements, const unsigned int targetFramePaddingElements, LookupCorner2<Vector2>* source_OLT_target = nullptr, Worker* worker = nullptr, const unsigned int binSizeInPixel = 8u, const T* borderColor = nullptr);
664
665 /**
666 * Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
667 * This function uses an integer interpolation with a precision of 1/128.
668 * @param frame The frame to determine the pixel values from, must be valid
669 * @param width The width of the frame in pixel, with range [1, infinity)
670 * @param height The height of the frame in pixel, with range [1, infinity)
671 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
672 * @param position The position for which the interpolated pixel will be determined, with ranges [0, width - 1]x[0, height - 1] for PC_TOP_LEFT, [0, width]x[0, height] for PC_CENTER
673 * @param result Resulting pixel values, must be valid
674 * @tparam tChannels Number of channels of the given frame, with range [1, infinity)
675 * @tparam tPixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
676 * @tparam TScalar The scalar data type of the sub-pixel position
677 * @see interpolatePixel().
678 */
679 template <unsigned int tChannels, PixelCenter tPixelCenter = PC_TOP_LEFT, typename TScalar = Scalar>
680 static inline void interpolatePixel8BitPerChannel(const uint8_t* frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2<TScalar>& position, uint8_t* result);
681
682 /**
683 * Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data type.
684 * This function uses floating point precision during interpolation.
685 * @param frame The frame to determine the pixel values from, must be valid
686 * @param width The width of the frame in pixel, with range [1, infinity)
687 * @param height The height of the frame in pixel, with range [1, infinity)
688 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
689 * @param position The position for which the interpolated pixel will be determined, with ranges [0, width - 1]x[0, height - 1] for PC_TOP_LEFT, [0, width]x[0, height] for PC_CENTER
690 * @param result Resulting interpolated pixel value(s), must be valid
691 * @param resultBias Optional bias value which will be added to the interpolation result, e.g., to handle rounding, with range (-infinity, infinity), default is zero
692 * @tparam TSource The data type of the provided pixel values in the (source) frame
693 * @tparam TTarget The data type of the resulting interpolated value(s)
694 * @tparam tChannels Number of channels of the given frame, with range [1, infinity)
695 * @tparam tPixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
696 * @tparam TScalar The data type of each coordinate of the provided interpolation location, should be either Scalar, float, or double
697 * @tparam TIntermediate The data type of the intermediate interpolation result before assigning the result
698 * @see interpolatePixel8BitPerChannel().
699 */
700 template <typename TSource, typename TTarget, unsigned int tChannels, PixelCenter tPixelCenter = PC_TOP_LEFT, typename TScalar = Scalar, typename TIntermediate = TScalar>
701 static inline void interpolatePixel(const TSource* frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2<TScalar>& position, TTarget* result, const TIntermediate& resultBias = TIntermediate(0));
702
703 /**
704 * Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame with alpha channel.
705 * The center of each pixel is located with an offset of (0.5 x 0.5) in relation to the real pixel position.<br>
706 * The given frame is virtually extended by a fully transparent border so that this function supports arbitrary interpolation positions.<br>
707 * If the given position lies inside the frame area of (-0.5, -0.5) -> (width + 0.5, height + 0.5) the resulting interpolation result will contain color information of the frame, otherwise a fully transparent interpolation result is provided.<br>
708 * @param frame The frame to determine the pixel values from, must be valid
709 * @param width The width of the frame in pixel, with range [1, infinity)
710 * @param height The height of the frame in pixel, with range [1, infinity)
711 * @param position The position to determine the interpolated pixel values for, with range (-infinity, infinity)x(-infinity, infinity)
712 * @param result Resulting pixel values, must be valid
713 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
714 * @tparam tChannels Number of channels of the given frame, with range [1, infinity)
715 * @tparam tAlphaAtFront True, if the alpha channel is in the front of the data channels
716 * @tparam tTransparentIs0xFF True, if 0xFF is interpreted as fully transparent
717 */
718 template <unsigned int tChannels, bool tAlphaAtFront, bool tTransparentIs0xFF>
719 static inline void interpolate1PixelFullAlphaBorder8BitPerChannel(const uint8_t* frame, const unsigned int width, const unsigned int height, const Vector2& position, uint8_t* result, const unsigned int framePaddingElements);
720
721 /**
722 * Interpolates the sum of intensity values of an image patch in a frame, while the frame is provided as a lined integral frame.
723 * @param linedIntegralFrame The lined integral image created from the actual gray-scale image for which the patch intensity sum will be determined, must be valid
724 * @param frameWidth Width of the original frame in pixel (not the width of the lined-integral frame), with range [1, infinity)
725 * @param frameHeight Height of the original frame in pixel (not the height of the lined-integral frame), with range [1, infinity)
726 * @param lineIntegralFramePaddingElements The number of padding elements at the end of each integral image row, in elements, with range [0, infinity)
727 * @param center 2D coordinates of the center point of the patch, with range [patchWidth/2, frameWidth - patchWidth/2)x[patchHeight/2, frameHeight - patchHeight/2) for PC_CENTER
728 * @param pixelCenter The pixel center to be used during interpolation, either 'PC_TOP_LEFT' or 'PC_CENTER'
729 * @param patchWidth Width of the calculated patch in pixel, with range [1, frameWidth - 1]
730 * @param patchHeight Height of the calculated patch in pixel, with range [1, frameHeight - 1]
731 * @return The resulting sum of the pixel intensities
732 */
733 static Scalar patchIntensitySum1Channel(const uint32_t* linedIntegralFrame, const unsigned int frameWidth, const unsigned int frameHeight, const unsigned int lineIntegralFramePaddingElements, const Vector2& center, const CV::PixelCenter pixelCenter, const unsigned int patchWidth, const unsigned int patchHeight);
734
735 /**
736 * Checks whether the application of a given homography for a specified input frame and output frame covers the entire image information (which is necessary for the bilinear interpolation) or whether the homography relies on missing image information.
737 * @param inputWidth The width of the input frame in pixel, with range [1, infinity)
738 * @param inputHeight The height of the input frame in pixel, with range [1, infinity)
739 * @param outputWidth The width of the output frame in pixel, with range [1, infinity)
740 * @param outputHeight The height of the output frame in pixel, with range [1, infinity)
741 * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
742 * @param outputOriginX The horizontal origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)
743 * @param outputOriginY The vertical origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)
744 * @return True, if the homography covers the entire input image information (if all output pixels will receive valid data from the input frame); False, otherwise
745 */
746 static bool coversHomographyInputFrame(const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int outputWidth, const unsigned int outputHeight, const SquareMatrix3& input_H_output, const int outputOriginX = 0, const int outputOriginY = 0);
747
748 private:
749
750 /**
751 * Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
752 * The output frame must have the same pixel format and pixel origin as the input frame, however the dimension (and position) of the output frame can be arbitrary.<br>
753 * This function allows the creation of an output frame fully covering the input frame (if the position and dimension of the output frame covers the transformation of the homography).<br>
754 * The 'outputOrigin' parameter simply applies an additional translation onto the provided homography i.e., homography * create_translation_matrix3x3(outputOrigin.x(), outputOrigin.y()).
755 * @param input The input frame that will be transformed, must be valid
756 * @param inputWidth Width of the input frame in pixel, with range [1, infinity)
757 * @param inputHeight Height of the input frame in pixel, with range [1, infinity)
758 * @param input_H_output Homography used to transform the given input frame by the following equation: inputPoint = input_H_output * outputPoint, must be valid
759 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
760 * @param output The output frame using the given homography, must be valid
761 * @param outputOrigin The origin of the output frame defining the global position of the output frame's pixel coordinate (0, 0), with range (-infinity, infinity)x(-infinity, infinity)
762 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
763 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
764 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
765 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
766 * @param worker Optional worker object to distribute the computational load
767 * @tparam tChannels Number of channels of the frame
768 * @see homographyMask8BitPerChannel(), homographyWithCamera8BitPerChannel(), homography().
769 */
770 template <unsigned int tChannels>
771 static inline void homography8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const uint8_t* borderColor, uint8_t* output, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker = nullptr);
772
773 /**
774 * Rescales a given frame with 8 bit per data channel by using a bilinear interpolation with user-defined scaling factors.
775 * The frame must have a 1-plane pixel format with DT_UNSIGNED_INTEGER_8 as data type (e.g., FORMAT_Y8, FORMAT_RGB24, FORMAT_RGBA32, ...).<br>
776 * Information: This function is the equivalent of OpenCV's cv::resize().
777 * @param source The source frame buffer providing the image information to be resized, must be valid
778 * @param target The target frame buffer receiving the rescaled image information, must be valid
779 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
780 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
781 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
782 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
783 * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
784 * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
785 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
786 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
787 * @param worker Optional worker object to distribute the computation to several CPU cores
788 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
789 */
790 template <unsigned int tChannels>
791 static inline void scale8BitPerChannel(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
792
793 /**
794 * Resizes a subset of a given frame with 8 bit per channel by a bilinear interpolation.
795 * @param source The image data of the source frame to be resized, must be valid
796 * @param target The target frame buffer receiving the interpolated (resized) source frame, must be valid
797 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
798 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
799 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
800 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
801 * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
802 * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
803 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
804 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
805 * @param firstTargetRow The first target row to be handled, with range [0, targetHeight)
806 * @param numberTargetRows The number of target rows to be handled, with range [1, targetHeight - firstTargetRow]
807 * @tparam tChannels Number of frame channels, with range [1, infinity)
808 */
809 template <unsigned int tChannels>
810 static void scale8BitPerChannelSubset(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
811
812 /**
813 * Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
814 * This function uses interpolation factors with 7 bit precision and does not apply any SIMD instructions.
815 * @param extendedSourceRow The source row extended with a copy of the last pixel for which the interpolation will be applied, must be valid
816 * @param targetRow The target row receiving the interpolation result, must be valid
817 * @param targetWidth The width of the target row in pixel, with range [8, infinity)
818 * @param channels The number of frame channels, must be identical with 'tChannels', possible values are 1, 4
819 * @param interpolationLocations The successive locations within the source row defining the location of the left pixels to be interpolated (specified in elements !not! in pixels - e.g., interpolationLocations[0] = firstInterpolationPixel * channels), one for each target pixel, with range [0, (targetWidth - 1) * channels]
820 * @param interpolationFactors The two successive (left !and! right) interpolation factors for each left and right source pixel, with range [0, 128]
821 * @see interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON<tChannels>().
822 */
823 static void interpolateRowHorizontal8BitPerChannel7BitPrecision(const uint8_t* extendedSourceRow, uint8_t* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const uint8_t* interpolationFactors);
824
825 /**
826 * Applies a (vertical) linear interpolation between two rows with arbitrary data types.
827 * This function does not apply any SIMD instructions.<br>
828 * The length of both source rows is identical with the length of the target row.
829 * @param sourceRowTop The top source row to be used for interpolation, must be valid
830 * @param sourceRowBottom The bottom source row to be used for interpolation, must be valid
831 * @param targetRow The target row receiving the interpolation result, must be valid
832 * @param elements The number of elements in the row to interpolate (width * channels), with range [1, infinity)
833 * @param factorBottom The interpolation factor for all elements of the bottom row, with factorTop = 1 - factorBottom, with range [0, 1]
834 * @tparam T The data type of each element, should be 'float'
835 */
836 template <typename T>
837 static void interpolateRowVertical(const T* sourceRowTop, const T* sourceRowBottom, T* targetRow, const unsigned int elements, const float factorBottom);
838
839 /**
840 * Applies a (horizontal) linear interpolation for one row with arbitrary data type.
841 * This function does not apply any SIMD instructions.
842 * @param extendedSourceRow The source row extended with a copy of the last pixel for which the interpolation will be applied, must be valid
843 * @param targetRow The target row receiving the interpolation result, must be valid
844 * @param targetWidth The width of the target row in pixel, with range [8, infinity)
845 * @param channels The number of frame channels, must be identical with 'tChannels', possible values are 1, 4
846 * @param interpolationLocations The successive locations within the source row defining the location of the left pixels to be interpolated (specified in elements !not! in pixels - e.g., interpolationLocations[0] = firstInterpolationPixel * channels), one for each target pixel, with range [0, (targetWidth - 1) * channels]
847 * @param interpolationFactorsRight The right interpolation factors for each right source pixel, with range [0, 1]
848 * @tparam T The data type of each element, should be 'float'
849 * @tparam tChannels The number of frame channels this function can handle, should be 1
850 * @see interpolateRowHorizontal8BitPerChannel7BitPrecision().
851 */
852 template <typename T, unsigned int tChannels>
853 static void interpolateRowHorizontal(const T* extendedSourceRow, T* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const float* interpolationFactorsRight);
854
855#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
856
857 /**
858 * Applies a (vertical) linear interpolation between two rows with 8 bit per channel.
859 * This function applies NEON instructions and uses interpolation factors with 7 bit precision.<br>
860 * The length of both source rows is identical with the length of the target row.
861 * @param sourceRowTop The top source row to be used for interpolation, must be valid
862 * @param sourceRowBottom The bottom source row to be used for interpolation, must be valid
863 * @param targetRow The target row receiving the interpolation result, must be valid
864 * @param elements The number of elements in the row to interpolate (width * channels), with range [16, infinity)
865 * @param factorBottom The interpolation factor for all elements of the bottom row, with factorTop = 128 - factorBottom, with range [0, 128]
866 */
867 static void interpolateRowVertical8BitPerChannel7BitPrecisionNEON(const uint8_t* sourceRowTop, const uint8_t* sourceRowBottom, uint8_t* targetRow, const unsigned int elements, const unsigned int factorBottom);
868
869 /**
870 * Applies a (vertical) linear interpolation between two rows with arbitrary data types.
871 * This function applies NEON instructions.<br>
872 * The length of both source rows is identical with the length of the target row.
873 * @param sourceRowTop The top source row to be used for interpolation, must be valid
874 * @param sourceRowBottom The bottom source row to be used for interpolation, must be valid
875 * @param targetRow The target row receiving the interpolation result, must be valid
876 * @param elements The number of elements in the row to interpolate (width * channels), with range [16, infinity)
877 * @param factorBottom The interpolation factor for all elements of the bottom row, with factorTop = 1 - factorBottom, with range [0, 1]
878 * @tparam T The data type of each element, should be 'float'
879 */
880 template <typename T>
881 static void interpolateRowVerticalNEON(const T* sourceRowTop, const T* sourceRowBottom, T* targetRow, const unsigned int elements, const float factorBottom);
882
883 /**
884 * Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
885 * This function applies NEON instructions and uses interpolation factors with 7 bit precision.
886 * @param extendedSourceRow The source row extended with a copy of the last pixel for which the interpolation will be applied, must be valid
887 * @param targetRow The target row receiving the interpolation result, must be valid
888 * @param targetWidth The width of the target row in pixel, with range [8, infinity)
889 * @param channels The number of frame channels, must be identical with 'tChannels', possible values are 1, 4
890 * @param interpolationLocations The successive locations within the source row defining the location of the left pixels to be interpolated (specified in elements !not! in pixels - e.g., interpolationLocations[0] = firstInterpolationPixel * channels), one for each target pixel, with range [0, (targetWidth - 1) * channels]
891 * @param interpolationFactors The two successive (left !and! right) interpolation factors for each left and right source pixel, with range [0, 128]
892 * @tparam tChannels The number of frame channels this function can handle, possible values are 1, 4
893 * @see interpolateRowHorizontal8BitPerChannel7BitPrecision().
894 */
895 template <unsigned int tChannels>
896 static void interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON(const uint8_t* extendedSourceRow, uint8_t* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const uint8_t* interpolationFactors);
897
898 /**
899 * Applies a (horizontal) linear interpolation for one row with arbitrary data type.
900 * This function applies NEON instructions.
901 * @param extendedSourceRow The source row extended with a copy of the last pixel for which the interpolation will be applied, must be valid
902 * @param targetRow The target row receiving the interpolation result, must be valid
903 * @param targetWidth The width of the target row in pixel, with range [8, infinity)
904 * @param channels The number of frame channels, must be identical with 'tChannels', possible values are 1, 4
905 * @param interpolationLocations The successive locations within the source row defining the location of the left pixels to be interpolated (specified in elements !not! in pixels - e.g., interpolationLocations[0] = firstInterpolationPixel * channels), one for each target pixel, with range [0, (targetWidth - 1) * channels]
906 * @param interpolationFactorsRight The right interpolation factors for each right source pixel, with range [0, 1]
907 * @tparam T The data type of each element, should be 'float'
908 * @tparam tChannels The number of frame channels this function can handle, should be 1
909 * @see interpolateRowHorizontal8BitPerChannel7BitPrecision().
910 */
911 template <typename T, unsigned int tChannels>
912 static void interpolateRowHorizontalNEON(const T* extendedSourceRow, T* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const float* interpolationFactorsRight);
913
914 /**
915 * Rescales a subset of a given frame with 8 bit per channel by a bilinear interpolation.
916 * This function applies NEON instructions and uses interpolation factors with 7 bit precision.
917 * @param source The image data of the source frame to be resized, must be valid
918 * @param target The target frame buffer receiving the interpolated (resized) source frame, must be valid
919 * @param sourceWidth Width of the source frame in pixel, with range [2, 65535]
920 * @param sourceHeight Height of the source frame in pixel, with range [1, 65535]
921 * @param targetWidth Width of the target frame in pixel, with range [tMinimalTargetWidth, 65535]
922 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
923 * @param channels The number of channels both frames have, with range [1, infinity)
924 * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
925 * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
926 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
927 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
928 * @param firstTargetRow The first target row to be handled, with range [0, targetHeight)
929 * @param numberTargetRows The number of target rows to be handled, with range [1, targetHeight - firstTargetRow]
930 * @see interpolateRowVertical8BitPerChannel7BitPrecisionNEON(), interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON().
931 */
932 static void scale8BitPerChannelSubset7BitPrecisionNEON(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int channels, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
933
934#endif // OCEAN_HARDWARE_NEON_VERSION >= 10
935
936 /**
937 * Resizes a subset of a given frame with arbitrary data type by a bilinear interpolation.
938 * @param source The image data of the source frame to be resized, must be valid
939 * @param target The target frame buffer receiving the interpolated (resized) source frame, must be valid
940 * @param sourceWidth Width of the source frame in pixel, with range [1, infinity)
941 * @param sourceHeight Height of the source frame in pixel, with range [1, infinity)
942 * @param targetWidth Width of the target frame in pixel, with range [1, infinity)
943 * @param targetHeight Height of the target frame in pixel, with range [1, infinity)
944 * @param sourceX_s_targetX The horizontal scale factor converting a location in the target frame to a location in the source frame (xSource = sourceX_s_targetX * xTarget), with range (0, sourceWidth/targetWidth]
945 * @param sourceY_s_targetY The vertical scale factor converting a location in the target frame to a location in the source frame (ySource = sourceY_s_targetY * yTarget), with range (0, sourceHeight/targetHeight]
946 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
947 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
948 * @param firstTargetRow The first target row to be handled, with range [0, targetHeight)
949 * @param numberTargetRows The number of target rows to be handled, with range [1, targetHeight - firstTargetRow]
950 * @tparam T The data type of each pixel channel, e.g., float, double, int, short, ...
951 * @tparam TScale The data type of the internal scaling factors to be used, should be 'float' or 'double'
952 * @tparam tChannels Number of frame channels, with range [1, infinity)
953 */
954 template <typename T, typename TScale, unsigned int tChannels>
955 static void scaleSubset(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
956
957 /**
958 * Rotates a subset of a given frame by a bilinear interpolation.
959 * @param source The source frame to be rotated, must be valid
960 * @param target The target frame which will receive the rotated image, with same frame type as the source frame, must be valid
961 * @param width The width of the source and target frame in pixel, with range [1, infinity)
962 * @param height The height of the source and target frame in pixel, with range [1, infinity)
963 * @param horizontalAnchorPosition Position of the rotation anchor in the horizontal direction, with range (-infinity, infinity)
964 * @param verticalAnchorPosition Position of the rotation anchor in the vertical direction, with range (-infinity, infinity)
965 * @param angle The counter clockwise rotation angle in radian, with range [0, 2PI)
966 * @param borderColor The color of border pixels for which no visual content exists, provide one value for each channel, nullptr to use 0x00 for each channel
967 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
968 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
969 * @param firstTargetRow The first row of the target frame to be handled, with range [0, height)
970 * @param numberTargetRows The number of rows in the target frame to be handled, with range [1, height - firstTargetRow]
971 * @tparam tChannels Number of frame channels, with range [1, infinity)
972 */
973 template <unsigned int tChannels>
974 static void rotate8BitPerChannelSubset(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const uint8_t* borderColor, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows);
975
976 /**
977 * Subset function to apply an affine transform to an N-channel, 8-bit unsigned image.
978 * The affine transform must be provided in the following form: `sourcePoint = source_A_target * targetPoint`
979 * This function does not apply SIMD instructions and can be used for any frame dimensions.
980 * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should take the form:
981 * <pre>
982 * a c e
983 * b d f
984 * 0 0 1
985 * </pre>
986 * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
987 * @param source Input frame that will be transformed
988 * @param sourceWidth Width of the source image in pixel, with range [1, infinity)
989 * @param sourceHeight Height of the source image in pixel, with range [1, infinity)
990 * @param source_A_target Affine transformation which is applied to the source frame.
991 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
992 * @param target Output frame using the given affine transform
993 * @param targetWidth The width of the target image in pixel, with range [1, infinity)
994 * @param targetHeight The height of the target image in pixel, with range [1, infinity)
995 * @param firstTargetRow The first target row to be handled
996 * @param numberTargetRows Number of target rows to be handled
997 * @param sourcePaddingElements The number of padding elements at the end of each source frame row, in elements, with range [0, infinity)
998 * @param targetPaddingElements The number of padding elements at the end of each target frame row, in elements, with range [0, infinity)
999 * @tparam tChannels Number of frame channels, with range [1, infinity)
1000 * @see affine8BitPerChannelSSESubset(), affine8BitPerChannelNEONSubset()
1001 */
1002 template <unsigned int tChannels>
1003 static inline void affine8BitPerChannelSubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
1004
1005 /**
1006 * Transforms an 8 bit per channel frame using the given homography.
1007 * The homography must provide the following transformation: inputPoint = homography * outputPoint
1008 * This function does not apply SIMD instructions and can be used for any frame dimensions.
1009 * @param input The input frame that will be transformed
1010 * @param inputWidth Width of the input image in pixel, with range [1, infinity)
1011 * @param inputHeight Height of the input image in pixel, with range [1, infinity)
1012 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1013 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1014 * @param output The output frame using the given homography
1015 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
1016 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1017 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
1018 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
1019 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight - 1]
1020 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
1021 * @tparam tChannels Number of frame channels, with range [1, infinity)
1022 * @see homography8BitPerChannelSSESubset(), homography8BitPerChannelNEONSubset()
1023 */
1024 template <unsigned int tChannels>
1025 static inline void homography8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1026
1027 /**
1028 * Transforms a frame with (almost) arbitrary pixel format using the given homography.
1029 * This function does not apply SIMD instructions and can be used for any frame dimensions.
1030 * @param input The input frame that will be transformed
1031 * @param inputWidth Width of the input image in pixel, with range [1, infinity)
1032 * @param inputHeight Height of the input image in pixel, with range [1, infinity)
1033 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1034 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1035 * @param output The output frame using the given homography
1036 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
1037 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1038 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
1039 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
1040 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight - 1]
1041 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
1042 * @tparam T Data type of each pixel channel, e.g., float, double, int
1043 * @tparam tChannels Number of frame channels, with range [1, infinity)
1044 * @see homography8BitPerChannelSSESubset().
1045 */
1046 template <typename T, unsigned int tChannels>
1047 static inline void homographySubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1048
1049#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1050
1051 /**
1052 * Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using SSE).
1053 * This function applies SSE instructions.<br>
1054 * Beware: The target width 'targetWidth' must be >= 4, use affine8BitPerChannelSubset for small target frames
1055 * This function has the property: sourcePoint = source_A_target * targetPoint
1056 * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should be of the form:
1057 * <pre>
1058 * a c e
1059 * b d f
1060 * 0 0 1
1061 * </pre>
1062 * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
1063 * @param source Input frame that will be transformed
1064 * @param sourceWidth Width of the source image in pixel, with range [1, infinity)
1065 * @param sourceHeight Height of the source image in pixel, with range [1, infinity)
1066 * @param source_A_target Affine transformation which is applied to the source frame.
1067 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1068 * @param target The target frame where the result of the transformation will be stored
1069 * @param targetWidth The width of the target image in pixel, with range [4, infinity)
1070 * @param targetHeight The height of the target image in pixel, with range [1, infinity)
1071 * @param firstTargetRow The first target row to be handled
1072 * @param numberTargetRows Number of target rows to be handled
1073 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
1074 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1075 * @tparam tChannels Number of frame channels
1076 * @see affine8BitPerChannelSubset(), affine8BitPerChannelNEONSubset().
1077 */
1078 template <unsigned int tChannels>
1079 static inline void affine8BitPerChannelSSESubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
1080
1081 /**
1082 * Transforms an 8 bit per channel frame using the given homography.
1083 * This function applies SSE instructions.<br>
1084 * Beware: The output width 'outputWidth' must be >= 4, use homography8BitPerChannelSubset for small output frames
1085 * @param input The input frame that will be transformed, must be valid
1086 * @param inputWidth Width of the input image in pixel, with range [1, infinity)
1087 * @param inputHeight Height of the input image in pixel, with range [1, infinity)
1088 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1089 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1090 * @param output The output frame using the given homography, must be valid
1091 * @param outputWidth The width of the output image in pixel, with range [4, infinity)
1092 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1093 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
1094 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
1095 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight - 1]
1096 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
1097 * @tparam tChannels Number of frame channels, with range [1, infinity)
1098 * @see homography8BitPerChannelSubset().
1099 */
1100 template <unsigned int tChannels>
1101 static inline void homography8BitPerChannelSSESubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1102
1103 /**
1104 * Interpolates 4 independent pixels concurrently based on already known locations (top-left, top-right, bottom-left, and bottom-right) and interpolation factors for the source pixels.
1105 * This function also supports to interpolate the pixel values for a subset of the four pixels only, valid pixels will be interpolated, invalid pixels receive a defined border color.
1106 * @param source The source image in which the four independent pixels are located, must be valid
1107 * @param offsetsTopLeft The four offsets within the source image for the four top-left pixels used for the interpolation, with range [0, (width * tChannels + sourcePaddingElements) * (height - 1) + width * tChannels), or the corresponding validPixel information is 0x00000000
1108 * @param offsetsTopRight The four offsets within the source image for the four top-right pixels used for the interpolation, with range [0, (width * tChannels + sourcePaddingElements) * (height - 1) + width * tChannels), or the corresponding validPixel information is 0x00000000
1109 * @param offsetsBottomLeft The four offsets within the source image for the four bottom-left pixels used for the interpolation, with range [0, (width * tChannels + sourcePaddingElements) * (height - 1) + width * tChannels), or the corresponding validPixel information is 0x00000000
1110 * @param offsetsBottomRight The four offsets within the source image for the four bottom-right pixels used for the interpolation, with range [0, (width * tChannels + sourcePaddingElements) * (height - 1) + width * tChannels), or the corresponding validPixel information is 0x00000000
1111 * @param validPixels Four boolean states specifying which of the given four pixels will be interpolated and which ones will receive the defined border color, 0x00000000 for invalid pixels everything else for valid pixels (e.g., 0xFFFFFFFF)
1112 * @param borderColor The border color that will be assigned to each resulting pixel that is invalid (for which the corresponding validPixel information is 0x00000000)
1113 * @param m128_factorsRight The horizontal interpolation factors for right pixels, with range [0, 128], 128 to use the color information of the right pixels only, 0 to use the color information of the left pixels only
1114 * @param m128_factorsBottom The vertical interpolation factors for bottom pixels, with range [0, 128], 128 to use the color information of the bottom pixels only, 0 to use the color information of the top pixels only
1115 * @param targetPositionPixels The buffer that will receive the interpolated color values, must be valid
1116 * @tparam tChannels The number of frame channels, with range [1, infinity)
1117 */
1118 template <unsigned int tChannels>
1119 static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelSSE(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const typename DataType<uint8_t, tChannels>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, tChannels>::Type* targetPositionPixels);
1120
1121 /**
1122 * Interpolates 4 independent pixels concurrently based on already gathered source pixel values (top-left, top-right, bottom-left, and bottom-right) and the individual interpolation factors of these source pixels.
1123 * In contrast to the offset-based overload, this function receives the pixel values directly and takes neither valid-pixel information nor a border color.
1124 * @param m128_sourcesTopLeft The pixel values of the four top left pixels, starting at the first byte, may contain unused bytes at the end, e.g., RGBARGBARGBARGBA or YUVYUVYUVYUV----
1125 * @param m128_sourcesTopRight The pixel values of the four top right pixels, starting at the first byte, may contain unused bytes at the end
1126 * @param m128_sourcesBottomLeft The pixel values of the four bottom left pixels, starting at the first byte, may contain unused bytes at the end
1127 * @param m128_sourcesBottomRight The pixel values of the four bottom right pixels, starting at the first byte, may contain unused bytes at the end
1128 * @param m128_factorsTopLeft The four interpolation factors of the four top left pixels, with ranges [0, 128 * 128], so that (m128_factorsTopLeft + m128_factorsTopRight + m128_factorsBottomLeft + m128_factorsBottomRight) == (128 * 128)
1129 * @param m128_factorsTopRight The four interpolation factors of the four top right pixels, with ranges [0, 128 * 128], so that (m128_factorsTopLeft + m128_factorsTopRight + m128_factorsBottomLeft + m128_factorsBottomRight) == (128 * 128)
1130 * @param m128_factorsBottomLeft The four interpolation factors of the four bottom left pixels, with ranges [0, 128 * 128], so that (m128_factorsTopLeft + m128_factorsTopRight + m128_factorsBottomLeft + m128_factorsBottomRight) == (128 * 128)
1131 * @param m128_factorsBottomRight The four interpolation factors of the four bottom right pixels, with ranges [0, 128 * 128], so that (m128_factorsTopLeft + m128_factorsTopRight + m128_factorsBottomLeft + m128_factorsBottomRight) == (128 * 128)
1132 * @return The resulting interpolated pixel values, starting at the first byte, may contain unused bytes at the end, e.g., RGBARGBARGBARGBA or YUVYUVYUVYUV----
1133 * @tparam tChannels The number of frame channels, with range [3, 4]
1134 */
1135 template <unsigned int tChannels>
1136 static OCEAN_FORCE_INLINE __m128i interpolate4Pixels8BitPerChannelSSE(const __m128i& m128_sourcesTopLeft, const __m128i& m128_sourcesTopRight, const __m128i& m128_sourcesBottomLeft, const __m128i& m128_sourcesBottomRight, const __m128i& m128_factorsTopLeft, const __m128i& m128_factorsTopRight, const __m128i& m128_factorsBottomLeft, const __m128i& m128_factorsBottomRight);
1137
1138#endif // OCEAN_HARDWARE_SSE_VERSION
1139
1140#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1141
1142 /**
1143 * Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using NEON).
1144 * This function applies NEON instructions.<br>
1145 * This one has the property: sourcePoint = source_A_target * targetPoint
1146 * Beware: The target width 'targetWidth' must be >= 4, use affine8BitPerChannelSubset for small target frames
1147 * Please note that here the affine transformation is specified as a 3-by-3 matrix (in contrast to the more commonly used 2-by-3 matrix) and should be of the form:
1148 * <pre>
1149 * a c e
1150 * b d f
1151 * 0 0 1
1152 * </pre>
1153 * However, this function disregards the last row completely and only uses the top two rows, i.e., the elements a through f.
1154 * @param source The source frame that will be transformed
1155 * @param sourceWidth Width of the source image in pixel, with range [1, infinity)
1156 * @param sourceHeight Height of the source image in pixel, with range [1, infinity)
1157 * @param source_A_target Affine transform used to transform the given source frame.
1158 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1159 * @param target The target frame using the given affine transform
1160 * @param targetWidth The width of the target image in pixel, with range [4, infinity)
1161 * @param targetHeight The height of the target image in pixel, with range [1, infinity)
1162 * @param firstTargetRow The first target row to be handled
1163 * @param numberTargetRows Number of target rows to be handled
1164 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
1165 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1166 * @tparam tChannels Number of frame channels, with range [1, infinity)
1167 * @see affine8BitPerChannelSubset(), affine8BitPerChannelSSESubset().
1168 */
1169 template <unsigned int tChannels>
1170 static inline void affine8BitPerChannelNEONSubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements);
1171
1172 /**
1173 * Transforms an 8 bit per channel frame using the given homography.
1174 * This function applies NEON instructions.<br>
1175 * Beware: The output width 'outputWidth' must be >= 4, use homography8BitPerChannelSubset for small output frames.
1176 * @param input The input frame that will be transformed
1177 * @param inputWidth Width of the input image in pixel, with range [1, infinity)
1178 * @param inputHeight Height of the input image in pixel, with range [1, infinity)
1179 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1180 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1181 * @param output The output frame using the given homography
1182 * @param outputWidth The width of the output image in pixel, with range [4, infinity)
1183 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1184 * @param inputPaddingElements The number of padding elements at the end of each input frame row, in elements, with range [0, infinity)
1185 * @param outputPaddingElements The number of padding elements at the end of each output frame row, in elements, with range [0, infinity)
1186 * @param firstOutputRow The first output row to be handled, with range [0, outputHeight - 1]
1187 * @param numberOutputRows Number of output rows to be handled, with range [1, outputHeight - firstOutputRow]
1188 * @tparam tChannels Number of frame channels, with range [1, infinity)
1189 * @see homography8BitPerChannelSubset().
1190 */
1191 template <unsigned int tChannels>
1192 static inline void homography8BitPerChannelNEONSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1193
1194 /**
1195 * Interpolates 4 independent pixels concurrently based on already known locations (top-left, top-right, bottom-left, and bottom-right) and interpolation factors for the source pixels.
1196 * This function also supports to interpolate the pixel values for a subset of the four pixels only, valid pixels will be interpolated, invalid pixels receive a defined border color.
1197 * @param source The source image in which the four independent pixels are located, must be valid
1198 * @param offsetsTopLeftElements The four offsets within the source image for the four top-left pixels used for the interpolation, in elements, with ranges [0, strideElements * height), or the corresponding validPixel information is 0x00000000
1199 * @param offsetsTopRightElements The four offsets within the source image for the four top-right pixels used for the interpolation, in elements, with ranges [0, strideElements * height), or the corresponding validPixel information is 0x00000000
1200 * @param offsetsBottomLeftElements The four offsets within the source image for the four bottom-left pixels used for the interpolation, in elements, with ranges [0, strideElements * height), or the corresponding validPixel information is 0x00000000
1201 * @param offsetsBottomRightElements The four offsets within the source image for the four bottom-right pixels used for the interpolation, in elements, with ranges [0, strideElements * height), or the corresponding validPixel information is 0x00000000
1202 * @param validPixels Four boolean states specifying which of the given four pixels will be interpolated and which ones will receive the defined border color, 0x00000000 for invalid pixels everything else for valid pixels (e.g., 0xFFFFFFFF)
1203 * @param borderColor The border color that will be assigned to each resulting pixel that is invalid (for which the corresponding validPixel information is 0x00000000)
1204 * @param m128_factorsRight The horizontal interpolation factors for right pixels, with range [0, 128], 128 to use the color information of the right pixels only, 0 to use the color information of the left pixels only
1205 * @param m128_factorsBottom The vertical interpolation factors for bottom pixels, with range [0, 128], 128 to use the color information of the bottom pixels only, 0 to use the color information of the top pixels only
1206 * @param targetPositionPixels The buffer that will receive the interpolated color values, must be valid
1207 * @tparam tChannels The number of frame channels, with range [1, infinity)
1208 */
1209 template <unsigned int tChannels>
1210 static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelNEON(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const typename DataType<uint8_t, tChannels>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, tChannels>::Type* targetPositionPixels);
1211
1212 /**
1213 * Interpolates 8 independent pixels concurrently of a 1 channel frame, the source pixel locations must be known already (top-left, top-right, bottom-left, and bottom-right), further the interpolation factors must be known already.
1214 * @param topLeft_u_8x8 The 8 top left pixel values to be used for interpolation
1215 * @param topRight_u_8x8 The 8 top right pixel values to be used for interpolation
1216 * @param bottomLeft_u_8x8 The 8 bottom left pixel values to be used for interpolation
1217 * @param bottomRight_u_8x8 The 8 bottom right pixel values to be used for interpolation
1218 * @param factorsRight_factorsBottom_128_u_8x16 The eight horizontal interpolation factors for right pixels, and the eight vertical interpolation factors for the bottom pixels, each with range [0, 128], for horizontal factors 128 to use the color information of the right pixels only and 0 to use the color information of the left pixels only (vertical factors presumably behave analogously for bottom and top pixels)
1219 * @param targetPositionPixels The buffer that will receive the interpolated color values, must be valid
1220 */
1221 static OCEAN_FORCE_INLINE void interpolate8Pixels1Channel8BitNEON(const uint8x8_t& topLeft_u_8x8, const uint8x8_t& topRight_u_8x8, const uint8x8_t& bottomLeft_u_8x8, const uint8x8_t& bottomRight_u_8x8, const uint8x16_t& factorsRight_factorsBottom_128_u_8x16, uint8_t* targetPositionPixels);
1222
1223#endif // OCEAN_HARDWARE_NEON_VERSION
1224
1225 /**
1226 * Transforms an 8 bit per channel frame using the given homographies.
1227 * @param input The input frame that will be transformed
1228 * @param inputWidth Width of the input image in pixel, with range [1, infinity)
1229 * @param inputHeight Height of the input image in pixel, with range [1, infinity)
1230 * @param homographies Homographies used to transform the given input frame
1231 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1232 * @param output The output frame using the given homographies
1233 * @param outputQuadrantCenterX The horizontal position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputWidth)
1234 * @param outputQuadrantCenterY The vertical position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputHeight)
1235 * @param outputOriginX The horizontal coordinate of the output frame's origin
1236 * @param outputOriginY The vertical coordinate of the output frame's origin
1237 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
1238 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1239 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1240 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1241 * @param firstOutputRow The first output row to be handled
1242 * @param numberOutputRows Number of output rows to be handled
1243 * @tparam tChannels Number of frame channels
1244 */
1245 template <unsigned int tChannels>
1246 static inline void homographies8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* homographies, const uint8_t* borderColor, uint8_t* output, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1247
1248 /**
1249 * Transforms an 8 bit per channel frame using the given homography.
1250 * @param input The input frame that will be transformed, must be valid
1251 * @param inputWidth Width of the input image in pixel, with range [1, infinity)
1252 * @param inputHeight Height of the input image in pixel, with range [1, infinity)
1253 * @param input_H_output Homography used to transform the given input frame by following equation: inputPoint = input_H_output * outputPoint, must be valid
1254 * @param output The output frame resulting by application of the given homography, must be valid
1255 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
1256 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
1257 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
1258 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1259 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1260 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1261 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
1262 * @param firstOutputRow The first output row to be handled
1263 * @param numberOutputRows Number of output rows to be handled
1264 * @tparam tChannels Number of frame channels, with range [1, infinity)
1265 */
1266 template <unsigned int tChannels>
1267 static inline void homographyMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1268
1269 /**
1270 * Transforms an 8 bit per channel frame using the given homographies.
1271 * @param input The input frame that will be transformed
1272 * @param inputWidth Width of the input image in pixel, with range [1, infinity)
1273 * @param inputHeight Height of the input image in pixel, with range [1, infinity)
1274 * @param homographies Homographies used to transform the given input frame
1275 * @param output The output frame resulting by application of the given homographies
1276 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
1277 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
1278 * @param outputQuadrantCenterX The horizontal position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputWidth)
1279 * @param outputQuadrantCenterY The vertical position of the four quadrants in the output frame (the local center not respecting the optional outputOrigin parameter), with range [0, outputHeight)
1280 * @param outputOriginX The horizontal coordinate of the output frame's origin
1281 * @param outputOriginY The vertical coordinate of the output frame's origin
1282 * @param outputWidth The width of the output image in pixel, with range [1, infinity)
1283 * @param outputHeight The height of the output image in pixel, with range [1, infinity)
1284 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1285 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1286 * @param outputMaskPaddingElements The number of padding elements at the end of each row of the output mask, in elements, with range [0, infinity)
1287 * @param firstOutputRow The first output row to be handled
1288 * @param numberOutputRows Number of output rows to be handled
1289 * @tparam tChannels Number of frame channels
1290 */
1291 template <unsigned int tChannels>
1292 static inline void homographiesMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* homographies, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows);
1293
1294 /**
1295 * Transforms an 8 bit per channel frame using the given homography, with individual pinhole camera profiles for the input frame and the output frame.
1296 * @param inputCamera The pinhole camera profile to be applied for the input frame
1297 * @param outputCamera The pinhole camera profile to be applied for the output frame
1298 * @param outputCameraDistortionLookup The distortion lookup table of the output camera
1299 * @param input The input frame that will be transformed
1300 * @param normalizedHomography The homography used to transform the given input frame specified in normalized camera coordinates
1301 * @param useDistortionParameters True, to apply the distortion parameters of the camera profile
1302 * @param borderColor Color of undefined pixel positions, the size of the buffer must match to the number of channels, nullptr to assign 0x00 to each channel
1303 * @param output The output frame resulting by application of the given homography
1304 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1305 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1306 * @param firstRow The first row to be handled
1307 * @param numberRows Number of rows to be handled
1308 * @tparam tChannels Number of frame channels
1309 */
1310 template <unsigned int tChannels>
1311 static void homographyWithCamera8BitPerChannelSubset(const PinholeCamera* inputCamera, const PinholeCamera* outputCamera, const PinholeCamera::DistortionLookup* outputCameraDistortionLookup, const uint8_t* input, const SquareMatrix3* normalizedHomography, const bool useDistortionParameters, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1312
1313 /**
1314 * Transforms an 8 bit per channel frame using the given homography, with individual pinhole camera profiles for the input frame and the output frame, and with an additional output mask.
1315 * @param inputCamera The pinhole camera profile to be applied for the input frame
1316 * @param outputCamera The pinhole camera profile to be applied for the output frame
1317 * @param outputCameraDistortionLookup The distortion lookup table of the output camera
1318 * @param input The input frame that will be transformed, must be valid
1319 * @param inputPaddingElements The number of padding elements at the end of each input row, in elements, with range [0, infinity)
1320 * @param normalizedHomography The homography used to transform the given input frame specified in normalized camera coordinates
1321 * @param output The output frame resulting by application of the given homography
1322 * @param outputMask Mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
1323 * @param outputPaddingElements The number of padding elements at the end of each output row, in elements, with range [0, infinity)
1324 * @param outputMaskPaddingElements The number of padding elements at the end of each output mask row, in elements, with range [0, infinity)
1325 * @param maskValue 8 bit mask values for reference pixels lying inside the given camera frame, reference pixels lying outside the camera frame will be assigned with (0xFF - maskValue)
1326 * @param firstRow The first row to be handled
1327 * @param numberRows Number of rows to be handled
1328 * @tparam tChannels Number of frame channels
1329 */
1330 template <unsigned int tChannels>
1331 static void homographyWithCameraMask8BitPerChannelSubset(const PinholeCamera* inputCamera, const PinholeCamera* outputCamera, const PinholeCamera::DistortionLookup* outputCameraDistortionLookup, const uint8_t* input, const unsigned int inputPaddingElements, const SquareMatrix3* normalizedHomography, uint8_t* output, uint8_t* outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows);
1332
1333 /**
1334 * Transforms a subset of a given input frame with uint8_t as element type into an output frame by application of an interpolation lookup table.
1335 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
1336 * @param input The input frame which will be transformed, must be valid
1337 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
1338 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
1339 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
1340 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
1341 * @param borderColor Color of undefined pixel positions, the size of the buffer must match the number of channels, nullptr to assign 0x00 to each channel
1342 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table
1343 * @param inputPaddingElements Number of padding elements at the end of each input row, in elements, with range [0, infinity)
1344 * @param outputPaddingElements Number of padding elements at the end of each output row, in elements, with range [0, infinity)
1345 * @param firstRow First row to be handled, with range [0, input_LT_output->sizeY())
1346 * @param numberRows Number of rows to be handled, with range [1, input_LT_output->sizeY() - firstRow]
1347 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
1348 */
1349 template <unsigned int tChannels>
1350 static void lookup8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1351
1352 /**
1353 * Transforms a subset of a given input frame with arbitrary element type into an output frame by application of an interpolation lookup table.
1354 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
1355 * @param input The input frame which will be transformed, must be valid
1356 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
1357 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
1358 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
1359 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
1360 * @param borderColor Color of undefined pixel positions, the size of the buffer must match the number of channels, nullptr to assign T(0) to each channel
1361 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table, must be valid
1362 * @param inputPaddingElements Number of padding elements at the end of each input row, in elements, with range [0, infinity)
1363 * @param outputPaddingElements Number of padding elements at the end of each output row, in elements, with range [0, infinity)
1364 * @param firstRow First row to be handled, with range [0, input_LT_output->sizeY())
1365 * @param numberRows Number of rows to be handled, with range [1, input_LT_output->sizeY() - firstRow]
1366 * @tparam T Data type of each pixel channel, must not be 'uint8_t'
1367 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
1368 */
1369 template <typename T, unsigned int tChannels>
1370 static void lookupSubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const T* borderColor, T* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1371
1372#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1373
1374 /**
1375 * Transforms a subset of a given input frame into an output frame by application of an interpolation lookup table and uses NEON instructions.
1376 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
1377 * @param input The input frame which will be transformed, must be valid
1378 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
1379 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
1380 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), with table width >= 4, must be valid
1381 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
1382 * @param borderColor Color of undefined pixel positions, the size of the buffer must match the number of channels, nullptr to assign 0x00 to each channel
1383 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table
1384 * @param inputPaddingElements Number of padding elements at the end of each input row, in elements, with range [0, infinity)
1385 * @param outputPaddingElements Number of padding elements at the end of each output row, in elements, with range [0, infinity)
1386 * @param firstRow First row to be handled, with range [0, input_LT_output->sizeY())
1387 * @param numberRows Number of rows to be handled, with range [1, input_LT_output->sizeY() - firstRow]
1388 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
1389 */
1390 template <unsigned int tChannels>
1391 static void lookup8BitPerChannelSubsetNEON(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1392
1393#endif // defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1394
1395 /**
1396 * Transforms a given input frame into an output frame by application of an interpolation lookup table.
1397 * The output frame must have the same pixel format and pixel origin as the input frame.<br>
1398 * Input frame pixels lying outside the frame will be masked in the resulting output mask frame; further, these pixels are left untouched in the output frame.<br>
1399 * @param input The input frame which will be transformed
1400 * @param inputWidth The width of the given input frame in pixel, with range [1, infinity)
1401 * @param inputHeight The height of the given input frame in pixel, with range [1, infinity)
1402 * @param input_LT_output The lookup table which defines the transformation from locations defined in the output frame to locations defined in the input frame (the lookup table stores the corresponding locations in the input frame), must be valid
1403 * @param offset True, if the lookup table stores local offsets; False, if the lookup table stores absolute positions
1404 * @param output Resulting output frame with frame dimension equal to the size of the given lookup table
1405 * @param outputMask Resulting mask frame with 8 bit per pixel defining whether an output frame pixel has a valid corresponding pixel in the input frame
1406 * @param maskValue 8 bit mask values for pixels lying inside the input frame, pixels lying outside the input frame will be assigned with (0xFF - maskValue)
1407 * @param inputPaddingElements The number of padding elements at the end of each row of `input`, in elements, with range [0, infinity)
1408 * @param outputPaddingElements The number of padding elements at the end of each row of `output`, in elements, with range [0, infinity)
1409 * @param outputMaskPaddingElements The number of padding elements at the end of each row of `outputMask`, in elements, with range [0, infinity)
1410 * @param firstRow First row to be handled
1411 * @param numberRows Number of rows to be handled
1412 * @tparam tChannels Number of channels of the frame
1413 */
1414 template <unsigned int tChannels>
1415 static void lookupMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1416};
1417
1418inline bool FrameInterpolatorBilinear::Comfort::resize(Frame& frame, const unsigned int width, const unsigned int height, Worker* worker)
1419{
1420 ocean_assert(frame.isValid());
1421 ocean_assert(width >= 1u && height >= 1u);
1422
1423 Frame target(FrameType(frame, width, height));
1424
1425 if (!resize(frame, target, worker))
1426 {
1427 return false;
1428 }
1429
1430 target.setTimestamp(frame.timestamp());
1432
1433 frame = std::move(target);
1434 return true;
1435}
1436
1437template <typename TScalar>
1438bool FrameInterpolatorBilinear::Comfort::interpolatePixel8BitPerChannel(const uint8_t* frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2<TScalar>& position, uint8_t* result)
1439{
1440 ocean_assert(frame != nullptr);
1441 ocean_assert(channels >= 1u && channels <= 8u);
1442
1443 if (pixelCenter == PC_TOP_LEFT)
1444 {
1445 switch (channels)
1446 {
1447 case 1u:
1448 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<1u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1449 return true;
1450
1451 case 2u:
1452 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<2u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1453 return true;
1454
1455 case 3u:
1456 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<3u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1457 return true;
1458
1459 case 4u:
1460 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<4u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1461 return true;
1462
1463 case 5u:
1464 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<5u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1465 return true;
1466
1467 case 6u:
1468 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<6u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1469 return true;
1470
1471 case 7u:
1472 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<7u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1473 return true;
1474
1475 case 8u:
1476 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<8u, PC_TOP_LEFT, TScalar>(frame, width, height, framePaddingElements, position, result);
1477 return true;
1478
1479 default:
1480 break;
1481 }
1482 }
1483 else
1484 {
1485 ocean_assert(pixelCenter == PC_CENTER);
1486
1487 switch (channels)
1488 {
1489 case 1u:
1490 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<1u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1491 return true;
1492
1493 case 2u:
1494 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<2u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1495 return true;
1496
1497 case 3u:
1498 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<3u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1499 return true;
1500
1501 case 4u:
1502 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<4u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1503 return true;
1504
1505 case 5u:
1506 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<5u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1507 return true;
1508
1509 case 6u:
1510 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<6u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1511 return true;
1512
1513 case 7u:
1514 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<7u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1515 return true;
1516
1517 case 8u:
1518 FrameInterpolatorBilinear::interpolatePixel8BitPerChannel<8u, PC_CENTER, TScalar>(frame, width, height, framePaddingElements, position, result);
1519 return true;
1520
1521 default:
1522 break;
1523 }
1524 }
1525
1526 ocean_assert(false && "Invalid channel number");
1527 return false;
1528}
1529
1530template <typename TSource, typename TTarget, typename TScalar, typename TIntermediate>
1531bool FrameInterpolatorBilinear::Comfort::interpolatePixel(const TSource* frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2<TScalar>& position, TTarget* result, const TIntermediate& resultBias)
1532{
1533 ocean_assert(frame != nullptr);
1534 ocean_assert(channels >= 1u && channels <= 8u);
1535
1536 if (pixelCenter == PC_TOP_LEFT)
1537 {
1538 switch (channels)
1539 {
1540 case 1u:
1541 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 1u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1542 return true;
1543
1544 case 2u:
1545 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 2u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1546 return true;
1547
1548 case 3u:
1549 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 3u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1550 return true;
1551
1552 case 4u:
1553 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 4u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1554 return true;
1555
1556 case 5u:
1557 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 5u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1558 return true;
1559
1560 case 6u:
1561 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 6u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1562 return true;
1563
1564 case 7u:
1565 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 7u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1566 return true;
1567
1568 case 8u:
1569 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 8u, PC_TOP_LEFT, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1570 return true;
1571
1572 default:
1573 break;
1574 }
1575 }
1576 else
1577 {
1578 ocean_assert(pixelCenter == PC_CENTER);
1579
1580 switch (channels)
1581 {
1582 case 1u:
1583 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 1u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1584 return true;
1585
1586 case 2u:
1587 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 2u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1588 return true;
1589
1590 case 3u:
1591 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 3u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1592 return true;
1593
1594 case 4u:
1595 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 4u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1596 return true;
1597
1598 case 5u:
1599 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 5u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1600 return true;
1601
1602 case 6u:
1603 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 6u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1604 return true;
1605
1606 case 7u:
1607 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 7u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1608 return true;
1609
1610 case 8u:
1611 FrameInterpolatorBilinear::interpolatePixel<TSource, TTarget, 8u, PC_CENTER, TScalar, TIntermediate>(frame, width, height, framePaddingElements, position, result, resultBias);
1612 return true;
1613
1614 default:
1615 break;
1616 }
1617 }
1618
1619 ocean_assert(false && "Invalid channel number");
1620 return false;
1621}
1622
1623template <typename T, unsigned int tChannels>
1624inline void FrameInterpolatorBilinear::resize(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
1625{
1626 ocean_assert(source != nullptr && target != nullptr);
1627 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
1628 ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
1629
1630 const double sourceX_s_targetX = double(sourceWidth) / double(targetWidth);
1631 const double sourceY_s_targetY = double(sourceHeight) / double(targetHeight);
1632
1633 scale<T, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, worker);
1634}
1635
1636template <typename T, unsigned int tChannels>
1637inline void FrameInterpolatorBilinear::scale(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
1638{
1639 ocean_assert(source != nullptr && target != nullptr);
1640 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
1641 ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
1642 ocean_assert(sourceX_s_targetX > 0.0);
1643 ocean_assert(sourceY_s_targetY > 0.0);
1644
1645 if (sourceWidth == targetWidth && sourceHeight == targetHeight)
1646 {
1647 FrameConverter::subFrame<T>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, 0u, 0u, 0u, 0u, sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements);
1648 return;
1649 }
1650
1651 if (std::is_same<T, uint8_t>::value)
1652 {
1653 // we have a SIMD-based optimized version for 'uint8_t' data types
1654
1655 scale8BitPerChannel<tChannels>((const uint8_t*)source, (uint8_t*)target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, worker);
1656 }
1657 else
1658 {
1659 using TScale = typename FloatTyper<T>::Type;
1660
1661 if (worker)
1662 {
1663 worker->executeFunction(Worker::Function::createStatic(&scaleSubset<T, TScale, tChannels>, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
1664 }
1665 else
1666 {
1667 scaleSubset<T, TScale, tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
1668 }
1669 }
1670}
1671
1672template <unsigned int tChannels>
1673inline void FrameInterpolatorBilinear::affine8BitPerChannel(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3& source_A_target, const uint8_t* borderColor, uint8_t* target, const CV::PixelPositionI& targetOrigin, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
1674{
1675 // If applicable, apply an additional translation to the affine transformation.
1676 const SquareMatrix3 adjustedAffineTransform = source_A_target * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(targetOrigin.x()), Scalar(targetOrigin.y()), 1));
1677
1678 if (worker)
1679 {
1680 if (targetWidth >= 4u)
1681 {
1682#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1683 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::affine8BitPerChannelSSESubset<tChannels>, source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, 0u, sourcePaddingElements, targetPaddingElements), 0, targetHeight, 8u, 9u, 20u);
1684 return;
1685#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1686 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::affine8BitPerChannelNEONSubset<tChannels>, source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, 0u, sourcePaddingElements, targetPaddingElements), 0, targetHeight, 8u, 9u, 20u);
1687 return;
1688#endif
1689 }
1690
1691 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::affine8BitPerChannelSubset<tChannels>, source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, 0u, sourcePaddingElements, targetPaddingElements), 0, targetHeight, 8u, 9u, 20u);
1692 }
1693 else
1694 {
1695 if (targetWidth >= 4u)
1696 {
1697#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1698 affine8BitPerChannelSSESubset<tChannels>(source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, targetHeight, sourcePaddingElements, targetPaddingElements);
1699 return;
1700#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1701 affine8BitPerChannelNEONSubset<tChannels>(source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, targetHeight, sourcePaddingElements, targetPaddingElements);
1702 return;
1703#endif
1704 }
1705
1706 affine8BitPerChannelSubset<tChannels>(source, sourceWidth, sourceHeight, &adjustedAffineTransform, borderColor, target, targetWidth, targetHeight, 0u, targetHeight, sourcePaddingElements, targetPaddingElements);
1707 }
1708}
1709
1710template <unsigned int tChannels>
1711inline void FrameInterpolatorBilinear::homography8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const uint8_t* borderColor, uint8_t* output, const CV::PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1712{
1713 // we adjust the homography to address 'outputOrigin'
1714 const SquareMatrix3 input_H_shiftedOutput = input_H_output * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
1715
1716 if (worker)
1717 {
1718 if (outputWidth >= 4u)
1719 {
1720#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1721 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homography8BitPerChannelSSESubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1722 return;
1723#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1724 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homography8BitPerChannelNEONSubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1725 return;
1726#endif
1727 }
1728
1729 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homography8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1730 }
1731 else
1732 {
1733 if (outputWidth >= 4u)
1734 {
1735#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1736 homography8BitPerChannelSSESubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1737 return;
1738#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1739 homography8BitPerChannelNEONSubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1740 return;
1741#endif
1742 }
1743
1744 homography8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1745 }
1746}
1747
1748template <typename T, unsigned int tChannels>
1749inline void FrameInterpolatorBilinear::homography(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, const T* borderColor, T* output, const CV::PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1750{
1751 if (std::is_same<T, uint8_t>::value)
1752 {
1753 homography8BitPerChannel<tChannels>((const uint8_t*)input, inputWidth, inputHeight, input_H_output, (const uint8_t*)borderColor, (uint8_t*)output, outputOrigin, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, worker);
1754 return;
1755 }
1756 else
1757 {
1758 // we adjust the homography to address 'outputOrigin'
1759 const SquareMatrix3 input_H_shiftedOutput = input_H_output * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
1760
1761 if (worker)
1762 {
1763 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographySubset<T, tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 10u, 11u, 20u);
1764 }
1765 else
1766 {
1767 homographySubset<T, tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, borderColor, output, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1768 }
1769 }
1770}
1771
1772template <unsigned int tChannels>
1773inline void FrameInterpolatorBilinear::homographies8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], const uint8_t* borderColor, uint8_t* output, const Vector2& outputQuadrantCenter, const PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1774{
1775 if (worker)
1776 {
1777 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographies8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, homographies, borderColor, output, outputQuadrantCenter.x(), outputQuadrantCenter.y(), outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputHeight, 14u, 15u, 20u);
1778 }
1779 else
1780 {
1781 homographies8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, homographies, borderColor, output, outputQuadrantCenter.x(), outputQuadrantCenter.y(), outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, 0u, outputHeight);
1782 }
1783}
1784
1785template <unsigned int tChannels>
1786inline void FrameInterpolatorBilinear::homographyMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3& input_H_output, uint8_t* output, uint8_t* outputMask, const CV::PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker)
1787{
1788 // we adjust the homography to address 'outputOrigin'
1789 const SquareMatrix3 input_H_shiftedOutput = input_H_output * SquareMatrix3(Vector3(1, 0, 0), Vector3(0, 1, 0), Vector3(Scalar(outputOrigin.x()), Scalar(outputOrigin.y()), 1));
1790
1791 if (worker)
1792 {
1793 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographyMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_H_shiftedOutput, output, outputMask, maskValue, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, 0u), 0, outputHeight, 12u, 13u, 20u);
1794 }
1795 else
1796 {
1797 homographyMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_H_shiftedOutput, output, outputMask, maskValue, outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, outputHeight);
1798 }
1799}
1800
1801template <unsigned int tChannels>
1802inline void FrameInterpolatorBilinear::homographiesMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], uint8_t* output, uint8_t* outputMask, const Vector2& outputQuadrantCenter, const CV::PixelPositionI& outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker, const uint8_t maskValue)
1803{
1804 if (worker)
1805 {
1806 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographiesMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, homographies, output, outputMask, maskValue, outputQuadrantCenter.x(), outputQuadrantCenter.y(), outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, 0u), 0, outputHeight);
1807 }
1808 else
1809 {
1810 homographiesMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, homographies, output, outputMask, maskValue, outputQuadrantCenter.x(), outputQuadrantCenter.y(), outputOrigin.x(), outputOrigin.y(), outputWidth, outputHeight, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, outputHeight);
1811 }
1812}
1813
1814template <unsigned int tChannels>
1815inline void FrameInterpolatorBilinear::homographyWithCamera8BitPerChannel(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const uint8_t* input, const SquareMatrix3& homography, const bool useDistortionParameters, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1816{
1817 const SquareMatrix3 normalizedHomography(inputCamera.invertedIntrinsic() * homography * outputCamera.intrinsic());
1818
1819 const PinholeCamera::DistortionLookup outputCameraDistortionLookup(outputCamera, 10u);
1820
1821 if (worker)
1822 {
1823 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographyWithCamera8BitPerChannelSubset<tChannels>, &inputCamera, &outputCamera, &outputCameraDistortionLookup, input, &normalizedHomography, useDistortionParameters, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0, outputCamera.height());
1824 }
1825 else
1826 {
1827 homographyWithCamera8BitPerChannelSubset<tChannels>(&inputCamera, &outputCamera, &outputCameraDistortionLookup, input, &normalizedHomography, useDistortionParameters, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, outputCamera.height());
1828 }
1829}
1830
1831template <unsigned int tChannels>
1832inline void FrameInterpolatorBilinear::homographyWithCameraMask8BitPerChannel(const PinholeCamera& inputCamera, const PinholeCamera& outputCamera, const uint8_t* input, const unsigned int inputPaddingElements, const SquareMatrix3& homography, uint8_t* output, uint8_t* outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker, const uint8_t maskValue)
1833{
1834 const SquareMatrix3 normalizedHomography(inputCamera.invertedIntrinsic() * homography * outputCamera.intrinsic());
1835
1836 const PinholeCamera::DistortionLookup outputCameraDistortionLookup(outputCamera, 10u);
1837
1838 if (worker)
1839 {
1840 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::homographyWithCameraMask8BitPerChannelSubset<tChannels>, &inputCamera, &outputCamera, &outputCameraDistortionLookup, input, inputPaddingElements, &normalizedHomography, output, outputMask, outputPaddingElements, outputMaskPaddingElements, maskValue, 0u, 0u), 0, outputCamera.height(), 11u, 12u, 10u);
1841 }
1842 else
1843 {
1844 homographyWithCameraMask8BitPerChannelSubset<tChannels>(&inputCamera, &outputCamera, &outputCameraDistortionLookup, input, inputPaddingElements, &normalizedHomography, output, outputMask, outputPaddingElements, outputMaskPaddingElements, maskValue, 0u, outputCamera.height());
1845 }
1846}
1847
1848template <typename T, unsigned int tChannels>
1849inline void FrameInterpolatorBilinear::lookup(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& input_LT_output, const bool offset, const T* borderColor, T* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker* worker)
1850{
1851 if constexpr (std::is_same<T, uint8_t>::value)
1852 {
1853#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1854 if ((tChannels >= 1u && input_LT_output.sizeX() >= 8) || (tChannels >= 2u && input_LT_output.sizeX() >= 4))
1855 {
1856 // NEON implementation for 1 channel: min width 8; for 2+ channels: min width 4
1857
1858 if (worker)
1859 {
1860 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::lookup8BitPerChannelSubsetNEON<tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (unsigned int)(input_LT_output.sizeY()), 9u, 10u, 20u);
1861 }
1862 else
1863 {
1864 lookup8BitPerChannelSubsetNEON<tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (unsigned int)(input_LT_output.sizeY()));
1865 }
1866
1867 return;
1868 }
1869#endif // defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1870
1871 if (worker)
1872 {
1873 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::lookup8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (unsigned int)input_LT_output.sizeY(), 9u, 10u, 20u);
1874 }
1875 else
1876 {
1877 lookup8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (unsigned int)(input_LT_output.sizeY()));
1878 }
1879 }
1880 else
1881 {
1882 ocean_assert((!std::is_same<T, uint8_t>::value));
1883
1884 if (worker)
1885 {
1886 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::lookupSubset<T, tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, 0u), 0u, (unsigned int)(input_LT_output.sizeY()), 9u, 10u, 20u);
1887 }
1888 else
1889 {
1890 lookupSubset<T, tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, borderColor, output, inputPaddingElements, outputPaddingElements, 0u, (unsigned int)(input_LT_output.sizeY()));
1891 }
1892 }
1893}
1894
1895template <unsigned int tChannels>
1896inline void FrameInterpolatorBilinear::lookupMask8BitPerChannel(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable& input_LT_output, const bool offset, uint8_t* output, uint8_t* outputMask, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker* worker, const uint8_t maskValue)
1897{
1898 if (worker)
1899 {
1900 worker->executeFunction(Worker::Function::createStatic(&FrameInterpolatorBilinear::lookupMask8BitPerChannelSubset<tChannels>, input, inputWidth, inputHeight, &input_LT_output, offset, output, outputMask, maskValue, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, 0u), 0u, (unsigned int)(input_LT_output.sizeY()), 11u, 12u, 20u);
1901 }
1902 else
1903 {
1904 lookupMask8BitPerChannelSubset<tChannels>(input, inputWidth, inputHeight, &input_LT_output, offset, output, outputMask, maskValue, inputPaddingElements, outputPaddingElements, outputMaskPaddingElements, 0u, (unsigned int)(input_LT_output.sizeY()));
1905 }
1906}
1907
1908template <typename T, unsigned int tChannels>
1909void FrameInterpolatorBilinear::resampleCameraImage(const T* sourceFrame, const AnyCamera& sourceCamera, const SquareMatrix3& source_R_target, const AnyCamera& targetCamera, T* targetFrame, const unsigned int sourceFramePaddingElements, const unsigned int targetFramePaddingElements, LookupCorner2<Vector2>* source_OLT_target, Worker* worker, const unsigned int binSizeInPixel, const T* borderColor)
1910{
1911 static_assert(tChannels >= 1u, "Invalid channel number!");
1912
1913 ocean_assert(sourceFrame != nullptr);
1914 ocean_assert(sourceCamera.isValid());
1915 ocean_assert(source_R_target.isOrthonormal());
1916 ocean_assert(targetCamera.isValid());
1917 ocean_assert(targetFrame != nullptr);
1918 ocean_assert(binSizeInPixel >= 1u);
1919
1920 const size_t binsX = std::max(1u, targetCamera.width() / binSizeInPixel);
1921 const size_t binsY = std::max(1u, targetCamera.height() / binSizeInPixel);
1922 CV::FrameInterpolatorBilinear::LookupTable lookupTable(targetCamera.width(), targetCamera.height(), binsX, binsY);
1923
1924 for (size_t yBin = 0; yBin <= lookupTable.binsY(); ++yBin)
1925 {
1926 for (size_t xBin = 0; xBin <= lookupTable.binsX(); ++xBin)
1927 {
1928 const Vector2 cornerPosition = lookupTable.binTopLeftCornerPosition(xBin, yBin);
1929
1930 constexpr bool makeUnitVector = false; // we don't need a unit/normalized vector as we project the vector into the camera again
1931
1932 const Vector3 rayI = source_R_target * targetCamera.vector(cornerPosition, makeUnitVector);
1933 const Vector3 rayIF = Vector3(rayI.x(), -rayI.y(), -rayI.z());
1934
1935 if (rayIF.z() > Numeric::eps())
1936 {
1937 const Vector2 projectedPoint = sourceCamera.projectToImageIF(rayIF);
1938
1939 lookupTable.setBinTopLeftCornerValue(xBin, yBin, projectedPoint - cornerPosition);
1940 }
1941 else
1942 {
1943 // simply a coordinate far outside the input
1944 lookupTable.setBinTopLeftCornerValue(xBin, yBin, Vector2(Scalar(sourceCamera.width() * 10u), Scalar(sourceCamera.height() * 10u)));
1945 }
1946 }
1947 }
1948
1949 lookup<T, tChannels>(sourceFrame, sourceCamera.width(), sourceCamera.height(), lookupTable, true /*offset*/, borderColor, targetFrame, sourceFramePaddingElements, targetFramePaddingElements, worker);
1950
1951 if (source_OLT_target)
1952 {
1953 *source_OLT_target = std::move(lookupTable);
1954 }
1955}
1956
1957template <unsigned int tChannels>
1958void FrameInterpolatorBilinear::rotate8BitPerChannel(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker, const uint8_t* borderColor)
1959{
1960 static_assert(tChannels != 0u, "Invalid channel number!");
1961
1962 ocean_assert(source != nullptr && target != nullptr);
1963 ocean_assert(width >= 1u && height >= 1u);
1964
1965 if (worker)
1966 {
1967 worker->executeFunction(Worker::Function::createStatic(&rotate8BitPerChannelSubset<tChannels>, source, target, width, height, horizontalAnchorPosition, verticalAnchorPosition, angle, borderColor, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height);
1968 }
1969 else
1970 {
1971 rotate8BitPerChannelSubset<tChannels>(source, target, width, height, horizontalAnchorPosition, verticalAnchorPosition, angle, borderColor, sourcePaddingElements, targetPaddingElements, 0u, height);
1972 }
1973}
1974
1975template <unsigned int tChannels, PixelCenter tPixelCenter, typename TScalar>
1976inline void FrameInterpolatorBilinear::interpolatePixel8BitPerChannel(const uint8_t* frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2<TScalar>& position, uint8_t* result)
1977{
1978 static_assert(tChannels != 0u, "Invalid channel number!");
1979 static_assert(tPixelCenter == PC_TOP_LEFT || tPixelCenter == PC_CENTER, "Invalid pixel center!");
1980
1981 ocean_assert(frame != nullptr && result != nullptr);
1982 ocean_assert(width != 0u && height != 0u);
1983
1984 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
1985
1986 ocean_assert(position.x() >= TScalar(0));
1987 ocean_assert(position.y() >= TScalar(0));
1988
1989 if constexpr (tPixelCenter == PC_TOP_LEFT)
1990 {
1991 ocean_assert(position.x() <= TScalar(width - 1u));
1992 ocean_assert(position.y() <= TScalar(height - 1u));
1993
1994 const unsigned int left = (unsigned int)(position.x());
1995 const unsigned int top = (unsigned int)(position.y());
1996 ocean_assert(left < width && top < height);
1997
1998 const TScalar tx = position.x() - TScalar(left);
1999 ocean_assert(tx >= 0 && tx <= 1);
2000 const unsigned int txi = (unsigned int)(tx * TScalar(128) + TScalar(0.5));
2001 const unsigned int txi_ = 128u - txi;
2002
2003 const TScalar ty = position.y() - TScalar(top);
2004 ocean_assert(ty >= 0 && ty <= 1);
2005 const unsigned int tyi = (unsigned int)(ty * TScalar(128) + TScalar(0.5));
2006 const unsigned int tyi_ = 128u - tyi;
2007
2008 const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2009 const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2010
2011 const uint8_t* const topLeft = frame + top * frameStrideElements + tChannels * left;
2012
2013 const unsigned int txty = txi * tyi;
2014 const unsigned int txty_ = txi * tyi_;
2015 const unsigned int tx_ty = txi_ * tyi;
2016 const unsigned int tx_ty_ = txi_ * tyi_;
2017
2018 for (unsigned int n = 0u; n < tChannels; ++n)
2019 {
2020 result[n] = uint8_t((topLeft[n] * tx_ty_ + topLeft[rightOffset + n] * txty_ + topLeft[bottomOffset + n] * tx_ty + topLeft[bottomOffset + rightOffset + n] * txty + 8192u) >> 14u);
2021 }
2022 }
2023 else
2024 {
2025 ocean_assert(tPixelCenter == PC_CENTER);
2026
2027 ocean_assert(position.x() <= TScalar(width));
2028 ocean_assert(position.y() <= TScalar(height));
2029
2030 const TScalar xShifted = std::max(TScalar(0.0), position.x() - TScalar(0.5));
2031 const TScalar yShifted = std::max(TScalar(0.0), position.y() - TScalar(0.5));
2032
2033 const unsigned int left = (unsigned int)(xShifted);
2034 const unsigned int top = (unsigned int)(yShifted);
2035
2036 ocean_assert(left < width);
2037 ocean_assert(top < height);
2038
2039 const TScalar tx = xShifted - TScalar(left);
2040 const TScalar ty = yShifted - TScalar(top);
2041
2042 ocean_assert(tx >= 0 && tx <= 1);
2043 ocean_assert(ty >= 0 && ty <= 1);
2044
2045 const unsigned int txi = (unsigned int)(tx * TScalar(128) + TScalar(0.5));
2046 const unsigned int txi_ = 128u - txi;
2047
2048 const unsigned int tyi = (unsigned int)(ty * TScalar(128) + TScalar(0.5));
2049 const unsigned int tyi_ = 128u - tyi;
2050
2051 const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2052 const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2053
2054 const uint8_t* const topLeft = frame + top * frameStrideElements + left * tChannels;
2055
2056 const unsigned int txty = txi * tyi;
2057 const unsigned int txty_ = txi * tyi_;
2058 const unsigned int tx_ty = txi_ * tyi;
2059 const unsigned int tx_ty_ = txi_ * tyi_;
2060
2061 for (unsigned int n = 0u; n < tChannels; ++n)
2062 {
2063 result[n] = uint8_t((topLeft[n] * tx_ty_ + topLeft[rightOffset + n] * txty_ + topLeft[bottomOffset + n] * tx_ty + topLeft[bottomOffset + rightOffset + n] * txty + 8192u) >> 14u);
2064 }
2065 }
2066}
2067
2068template <typename TSource, typename TTarget, unsigned int tChannels, PixelCenter tPixelCenter, typename TScalar, typename TIntermediate>
2069inline void FrameInterpolatorBilinear::interpolatePixel(const TSource* frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2<TScalar>& position, TTarget* result, const TIntermediate& resultBias)
2070{
2071 static_assert(tChannels != 0u, "Invalid channel number!");
2072 static_assert(tPixelCenter == PC_TOP_LEFT || tPixelCenter == PC_CENTER, "Invalid pixel center!");
2073
2074 ocean_assert(frame != nullptr && result != nullptr);
2075 ocean_assert(width != 0u && height != 0u);
2076
2077 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
2078
2079 ocean_assert(position.x() >= TScalar(0));
2080 ocean_assert(position.y() >= TScalar(0));
2081
2082 if constexpr (tPixelCenter == PC_TOP_LEFT)
2083 {
2084 ocean_assert(position.x() <= TScalar(width - 1u));
2085 ocean_assert(position.y() <= TScalar(height - 1u));
2086
2087 const unsigned int left = (unsigned int)(position.x());
2088 const unsigned int top = (unsigned int)(position.y());
2089
2090 const TScalar tx = position.x() - TScalar(left);
2091 ocean_assert(tx >= 0 && tx <= 1);
2092
2093 const TScalar ty = position.y() - TScalar(top);
2094 ocean_assert(ty >= 0 && ty <= 1);
2095
2096 const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2097 const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2098
2099 const TSource* const topLeft = frame + top * frameStrideElements + tChannels * left;
2100
2101 const TIntermediate txty = TIntermediate(tx) * TIntermediate(ty);
2102 const TIntermediate txty_ = TIntermediate(tx) * (TIntermediate(1) - TIntermediate(ty));
2103 const TIntermediate tx_ty = (TIntermediate(1) - TIntermediate(tx)) * TIntermediate(ty);
2104 const TIntermediate tx_ty_ = (TIntermediate(1) - TIntermediate(tx)) * (TIntermediate(1) - TIntermediate(ty));
2105
2106 ocean_assert_accuracy(NumericT<TIntermediate>::isEqual(txty + txty_ + tx_ty + tx_ty_, TIntermediate(1)));
2107
2108 for (unsigned int n = 0u; n < tChannels; ++n)
2109 {
2110 result[n] = TTarget(TIntermediate(topLeft[n]) * tx_ty_ + TIntermediate(topLeft[rightOffset + n]) * txty_ + TIntermediate(topLeft[bottomOffset + n]) * tx_ty + TIntermediate(topLeft[bottomOffset + rightOffset + n]) * txty + resultBias);
2111 }
2112 }
2113 else
2114 {
2115 ocean_assert(tPixelCenter == PC_CENTER);
2116
2117 ocean_assert(position.x() <= TScalar(width));
2118 ocean_assert(position.y() <= TScalar(height));
2119
2120 const TScalar xShifted = std::max(TScalar(0.0), position.x() - TScalar(0.5));
2121 const TScalar yShifted = std::max(TScalar(0.0), position.y() - TScalar(0.5));
2122
2123 const unsigned int left = (unsigned int)(xShifted);
2124 const unsigned int top = (unsigned int)(yShifted);
2125
2126 ocean_assert(left < width);
2127 ocean_assert(top < height);
2128
2129 const TScalar tx = xShifted - TScalar(left);
2130 const TScalar ty = yShifted - TScalar(top);
2131
2132 ocean_assert(tx >= 0 && tx <= 1);
2133 ocean_assert(ty >= 0 && ty <= 1);
2134
2135 const unsigned int rightOffset = left + 1u < width ? tChannels : 0u;
2136 const unsigned int bottomOffset = top + 1u < height ? frameStrideElements : 0u;
2137
2138 const TSource* const topLeft = frame + top * frameStrideElements + tChannels * left;
2139
2140 const TIntermediate txty = TIntermediate(tx) * TIntermediate(ty);
2141 const TIntermediate txty_ = TIntermediate(tx) * (TIntermediate(1) - TIntermediate(ty));
2142 const TIntermediate tx_ty = (TIntermediate(1) - TIntermediate(tx)) * TIntermediate(ty);
2143 const TIntermediate tx_ty_ = (TIntermediate(1) - TIntermediate(tx)) * (TIntermediate(1) - TIntermediate(ty));
2144
2145 ocean_assert_accuracy(NumericT<TIntermediate>::isEqual(txty + txty_ + tx_ty + tx_ty_, TIntermediate(1)));
2146
2147 for (unsigned int n = 0u; n < tChannels; ++n)
2148 {
2149 result[n] = TTarget(TIntermediate(topLeft[n]) * tx_ty_ + TIntermediate(topLeft[rightOffset + n]) * txty_ + TIntermediate(topLeft[bottomOffset + n]) * tx_ty + TIntermediate(topLeft[bottomOffset + rightOffset + n]) * txty + resultBias);
2150 }
2151 }
2152}
2153
2154template <unsigned int tChannels, bool tAlphaAtFront, bool tTransparentIs0xFF>
2155inline void FrameInterpolatorBilinear::interpolate1PixelFullAlphaBorder8BitPerChannel(const uint8_t* frame, const unsigned int width, const unsigned int height, const Vector2& position, uint8_t* result, const unsigned int framePaddingElements)
2156{
2157 static_assert(tChannels != 0u, "Invalid channel number!");
2158
2159 ocean_assert(frame && result);
2160
2161 const Vector2 pos(position.x() - Scalar(0.5), position.y() - Scalar(0.5));
2162
2163 // check whether the position is outside the frame and will therefore be 100% transparent
2164 if (pos.x() <= Scalar(-1) || pos.y() <= Scalar(-1) || pos.x() >= Scalar(width) || pos.y() >= Scalar(height))
2165 {
2166 for (unsigned int n = 0u; n < tChannels - 1u; ++n)
2167 {
2169 }
2170
2171 result[FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] = FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2172
2173 return;
2174 }
2175
2176 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
2177
2178 const int left = int(Numeric::floor(pos.x()));
2179 const int top = int(Numeric::floor(pos.y()));
2180
2181 ocean_assert(left >= -1 && left < int(width));
2182 ocean_assert(top >= -1 && top < int(height));
2183
2184 if ((unsigned int)left < width - 1u && (unsigned int)top < height - 1u)
2185 {
2186 // we have a valid pixel position for the left, top, right and bottom pixel
2187
2188 const unsigned int txi = (unsigned int)((pos.x() - Scalar(left)) * Scalar(128) + Scalar(0.5));
2189 const unsigned int txi_ = 128u - txi;
2190
2191 const unsigned int tyi = (unsigned int)((pos.y() - Scalar(top)) * Scalar(128) + Scalar(0.5));
2192 const unsigned int tyi_ = 128u - tyi;
2193
2194 const uint8_t* const topLeft = frame + top * frameStrideElements + left * tChannels;
2195
2196 const unsigned int txty = txi * tyi;
2197 const unsigned int txty_ = txi * tyi_;
2198 const unsigned int tx_ty = txi_ * tyi;
2199 const unsigned int tx_ty_ = txi_ * tyi_;
2200
2201 for (unsigned int n = 0u; n < tChannels; ++n)
2202 {
2203 result[n] = (topLeft[n] * tx_ty_ + topLeft[tChannels + n] * txty_
2204 + topLeft[frameStrideElements + n] * tx_ty + topLeft[frameStrideElements + tChannels + n] * txty + 8192u) >> 14u;
2205 }
2206 }
2207 else
2208 {
2209 // we do not have a valid pixel for all 4-neighborhood pixels
2210
2211 const unsigned int txi = (unsigned int)((pos.x() - Scalar(left)) * Scalar(128) + Scalar(0.5));
2212 const unsigned int txi_ = 128u - txi;
2213
2214 const unsigned int tyi = (unsigned int)((pos.y() - Scalar(top)) * Scalar(128) + Scalar(0.5));
2215 const unsigned int tyi_ = 128u - tyi;
2216
2217 const unsigned int rightOffset = (left >= 0 && left + 1u < width) ? tChannels : 0u;
2218 const unsigned int bottomOffset = (top >= 0 && top + 1u < height) ? frameStrideElements : 0u;
2219
2220 ocean_assert(left < int(width) && top < int(height));
2221 const uint8_t* const topLeft = frame + max(0, top) * frameStrideElements + max(0, left) * tChannels;
2222
2223 const unsigned int txty = txi * tyi;
2224 const unsigned int txty_ = txi * tyi_;
2225 const unsigned int tx_ty = txi_ * tyi;
2226 const unsigned int tx_ty_ = txi_ * tyi_;
2227
2228 for (unsigned int n = FrameBlender::SourceOffset<tAlphaAtFront>::data(); n < tChannels + FrameBlender::SourceOffset<tAlphaAtFront>::data() - 1u; ++n)
2229 {
2230 result[n] = (topLeft[n] * tx_ty_ + topLeft[rightOffset + n] * txty_
2231 + topLeft[bottomOffset + n] * tx_ty + topLeft[bottomOffset + rightOffset + n] * txty + 8192u) >> 14u;
2232 }
2233
2234 const uint8_t alphaTopLeft = (left >= 0 && top >= 0) ? topLeft[FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] : FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2235 const uint8_t alphaTopRight = (left + 1u < width && top >= 0) ? topLeft[rightOffset + FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] : FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2236 const uint8_t alphaBottomLeft = (left >= 0 && top + 1u < height) ? topLeft[bottomOffset + FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] : FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2237 const uint8_t alphaBottomRight = (left + 1u < width && top + 1u < height) ? topLeft[bottomOffset + rightOffset + FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] : FrameBlender::fullTransparent8Bit<tTransparentIs0xFF>();
2238
2239 result[FrameBlender::SourceOffset<tAlphaAtFront>::template alpha<tChannels>()] = (alphaTopLeft * tx_ty_ + alphaTopRight * txty_ + alphaBottomLeft * tx_ty + alphaBottomRight * txty + 8192u) >> 14u;
2240 }
2241}
2242
2243template <unsigned int tChannels>
2244void FrameInterpolatorBilinear::affine8BitPerChannelSubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberOutputRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
2245{
2246 static_assert(tChannels >= 1u, "Invalid channel number!");
2247
2248 ocean_assert(source != nullptr && target != nullptr);
2249 ocean_assert(sourceWidth > 0u && sourceHeight > 0u);
2250 ocean_assert_and_suppress_unused(targetWidth > 0u && targetHeight > 0u, targetHeight);
2251 ocean_assert(source_A_target);
2252 ocean_assert(!source_A_target->isNull() && Numeric::isEqualEps((*source_A_target)[2]) && Numeric::isEqualEps((*source_A_target)[5]));
2253
2254 ocean_assert(firstTargetRow + numberOutputRows <= targetHeight);
2255
2256 const unsigned int targetStrideElements = tChannels * targetWidth + targetPaddingElements;
2257
2258 const Scalar scalarSourceWidth_1 = Scalar(sourceWidth - 1u);
2259 const Scalar scalarSourceHeight_1 = Scalar(sourceHeight - 1u);
2260
2261 using PixelType = typename DataType<uint8_t, tChannels>::Type;
2262
2263 uint8_t zeroColor[tChannels] = {uint8_t(0)};
2264 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
2265
2266 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberOutputRows; ++y)
2267 {
2268 PixelType* targetRow = (PixelType*)(target + y * targetStrideElements);
2269
2270 /*
2271 * We can slightly optimize the 3x3 matrix multiplication:
2272 *
2273 * | X0 Y0 Z0 | | x |
2274 * | X1 Y1 Z1 | * | y |
2275 * | 0 0 1 | | 1 |
2276 *
2277 * | xx | | X0 * x | | Y0 * y + Z0 |
2278 * | yy | = | X1 * x | + | Y1 * y + Z1 |
2279 *
2280 * As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
2281 *
2282 * C0 = Y0 * y + Z0
2283 * C1 = Y1 * y + Z1
2284 *
2285 * So the computation becomes:
2286 *
2287 * | x' | | X0 * x | | C0 |
2288 * | y' | = | X1 * x | + | C1 |
2289 */
2290
2291 const Vector2 X(source_A_target->data() + 0);
2292 const Vector2 c(Vector2(source_A_target->data() + 3) * Scalar(y) + Vector2(source_A_target->data() + 6));
2293
2294 for (unsigned int x = 0u; x < targetWidth; ++x)
2295 {
2296 const Vector2 sourcePosition = X * Scalar(x) + c;
2297
2298#ifdef OCEAN_DEBUG
2299 const Scalar debugSourceX = (*source_A_target)[0] * Scalar(x) + (*source_A_target)[3] * Scalar(y) + (*source_A_target)[6];
2300 const Scalar debugSourceY = (*source_A_target)[1] * Scalar(x) + (*source_A_target)[4] * Scalar(y) + (*source_A_target)[7];
2301 ocean_assert(sourcePosition.isEqual(Vector2(debugSourceX, debugSourceY), Scalar(0.01)));
2302#endif
2303
2304 if (sourcePosition.x() < Scalar(0) || sourcePosition.x() > scalarSourceWidth_1 || sourcePosition.y() < Scalar(0) || sourcePosition.y() > scalarSourceHeight_1)
2305 {
2306 *targetRow = *bColor;
2307 }
2308 else
2309 {
2310 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(source, sourceWidth, sourceHeight, sourcePaddingElements, sourcePosition, (uint8_t*)(targetRow));
2311 }
2312
2313 targetRow++;
2314 }
2315 }
2316}
2317
2318template <unsigned int tChannels>
2319void FrameInterpolatorBilinear::homography8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
2320{
2321 static_assert(tChannels >= 1u, "Invalid channel number!");
2322
2323 ocean_assert(input != nullptr && output != nullptr);
2324 ocean_assert(inputWidth > 0u && inputHeight > 0u);
2325 ocean_assert(outputWidth > 0u && outputHeight > 0u);
2326 ocean_assert(input_H_output != nullptr);
2327
2328 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
2329
2330 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
2331
2332 const Scalar scalarInputWidth_1 = Scalar(inputWidth - 1u);
2333 const Scalar scalarInputHeight_1 = Scalar(inputHeight - 1u);
2334
2335 using PixelType = typename DataType<uint8_t, tChannels>::Type;
2336
2337 uint8_t zeroColor[tChannels] = {uint8_t(0)};
2338 const PixelType bColor = borderColor ? *(PixelType*)borderColor : *(PixelType*)zeroColor;
2339
2340 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
2341 {
2342 /*
2343 * We can slightly optimize the 3x3 matrix multiplication:
2344 *
2345 * | X0 Y0 Z0 | | x |
2346 * | X1 Y1 Z1 | * | y |
2347 * | X2 Y2 Z2 | | 1 |
2348 *
2349 * | xx | | X0 * x | | Y0 * y + Z0 |
2350 * | yy | = | X1 * x | + | Y1 * y + Z1 |
2351 * | zz | | X2 * x | | Y2 * y + Z2 |
2352 *
2353 * | xx | | X0 * x | | C0 |
2354 * | yy | = | X1 * x | + | C1 |
2355 * | zz | | X2 * x | | C2 |
2356 *
2357 * As y is constant within the inner loop, we can pre-calculate the following terms:
2358 *
2359 * | x' | | (X0 * x + C0) / (X2 * x + C2) |
2360 * | y' | = | (X1 * x + C1) / (X2 * x + C2) |
2361 */
2362
2363 const Vector2 X(input_H_output->data() + 0);
2364 const Vector2 c(Vector2(input_H_output->data() + 3) * Scalar(y) + Vector2(input_H_output->data() + 6));
2365
2366 const Scalar X2 = (*input_H_output)(2, 0);
2367 const Scalar constValue2 = (*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2);
2368
2369 PixelType* outputRowPixel = (PixelType*)(output + y * outputStrideElements);
2370
2371 for (unsigned int x = 0u; x < outputWidth; ++x)
2372 {
2373 ocean_assert_accuracy(Numeric::isNotEqualEps((X2 * Scalar(x) + constValue2)));
2374 const Vector2 inputPosition((X * Scalar(x) + c) / (X2 * Scalar(x) + constValue2));
2375
2376#ifdef OCEAN_DEBUG
2377 const Vector2 debugInputPosition(*input_H_output * Vector2(Scalar(x), Scalar(y)));
2378 ocean_assert(inputPosition.isEqual(debugInputPosition, Scalar(0.01)));
2379#endif
2380
2381 if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
2382 {
2383 *outputRowPixel = bColor;
2384 }
2385 else
2386 {
2387 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputRowPixel));
2388 }
2389
2390 ++outputRowPixel;
2391 }
2392 }
2393}
2394
2395template <typename T, unsigned int tChannels>
2396void FrameInterpolatorBilinear::homographySubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const T* borderColor, T* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
2397{
2398 static_assert(tChannels >= 1u, "Invalid channel number!");
2399
2400 ocean_assert(input != nullptr && output != nullptr);
2401 ocean_assert(inputWidth > 0u && inputHeight > 0u);
2402 ocean_assert_and_suppress_unused(outputWidth > 0u && outputHeight > 0u, outputHeight);
2403 ocean_assert(input_H_output != nullptr);
2404
2405 ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
2406
2407 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
2408
2409 const Scalar scalarInputWidth1 = Scalar(inputWidth - 1u);
2410 const Scalar scalarInputHeight1 = Scalar(inputHeight - 1u);
2411
2412 // we need to find a best matching floating point data type for the intermediate interpolation results
2413 using TIntermediate = typename FloatTyper<T>::Type;
2414
2415 using PixelType = typename DataType<T, tChannels>::Type;
2416
2417 constexpr T zeroColor[tChannels] = {T(0)};
2418 const PixelType* const bColor = borderColor ? (PixelType*)(borderColor) : (PixelType*)(zeroColor);
2419
2420 constexpr TIntermediate bias = TIntermediate(0);
2421
2422 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
2423 {
2424 /*
2425 * We can slightly optimize the 3x3 matrix multiplication:
2426 *
2427 * | X0 Y0 Z0 | | x |
2428 * | X1 Y1 Z1 | * | y |
2429 * | X2 Y2 Z2 | | 1 |
2430 *
2431 * | xx | | X0 * x | | Y0 * y + Z0 |
2432 * | yy | = | X1 * x | + | Y1 * y + Z1 |
2433 * | zz | | X2 * x | | Y2 * y + Z2 |
2434 *
2435 * | xx | | X0 * x | | C0 |
2436 * | yy | = | X1 * x | + | C1 |
2437 * | zz | | X2 * x | | C3 |
2438 *
2439 * As y is constant within the inner loop, we can pre-calculate the following terms:
2440 *
2441 * | x' | | (X0 * x + C0) / (X2 * x + C2) |
2442 * | y' | = | (X1 * x + C1) / (X2 * x + C2) |
2443 */
2444
2445 const Vector2 X(input_H_output->data() + 0);
2446 const Vector2 c(Vector2(input_H_output->data() + 3) * Scalar(y) + Vector2(input_H_output->data() + 6));
2447
2448 const Scalar X2 = (*input_H_output)(2, 0);
2449 const Scalar constValue2 = (*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2);
2450
2451 PixelType* outputRowPixel = (PixelType*)(output + y * outputStrideElements);
2452
2453 for (unsigned int x = 0u; x < outputWidth; ++x)
2454 {
2455 ocean_assert_accuracy(Numeric::isNotEqualEps((X2 * Scalar(x) + constValue2)));
2456 const Vector2 inputPosition((X * Scalar(x) + c) / (X2 * Scalar(x) + constValue2));
2457
2458#ifdef OCEAN_DEBUG
2459 const Vector2 debugInputPosition(*input_H_output * Vector2(Scalar(x), Scalar(y)));
2460 ocean_assert((std::is_same<float, Scalar>::value) || inputPosition.isEqual(debugInputPosition, Scalar(0.01)));
2461#endif
2462
2463 if (inputPosition.x() >= Scalar(0) && inputPosition.x() <= scalarInputWidth1 && inputPosition.y() >= Scalar(0) && inputPosition.y() <= scalarInputHeight1)
2464 {
2465 interpolatePixel<T, T, tChannels, CV::PC_TOP_LEFT, Scalar, TIntermediate>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (T*)(outputRowPixel), bias);
2466 }
2467 else
2468 {
2469 *outputRowPixel = *bColor;
2470 }
2471
2472 ++outputRowPixel;
2473 }
2474 }
2475}
2476
2477#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
2478
2479template <unsigned int tChannels>
2480inline void FrameInterpolatorBilinear::affine8BitPerChannelSSESubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
2481{
2482 static_assert(tChannels >= 1u, "Invalid channel number!");
2483
2484 ocean_assert(source && target);
2485 ocean_assert(sourceWidth > 0u && sourceHeight > 0u);
2486 ocean_assert(targetWidth >= 4u && targetHeight > 0u);
2487 ocean_assert(source_A_target);
2488 ocean_assert(!source_A_target->isNull() && Numeric::isEqualEps((*source_A_target)[2]) && Numeric::isEqualEps((*source_A_target)[5]));
2489
2490 ocean_assert_and_suppress_unused(firstTargetRow + numberTargetRows <= targetHeight, targetHeight);
2491
2492 const unsigned int sourceStrideElements = tChannels * sourceWidth + sourcePaddingElements;
2493 const unsigned int targetStrideElements = tChannels * targetWidth + targetPaddingElements;
2494
2495 using PixelType = typename DataType<uint8_t, tChannels>::Type;
2496
2497 uint8_t zeroColor[tChannels] = {uint8_t(0)};
2498 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
2499
2500 OCEAN_ALIGN_DATA(16) unsigned int validPixels[4];
2501
2502 OCEAN_ALIGN_DATA(16) unsigned int topLeftOffsets[4];
2503 OCEAN_ALIGN_DATA(16) unsigned int topRightOffsets[4];
2504 OCEAN_ALIGN_DATA(16) unsigned int bottomLeftOffsets[4];
2505 OCEAN_ALIGN_DATA(16) unsigned int bottomRightOffsets[4];
2506
2507 // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
2508 const __m128 m128_f_X0 = _mm_set_ps1(float((*source_A_target)(0, 0)));
2509 const __m128 m128_f_X1 = _mm_set_ps1(float((*source_A_target)(1, 0)));
2510
2511 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
2512 {
2513 PixelType* targetRow = (PixelType*)(target + y * targetStrideElements);
2514
2515 /*
2516 * We can slightly optimize the 3x3 matrix multiplication:
2517 *
2518 * | X0 Y0 Z0 | | x |
2519 * | X1 Y1 Z1 | * | y |
2520 * | 0 0 1 | | 1 |
2521 *
2522 * | xx | | X0 * x | | Y0 * y + Z0 |
2523 * | yy | = | X1 * x | + | Y1 * y + Z1 |
2524 *
2525 * As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
2526 *
2527 * C0 = Y0 * y + Z0
2528 * C1 = Y1 * y + Z1
2529 *
2530 * So the computation becomes:
2531 *
2532 * | x' | | X0 * x | | C0 |
2533 * | y' | = | X1 * x | + | C1 |
2534 */
2535
2536 // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
2537 const __m128 m128_f_C0 = _mm_set_ps1(float((*source_A_target)(0, 1) * Scalar(y) + (*source_A_target)(0, 2)));
2538 const __m128 m128_f_C1 = _mm_set_ps1(float((*source_A_target)(1, 1) * Scalar(y) + (*source_A_target)(1, 2)));
2539
2540 // we store 4 floats: [0.0f, 0.0f, 0.0f, 0.0f]
2541 const __m128 m128_f_zero = _mm_setzero_ps();
2542
2543 // we store 4 integers: [tChannels, tChannels, tChannels, tChannels]
2544 const __m128i m128_i_channels = _mm_set1_epi32(tChannels);
2545
2546 // we store 4 integers: [sourceStrideElements, sourceStrideElements, sourceStrideElements, sourceStrideElements]
2547 const __m128i m128_i_sourceStrideElements = _mm_set1_epi32(sourceStrideElements);
2548
2549 // we store 4 integers: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
2550 const __m128i m128_i_sourceWidth_1 = _mm_set1_epi32(int(sourceWidth) - 1);
2551 const __m128i m128_i_sourceHeight_1 = _mm_set1_epi32(int(sourceHeight) - 1);
2552
2553 // we store 4 floats: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
2554 const __m128 m128_f_sourceWidth_1 = _mm_set_ps1(float(sourceWidth - 1u));
2555 const __m128 m128_f_sourceHeight_1 = _mm_set_ps1(float(sourceHeight - 1u));
2556
2557 for (unsigned int x = 0u; x < targetWidth; x += 4u)
2558 {
2559 if (x + 4u > targetWidth)
2560 {
2561 // the last iteration will not fit into the output frame,
2562 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
2563
2564 ocean_assert(x >= 4u && targetWidth > 4u);
2565 const unsigned int newX = targetWidth - 4u;
2566
2567 ocean_assert(x > newX);
2568 targetRow -= x - newX;
2569
2570 x = newX;
2571
2572 // the for loop will stop after this iteration
2573 ocean_assert(!(x + 4u < targetWidth));
2574 }
2575
2576
2577 // we need four successive x coordinate floats:
2578 // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
2579 const __m128 m128_f_x_0123 = _mm_set_ps(float(x + 3u), float(x + 2u), float(x + 1u), float(x + 0u));
2580
2581 // we calculate xx and yy for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
2582 const __m128 m128_f_sourceX = _mm_add_ps(_mm_mul_ps(m128_f_X0, m128_f_x_0123), m128_f_C0);
2583 const __m128 m128_f_sourceY = _mm_add_ps(_mm_mul_ps(m128_f_X1, m128_f_x_0123), m128_f_C1);
2584
2585 // now we check whether we are inside the input frame
2586 const __m128 m128_f_validPixelX = _mm_and_ps(_mm_cmple_ps(m128_f_sourceX, m128_f_sourceWidth_1), _mm_cmpge_ps(m128_f_sourceX, m128_f_zero)); // inputPosition.x() <= (inputWidth - 1) && inputPosition.x() >= 0 ? 0xFFFFFFFF : 0x00000000
2587 const __m128 m128_f_validPixelY = _mm_and_ps(_mm_cmple_ps(m128_f_sourceY, m128_f_sourceHeight_1), _mm_cmpge_ps(m128_f_sourceY, m128_f_zero)); // inputPosition.y() <= (inputHeight - 1) && inputPosition.y() >= 0 ? 0xFFFFFFFF : 0x00000000
2588
2589 const __m128i m128_i_validPixel = _mm_castps_si128(_mm_and_ps(m128_f_validPixelX, m128_f_validPixelY)); // is_inside_input_frame(inputPosition) ? 0xFFFFFFFF : 0x00000000
2590
2591 // we can stop here if all pixels are invalid
2592 if (_mm_test_all_zeros(m128_i_validPixel, _mm_set1_epi32(0xFFFFFFFF)))
2593 {
2594#ifdef OCEAN_DEBUG
2595 OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
2596 _mm_store_si128((__m128i*)debugValidPixels, m128_i_validPixel);
2597 ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
2598#endif
2599
2600 targetRow[0] = *bColor;
2601 targetRow[1] = *bColor;
2602 targetRow[2] = *bColor;
2603 targetRow[3] = *bColor;
2604
2605 targetRow += 4;
2606
2607 continue;
2608 }
2609
2610 // we store the result
2611 _mm_store_si128((__m128i*)validPixels, m128_i_validPixel);
2612 ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
2613
2614
2615 // now we determine the left, top, right and bottom pixel used for the interpolation
2616 const __m128 m128_f_tx_floor = _mm_floor_ps(m128_f_sourceX);
2617 const __m128 m128_f_ty_floor = _mm_floor_ps(m128_f_sourceY);
2618
2619 // left = floor(x); top = floor(y)
2620 const __m128i m128_i_left = _mm_cvtps_epi32(m128_f_tx_floor);
2621 const __m128i m128_i_top = _mm_cvtps_epi32(m128_f_ty_floor);
2622
2623 // right = min(left + 1, width - 1); bottom = min(top + 1; height - 1)
2624 const __m128i m128_i_right = _mm_min_epu32(_mm_add_epi32(m128_i_left, _mm_set1_epi32(1)), m128_i_sourceWidth_1);
2625 const __m128i m128_i_bottom = _mm_min_epu32(_mm_add_epi32(m128_i_top, _mm_set1_epi32(1)), m128_i_sourceHeight_1);
2626
2627 // offset = (y * sourceStrideElements + tChannels * x)
2628 const __m128i m128_i_topLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left)); // topleftOffset = (top * sourceStrideElements + tChannels * left)
2629 const __m128i m128_i_topRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right)); // toprightOffset = (top * sourceStrideElements + tChannels * right)
2630 const __m128i m128_i_bottomLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left)); // ...
2631 const __m128i m128_i_bottomRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_sourceStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right));
2632
2633 // we store the offsets
2634 _mm_store_si128((__m128i*)topLeftOffsets, m128_i_topLeftOffset);
2635 _mm_store_si128((__m128i*)topRightOffsets, m128_i_topRightOffset);
2636 _mm_store_si128((__m128i*)bottomLeftOffsets, m128_i_bottomLeftOffset);
2637 _mm_store_si128((__m128i*)bottomRightOffsets, m128_i_bottomRightOffset);
2638
2639
2640 // now we need to determine the interpolation factors tx, tx_ and ty, ty_: (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
2641
2642 // we determine the fractional portions of the x' and y':
2643 // e.g., [43.1231, -12.5543, -34.123, 99.2]
2644 // [ 0.1231, 0.4457, 0.877, 0.2] // note the result for negative value - but we will not process negative values anyway due to 'validPixel'
2645 __m128 m128_f_tx = _mm_sub_ps(m128_f_sourceX, m128_f_tx_floor);
2646 __m128 m128_f_ty = _mm_sub_ps(m128_f_sourceY, m128_f_ty_floor);
2647
2648 // we use integer interpolation [0.0, 1.0] -> [0, 128]
2649 m128_f_tx = _mm_mul_ps(m128_f_tx, _mm_set_ps1(128.0f));
2650 m128_f_ty = _mm_mul_ps(m128_f_ty, _mm_set_ps1(128.0f));
2651
2652 m128_f_tx = _mm_round_ps(m128_f_tx, _MM_FROUND_TO_NEAREST_INT);
2653 m128_f_ty = _mm_round_ps(m128_f_ty, _MM_FROUND_TO_NEAREST_INT);
2654
2655 const __m128i m128_i_tx = _mm_cvtps_epi32(m128_f_tx);
2656 const __m128i m128_i_ty = _mm_cvtps_epi32(m128_f_ty);
2657
2658 interpolate4Pixels8BitPerChannelSSE<tChannels>(source, topLeftOffsets, topRightOffsets, bottomLeftOffsets, bottomRightOffsets, validPixels, *bColor, m128_i_tx, m128_i_ty, targetRow);
2659 targetRow += 4;
2660 }
2661 }
2662}
2663
2664template <unsigned int tChannels>
2665inline void FrameInterpolatorBilinear::homography8BitPerChannelSSESubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
2666{
2667 static_assert(tChannels >= 1u, "Invalid channel number!");
2668
2669 ocean_assert(input != nullptr && output != nullptr);
2670 ocean_assert(inputWidth > 0u && inputHeight > 0u);
2671 ocean_assert(outputWidth >= 4u && outputHeight > 0u);
2672 ocean_assert(input_H_output != nullptr);
2673
2674 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
2675
2676 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
2677 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
2678
2679 using PixelType = typename DataType<uint8_t, tChannels>::Type;
2680
2681 uint8_t zeroColor[tChannels] = {uint8_t(0)};
2682 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
2683
2684 OCEAN_ALIGN_DATA(16) unsigned int validPixels[4];
2685
2686 OCEAN_ALIGN_DATA(16) unsigned int topLeftOffsets[4];
2687 OCEAN_ALIGN_DATA(16) unsigned int topRightOffsets[4];
2688 OCEAN_ALIGN_DATA(16) unsigned int bottomLeftOffsets[4];
2689 OCEAN_ALIGN_DATA(16) unsigned int bottomRightOffsets[4];
2690
2691 // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
2692 const __m128 m128_f_X0 = _mm_set_ps1(float((*input_H_output)(0, 0)));
2693 const __m128 m128_f_X1 = _mm_set_ps1(float((*input_H_output)(1, 0)));
2694 const __m128 m128_f_X2 = _mm_set_ps1(float((*input_H_output)(2, 0)));
2695
2696 // we store 4 floats: [0.0f, 0.0f, 0.0f, 0.0f]
2697 const __m128 m128_f_zero = _mm_setzero_ps();
2698
2699 // we store 4 integers: [tChannels, tChannels, tChannels, tChannels]
2700 const __m128i m128_i_channels = _mm_set1_epi32(tChannels);
2701
2702 // we store 4 integers: [inputStrideElements, inputStrideElements, inputStrideElements, inputStrideElements]
2703 const __m128i m128_i_inputStrideElements = _mm_set1_epi32(inputStrideElements);
2704
2705 // we store 4 integers: [inputWidth - 1, inputWidth - 1, inputWidth -1, inputWidth -1], and same with inputHeight
2706 const __m128i m128_i_inputWidth_1 = _mm_set1_epi32(int(inputWidth) - 1);
2707 const __m128i m128_i_inputHeight_1 = _mm_set1_epi32(int(inputHeight) - 1);
2708
2709 // we store 4 floats: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
2710 const __m128 m128_f_inputWidth_1 = _mm_set_ps1(float(inputWidth - 1u));
2711 const __m128 m128_f_inputHeight_1 = _mm_set_ps1(float(inputHeight - 1u));
2712
2713 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
2714 {
2715 PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
2716
2717 /*
2718 * We can slightly optimize the 3x3 matrix multiplication:
2719 *
2720 * | X0 Y0 Z0 | | x |
2721 * | X1 Y1 Z1 | * | y |
2722 * | X2 Y2 Z2 | | 1 |
2723 *
2724 * | xx | | X0 * x | | Y0 * y + Z0 |
2725 * | yy | = | X1 * x | + | Y1 * y + Z1 |
2726 * | zz | | X2 * x | | Y2 * y + Z2 |
2727 *
2728 * | xx | | X0 * x | | C0 |
2729 * | yy | = | X1 * x | + | C1 |
2730 * | zz | | X2 * x | | C2 |
2731 *
2732 * As y is constant within the inner loop, we can pre-calculate the following terms:
2733 *
2734 * | x' | | (X0 * x + C0) / (X2 * x + C2) |
2735 * | y' | = | (X1 * x + C1) / (X2 * x + C2) |
2736 */
2737
2738 // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
2739 const __m128 m128_f_C0 = _mm_set_ps1(float((*input_H_output)(0, 1) * Scalar(y) + (*input_H_output)(0, 2)));
2740 const __m128 m128_f_C1 = _mm_set_ps1(float((*input_H_output)(1, 1) * Scalar(y) + (*input_H_output)(1, 2)));
2741 const __m128 m128_f_C2 = _mm_set_ps1(float((*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2)));
2742
2743 for (unsigned int x = 0u; x < outputWidth; x += 4u)
2744 {
2745 if (x + 4u > outputWidth)
2746 {
2747 // the last iteration will not fit into the output frame,
2748 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
2749
2750 ocean_assert(x >= 4u && outputWidth > 4u);
2751 const unsigned int newX = outputWidth - 4u;
2752
2753 ocean_assert(x > newX);
2754 outputPixelData -= x - newX;
2755
2756 x = newX;
2757
2758 // the for loop will stop after this iteration
2759 ocean_assert(!(x + 4u < outputWidth));
2760 }
2761
2762
2763 // we need four successive x coordinate floats:
2764 // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
2765 const __m128 m128_f_x_0123 = _mm_set_ps(float(x + 3u), float(x + 2u), float(x + 1u), float(x + 0u));
2766
2767 // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
2768 const __m128 m128_f_xx = _mm_add_ps(_mm_mul_ps(m128_f_X0, m128_f_x_0123), m128_f_C0);
2769 const __m128 m128_f_yy = _mm_add_ps(_mm_mul_ps(m128_f_X1, m128_f_x_0123), m128_f_C1);
2770 const __m128 m128_f_zz = _mm_add_ps(_mm_mul_ps(m128_f_X2, m128_f_x_0123), m128_f_C2);
2771
2772#ifdef USE_APPROXIMATED_INVERSE_OF_ZZ // (not defined by default)
2773
2774 // we calculate the (approximated) inverse of zz,
2775 // the overall performance will be approx. 5% better while the accuracy will be slightly worse:
2776 // [1/zz3, 1/zz2, 1/zz1, 1/zz0]
2777 const __m128 inv_zz_128 = _mm_rcp_ps(m128_f_zz);
2778
2779 // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
2780 const __m128 m128_f_inputX = _mm_mul_ps(m128_f_xx, inv_zz_128);
2781 const __m128 m128_f_inputY = _mm_mul_ps(m128_f_yy, inv_zz_128);
2782
2783#else
2784
2785 // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
2786 const __m128 m128_f_inputX = _mm_div_ps(m128_f_xx, m128_f_zz);
2787 const __m128 m128_f_inputY = _mm_div_ps(m128_f_yy, m128_f_zz);
2788
2789#endif // USE_APPROXIMATED_INVERSE_OF_ZZ
2790
2791
2792 // now we check whether we are inside the input frame
2793 const __m128 m128_f_validPixelX = _mm_and_ps(_mm_cmple_ps (m128_f_inputX, m128_f_inputWidth_1), _mm_cmpge_ps(m128_f_inputX, m128_f_zero)); // inputPosition.x() <= (inputWidth-1) && inputPosition.x() >= 0 ? 0xFFFFFF : 0x000000
2794 const __m128 m128_f_validPixelY = _mm_and_ps(_mm_cmple_ps (m128_f_inputY, m128_f_inputHeight_1), _mm_cmpge_ps(m128_f_inputY, m128_f_zero)); // inputPosition.y() <= (inputHeight-1) && inputPosition.y() >= 0 ? 0xFFFFFF : 0x000000
2795
2796 const __m128i m128_i_validPixel = _mm_castps_si128(_mm_and_ps(m128_f_validPixelX, m128_f_validPixelY)); // is_inside_input_frame(inputPosition) ? 0xFFFFFF : 0x000000
2797
2798 // we can stop here if all pixels are invalid
2799 if (_mm_test_all_zeros(m128_i_validPixel, _mm_set1_epi32(0xFFFFFFFF)))
2800 {
2801#ifdef OCEAN_DEBUG
2802 OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
2803 _mm_store_si128((__m128i*)debugValidPixels, m128_i_validPixel);
2804 ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
2805#endif
2806
2807 outputPixelData[0] = *bColor;
2808 outputPixelData[1] = *bColor;
2809 outputPixelData[2] = *bColor;
2810 outputPixelData[3] = *bColor;
2811
2812 outputPixelData += 4;
2813
2814 continue;
2815 }
2816
2817 // we store the result
2818 _mm_store_si128((__m128i*)validPixels, m128_i_validPixel);
2819 ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
2820
2821
2822 // now we determine the left, top, right and bottom pixel used for the interpolation
2823 const __m128 m128_f_tx_floor = _mm_floor_ps(m128_f_inputX);
2824 const __m128 m128_f_ty_floor = _mm_floor_ps(m128_f_inputY);
2825
2826 // left = floor(x); top = floor(y)
2827 const __m128i m128_i_left = _mm_cvtps_epi32(m128_f_tx_floor);
2828 const __m128i m128_i_top = _mm_cvtps_epi32(m128_f_ty_floor);
2829
2830 // right = min(left + 1, width - 1); bottom = min(top + 1; height - 1)
2831 const __m128i m128_i_right = _mm_min_epu32(_mm_add_epi32(m128_i_left, _mm_set1_epi32(1)), m128_i_inputWidth_1);
2832 const __m128i m128_i_bottom = _mm_min_epu32(_mm_add_epi32(m128_i_top, _mm_set1_epi32(1)), m128_i_inputHeight_1);
2833
2834 // offset = (y * inputStrideElements + tChannels * x)
2835 const __m128i m128_i_topLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left)); // topleftOffset = (top * inputStrideElements + tChannels * left)
2836 const __m128i m128_i_topRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_top, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right)); // toprightOffset = (top * inputStrideElements + tChannels * right)
2837 const __m128i m128_i_bottomLeftOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_left)); // ...
2838 const __m128i m128_i_bottomRightOffset = _mm_add_epi32(_mm_mullo_epi32(m128_i_bottom, m128_i_inputStrideElements), _mm_mullo_epi32(m128_i_channels, m128_i_right));
2839
2840 // we store the offsets
2841 _mm_store_si128((__m128i*)topLeftOffsets, m128_i_topLeftOffset);
2842 _mm_store_si128((__m128i*)topRightOffsets, m128_i_topRightOffset);
2843 _mm_store_si128((__m128i*)bottomLeftOffsets, m128_i_bottomLeftOffset);
2844 _mm_store_si128((__m128i*)bottomRightOffsets, m128_i_bottomRightOffset);
2845
2846
2847 // now we need to determine the interpolation factors tx, tx_ and ty, ty_: (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
2848
2849 // we determine the fractional portions of the x' and y':
2850 // e.g., [43.1231, -12.5543, -34.123, 99.2]
2851 // [ 0.1231, 0.4457, 0.877, 0.2] // note the result for negative value - but we will not process negative values anyway due to 'validPixel'
2852 __m128 m128_f_tx = _mm_sub_ps(m128_f_inputX, m128_f_tx_floor);
2853 __m128 m128_f_ty = _mm_sub_ps(m128_f_inputY, m128_f_ty_floor);
2854
2855 // we use integer interpolation [0.0, 1.0] -> [0, 128]
2856 m128_f_tx = _mm_mul_ps(m128_f_tx, _mm_set_ps1(128.0f));
2857 m128_f_ty = _mm_mul_ps(m128_f_ty, _mm_set_ps1(128.0f));
2858
2859 m128_f_tx = _mm_round_ps(m128_f_tx, _MM_FROUND_TO_NEAREST_INT);
2860 m128_f_ty = _mm_round_ps(m128_f_ty, _MM_FROUND_TO_NEAREST_INT);
2861
2862 const __m128i m128_i_tx = _mm_cvtps_epi32(m128_f_tx);
2863 const __m128i m128_i_ty = _mm_cvtps_epi32(m128_f_ty);
2864
2865 interpolate4Pixels8BitPerChannelSSE<tChannels>(input, topLeftOffsets, topRightOffsets, bottomLeftOffsets, bottomRightOffsets, validPixels, *bColor, m128_i_tx, m128_i_ty, outputPixelData);
2866 outputPixelData += 4;
2867 }
2868 }
2869}
2870
2871template <>
2872OCEAN_FORCE_INLINE __m128i FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<3u>(const __m128i& sourcesTopLeft, const __m128i& sourcesTopRight, const __m128i& sourcesBottomLeft, const __m128i& sourcesBottomRight, const __m128i& factorsTopLeft, const __m128i& factorsTopRight, const __m128i& factorsBottomLeft, const __m128i& factorsBottomRight)
2873{
2874 // sourcesTopLeft stores the three color values of 4 (independent) pixels (the upper left pixels):
2875 // FEDC BA98 7654 3210
2876 // ---- VUYV UYVU YVUY
2877 // sourcesTopRight, sourcesBottomLeft, sourcesBottomRight have the same pattern
2878
2879 // factorsTopLeft stores the 32 bit interpolation values for 4 pixels:
2880 // FEDC BA98 7654 3210
2881 // 3 2 1 0 (32 bit interpolation values, fitting into 16 bit)
2882
2883
2884 // we will simply extract each channel from the source pixels,
2885 // each extracted channel will be multiplied by the corresponding interpolation factor
2886 // and all interpolation results will be accumulated afterwards
2887
2888 // FEDC BA98 7654 3210
2889 const __m128i mask32_Channel0 = SSE::set128i(0xFFFFFF09FFFFFF06ull, 0xFFFFFF03FFFFFF00ull); // ---9 ---6 ---3 ---0
2890 const __m128i mask32_Channel1 = SSE::set128i(0xFFFFFF0AFFFFFF07ull, 0xFFFFFF04FFFFFF01ull); // ---A ---7 ---4 ---1
2891 const __m128i mask32_Channel2 = SSE::set128i(0xFFFFFF0BFFFFFF08ull, 0xFFFFFF05FFFFFF02ull); // ---B ---8 ---5 ---2
2892
2893
2894 // we extract the first channel from the top left pixel values and multiply the channel with the interpolation factors
2895 // FEDC BA98 7654 3210
2896 // ---9 ---6 ---3 ---0
2897 // *
2898 // FTL3 FTL2 FTL1 FTL0
2899 __m128i multiplication_channel0 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel0));
2900
2901 // we the same multiplication for the second channel
2902 __m128i multiplication_channel1 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel1));
2903
2904 // and third channel
2905 __m128i multiplication_channel2 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel2));
2906
2907
2908 // now we repeat the process for the top right pixel values
2909 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel0)));
2910 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel1)));
2911 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel2)));
2912
2913
2914 // and for the bottom left pixel values
2915 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel0)));
2916 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel1)));
2917 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel2)));
2918
2919
2920 // and for the bottom right pixel values
2921 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel0)));
2922 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel1)));
2923 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel2)));
2924
2925
2926 const __m128i m128_i_8192 = _mm_set1_epi32(8192);
2927
2928 // we add 8192 for rounding and shift the result by 14 bits (division by 128*128) // TODO if using 256 we should be able to avoid the shifting by 14 bits (simply by using shuffle operations)
2929
2930 // in addition to rounding and shifting, we need to move the interpolation results to the correct channel:
2931 // target data: ---9 ---6 ---3 ---0
2932 // shufflet target: ---- --9- -6-- 3--0
2933 // mask location: ---C ---8 ---4 ---0
2934 // mask: ---- --C- -8-- 4--0
2935 __m128i interpolation_channel0 = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication_channel0, m128_i_8192), 14), SSE::set128i(0xFFFFFFFFFFFF0CFFull, 0xFF08FFFF04FFFF00ull));
2936
2937 // target data: ---A ---7 ---4 ---1
2938 // shufflet target: ---- -A-- 7--4 --1-
2939 // mask location: ---C ---8 ---4 ---0
2940 // mask: ---- -C-- 8--4 --0-
2941 __m128i interpolation_channel1 = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication_channel1, m128_i_8192), 14), SSE::set128i(0xFFFFFFFFFF0CFFFFull, 0x08FFFF04FFFF00FFull));
2942
2943 // target data: ---B ---8 ---5 ---2
2944 // shufflet target: ---- B--8 --5- -2--
2945 // mask location: ---C ---8 ---4 ---0
2946 // mask: ---- C--8 --4- -0--
2947 __m128i interpolation_channel2 = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication_channel2, m128_i_8192), 14), SSE::set128i(0xFFFFFFFF0CFFFF08ull, 0xFFFF04FFFF00FFFFull));
2948
2949
2950 // finally, we simply blend all interpolation results together
2951
2952 return _mm_or_si128(_mm_or_si128(interpolation_channel0, interpolation_channel1), interpolation_channel2);
2953}
2954
2955template <>
2956OCEAN_FORCE_INLINE __m128i FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<4u>(const __m128i& sourcesTopLeft, const __m128i& sourcesTopRight, const __m128i& sourcesBottomLeft, const __m128i& sourcesBottomRight, const __m128i& factorsTopLeft, const __m128i& factorsTopRight, const __m128i& factorsBottomLeft, const __m128i& factorsBottomRight)
2957{
2958 // sourcesTopLeft stores the four color values of 4 (independent) pixels (the upper left pixels):
2959 // FEDC BA98 7654 3210
2960 // AVUY AVUY AVUY AVUY
2961 // sourcesTopRight, sourcesBottomLeft, sourcesBottomRight have the same pattern
2962
2963 // factorsTopLeft stores the 32 bit interpolation values for 4 pixels:
2964 // FEDC BA98 7654 3210
2965 // 3 2 1 0 (32 bit interpolation values, fitting into 16 bit)
2966
2967
2968 // we will simply extract each channel from the source pixels,
2969 // each extracted channel will be multiplied by the corresponding interpolation factor
2970 // and all interpolation results will be accumulated afterwards
2971
2972 // FEDC BA98 7654 3210
2973 const __m128i mask32_Channel0 = SSE::set128i(0xA0A0A00CA0A0A008ull, 0xA0A0A004A0A0A000ull); // ---C ---8 ---4 ---0
2974 const __m128i mask32_Channel1 = SSE::set128i(0xA0A0A00DA0A0A009ull, 0xA0A0A005A0A0A001ull); // ---D ---9 ---5 ---1
2975 const __m128i mask32_Channel2 = SSE::set128i(0xA0A0A00EA0A0A00Aull, 0xA0A0A006A0A0A002ull); // ---E ---A ---6 ---2
2976 const __m128i mask32_Channel3 = SSE::set128i(0xA0A0A00FA0A0A00Bull, 0xA0A0A007A0A0A003ull); // ---F ---B ---7 ---3
2977
2978
2979 // we extract the first channel from the top left pixel values and multiply the channel with the interpolation factors
2980 // FEDC BA98 7654 3210
2981 // ---C ---8 ---4 ---0
2982 // *
2983 // FTL3 FTL2 FTL1 FTL0
2984 __m128i multiplication_channel0 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel0));
2985
2986 // we the same multiplication for the second channel
2987 __m128i multiplication_channel1 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel1));
2988
2989 // and third channel
2990 __m128i multiplication_channel2 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel2));
2991
2992 // and last channel
2993 __m128i multiplication_channel3 = _mm_mullo_epi32(factorsTopLeft, _mm_shuffle_epi8(sourcesTopLeft, mask32_Channel3));
2994
2995
2996 // now we repeat the process for the top right pixel values
2997 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel0)));
2998 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel1)));
2999 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel2)));
3000 multiplication_channel3 = _mm_add_epi32(multiplication_channel3, _mm_mullo_epi32(factorsTopRight, _mm_shuffle_epi8(sourcesTopRight, mask32_Channel3)));
3001
3002
3003 // and for the bottom left pixel values
3004 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel0)));
3005 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel1)));
3006 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel2)));
3007 multiplication_channel3 = _mm_add_epi32(multiplication_channel3, _mm_mullo_epi32(factorsBottomLeft, _mm_shuffle_epi8(sourcesBottomLeft, mask32_Channel3)));
3008
3009
3010 // and for the bottom right pixel values
3011 multiplication_channel0 = _mm_add_epi32(multiplication_channel0, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel0)));
3012 multiplication_channel1 = _mm_add_epi32(multiplication_channel1, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel1)));
3013 multiplication_channel2 = _mm_add_epi32(multiplication_channel2, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel2)));
3014 multiplication_channel3 = _mm_add_epi32(multiplication_channel3, _mm_mullo_epi32(factorsBottomRight, _mm_shuffle_epi8(sourcesBottomRight, mask32_Channel3)));
3015
3016
3017 const __m128i m128_i_8192 = _mm_set1_epi32(8192);
3018
3019 // we add 8192 for rounding and shift the result by 14 bits (division by 128*128)
3020
3021 // in addition to rounding and shifting, we need to move the interpolation results to the correct channel:
3022 // ---C ---8 ---4 ---0
3023 // ---C ---9 ---4 ---0
3024 __m128i interpolation_channel0 = _mm_srli_epi32(_mm_add_epi32(multiplication_channel0, m128_i_8192), 14);
3025
3026 // in addition to rounding and shifting, we need to move the interpolation results to the correct channel:
3027 // ---D ---9 ---5 ---1
3028 // --D- --9- --5- --1-
3029 __m128i interpolation_channel1 = _mm_slli_epi32(_mm_srli_epi32(_mm_add_epi32(multiplication_channel1, m128_i_8192), 14), 8);
3030
3031 // ---E ---A ---6 ---2
3032 // -E-- -A-- -6-- -2--
3033 __m128i interpolation_channel2 = _mm_slli_epi32(_mm_srli_epi32(_mm_add_epi32(multiplication_channel2, m128_i_8192), 14), 16);
3034
3035 // ---F ---B ---7 ---3
3036 // F--- B--- 7--- 3---
3037 __m128i interpolation_channel3 = _mm_slli_epi32(_mm_srli_epi32(_mm_add_epi32(multiplication_channel3, m128_i_8192), 14), 24);
3038
3039
3040 // finally, we simply blend all interpolation results together
3041
3042 return _mm_or_si128(_mm_or_si128(interpolation_channel0, interpolation_channel1), _mm_or_si128(interpolation_channel2, interpolation_channel3));
3043}
3044
3045#ifdef OCEAN_COMPILER_MSC
3046
// we see a significant performance decrease with non-VS compilers/platforms,
// so the following 1-channel and 3-channel specializations are compiled for MSVC only (other compilers use the generic implementation)
3049
3050template <>
3051OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<1u>(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const DataType<uint8_t, 1u>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, 1u>::Type* targetPositionPixels)
3052{
3053 ocean_assert(source != nullptr);
3054 ocean_assert(targetPositionPixels != nullptr);
3055
3056 using PixelType = typename DataType<uint8_t, 1u>::Type;
3057
3058 // as we do not initialize the following intermediate data,
3059 // we hopefully will not allocate memory on the stack each time this function is called
3060 OCEAN_ALIGN_DATA(16) PixelType pixels[16];
3061
3062 // we gather the individual source pixel values from the source image,
3063 // based on the calculated pixel locations
3064 for (unsigned int i = 0u; i < 4u; ++i)
3065 {
3066 if (validPixels[i])
3067 {
3068 pixels[i * 4u + 0u] = *((PixelType*)(source + offsetsTopLeft[i]));
3069 pixels[i * 4u + 1u] = *((PixelType*)(source + offsetsTopRight[i]));
3070 pixels[i * 4u + 2u] = *((PixelType*)(source + offsetsBottomLeft[i]));
3071 pixels[i * 4u + 3u] = *((PixelType*)(source + offsetsBottomRight[i]));
3072 }
3073 else
3074 {
3075 pixels[i * 4u + 0u] = borderColor;
3076 pixels[i * 4u + 1u] = borderColor;
3077 pixels[i * 4u + 2u] = borderColor;
3078 pixels[i * 4u + 3u] = borderColor;
3079 }
3080 }
3081
3082 static_assert(sizeof(__m128i) == sizeof(pixels), "Invalid data type!");
3083
3084 const __m128i m128_pixels = _mm_load_si128((const __m128i*)pixels);
3085
3086
3087 // factorLeft = 128 - factorRight
3088 // factorTop = 128 - factorBottom
3089
3090 const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3091 const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3092
3093 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3094 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3095
3096 const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3097 const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3098 const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3099 const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3100
3101 // pixels stores the four interpolation grascale pixel values (top left, top right, bottom left, bottom right) for 4 (independent) pixels:
3102 // F E D C B A 9 8 7 6 5 4 3 2 1 0
3103 // BR BL TR TL BR BL TR TL BR BL TR TL BR BL TR TL
3104
3105 // factorsTopLeft stores the 32 bit interpolation values for 4 pixels:
3106 // FEDC BA98 7654 3210
3107 // 3 2 1 0 (32 bit interpolation values, fitting into 16 bit)
3108
3109
3110 // we will simply extract each channel from the source pixels,
3111 // each extracted channel will be multiplied by the corresponding interpolation factor
3112 // and all interpolation results will be accumulated afterwards
3113
3114 // FEDC BA98 7654 3210
3115 const __m128i mask32_topLeft = SSE::set128i(0xFFFFFF0CFFFFFF08ull, 0xFFFFFF04FFFFFF00ull); // ---C ---8 ---4 ---0
3116 const __m128i mask32_topRight = SSE::set128i(0xFFFFFF0DFFFFFF09ull, 0xFFFFFF05FFFFFF01ull); // ---D ---9 ---5 ---1
3117 const __m128i mask32_bottomLeft = SSE::set128i(0xFFFFFF0EFFFFFF0Aull, 0xFFFFFF06FFFFFF02ull); // ---E ---A ---6 ---2
3118 const __m128i mask32_bottomRight = SSE::set128i(0xFFFFFF0FFFFFFF0Bull, 0xFFFFFF07FFFFFF03ull); // ---F ---B ---7 ---3
3119
3120
3121 // we extract the top left values and multiply them with the interpolation factors
3122 // FEDC BA98 7654 3210
3123 // ---C ---8 ---4 ---0
3124 // *
3125 // FTL3 FTL2 FTL1 FTL0
3126 __m128i multiplicationA = _mm_mullo_epi32(m128_factorsTopLeft, _mm_shuffle_epi8(m128_pixels, mask32_topLeft));
3127 __m128i multiplicationB = _mm_mullo_epi32(m128_factorsTopRight, _mm_shuffle_epi8(m128_pixels, mask32_topRight));
3128
3129 multiplicationA = _mm_add_epi32(multiplicationA, _mm_mullo_epi32(m128_factorsBottomLeft, _mm_shuffle_epi8(m128_pixels, mask32_bottomLeft)));
3130 multiplicationB = _mm_add_epi32(multiplicationB, _mm_mullo_epi32(m128_factorsBottomRight, _mm_shuffle_epi8(m128_pixels, mask32_bottomRight)));
3131
3132 __m128i multiplication = _mm_add_epi32(multiplicationA, multiplicationB);
3133
3134 const __m128i m128_i_8192 = _mm_set1_epi32(8192);
3135
3136 // we add 8192 for rounding and shift the result by 14 bits (division by 128*128) // TODO if using 256 we should be able to avoid the shifting by 14 bits (simply by using shuffle operations)
3137 // additionally, we shuffle the individual results together
3138
3139 const __m128i result = _mm_shuffle_epi8(_mm_srli_epi32(_mm_add_epi32(multiplication, m128_i_8192), 14), SSE::set128i(0xFFFFFFFFFFFFFFFFull, 0xFFFFFFFF0C080400ull));
3140
3141 *((unsigned int*)targetPositionPixels) = _mm_extract_epi32(result, 0);
3142}
3143
3144template <>
3145OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<3u>(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const DataType<uint8_t, 3u>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, 3u>::Type* targetPositionPixels)
3146{
3147 ocean_assert(source != nullptr);
3148 ocean_assert(targetPositionPixels != nullptr);
3149
3150 using PixelType = typename DataType<uint8_t, 3u>::Type;
3151
3152 // as we do not initialize the following intermediate data,
3153 // we hopefully will not allocate memory on the stack each time this function is called
3154 OCEAN_ALIGN_DATA(16) PixelType topLeftPixels[6];
3155 OCEAN_ALIGN_DATA(16) PixelType topRightPixels[6];
3156 OCEAN_ALIGN_DATA(16) PixelType bottomLeftPixels[6];
3157 OCEAN_ALIGN_DATA(16) PixelType bottomRightPixels[6];
3158
3159 // we gather the individual source pixel values from the source image,
3160 // based on the calculated pixel locations
3161 for (unsigned int i = 0u; i < 4u; ++i)
3162 {
3163 if (validPixels[i])
3164 {
3165 topLeftPixels[i] = *((PixelType*)(source + offsetsTopLeft[i]));
3166 topRightPixels[i] = *((PixelType*)(source + offsetsTopRight[i]));
3167 bottomLeftPixels[i] = *((PixelType*)(source + offsetsBottomLeft[i]));
3168 bottomRightPixels[i] = *((PixelType*)(source + offsetsBottomRight[i]));
3169 }
3170 else
3171 {
3172 topLeftPixels[i] = borderColor;
3173 topRightPixels[i] = borderColor;
3174 bottomLeftPixels[i] = borderColor;
3175 bottomRightPixels[i] = borderColor;
3176 }
3177 }
3178
3179 static_assert(sizeof(__m128i) <= sizeof(topLeftPixels), "Invalid data type!");
3180
3181 const __m128i m128_topLeftPixels = _mm_load_si128((const __m128i*)topLeftPixels);
3182 const __m128i m128_topRightPixels = _mm_load_si128((const __m128i*)topRightPixels);
3183 const __m128i m128_bottomLeftPixels = _mm_load_si128((const __m128i*)bottomLeftPixels);
3184 const __m128i m128_bottomRightPixels = _mm_load_si128((const __m128i*)bottomRightPixels);
3185
3186
3187 // factorLeft = 128 - factorRight
3188 // factorTop = 128 - factorBottom
3189
3190 const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3191 const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3192
3193 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3194 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3195
3196 const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3197 const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3198 const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3199 const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3200
3201
3202 const __m128i m128_interpolationResult = interpolate4Pixels8BitPerChannelSSE<3u>(m128_topLeftPixels, m128_topRightPixels, m128_bottomLeftPixels, m128_bottomRightPixels, m128_factorsTopLeft, m128_factorsTopRight, m128_factorsBottomLeft, m128_factorsBottomRight);
3203
3204 // we copy the first 12 bytes
3205 memcpy(targetPositionPixels, &m128_interpolationResult, 12u);
3206}
3207
3208#endif // OCEAN_COMPILER_MSC
3209
3210template <>
3211OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE<4u>(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const DataType<uint8_t, 4u>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, 4u>::Type* targetPositionPixels)
3212{
3213 ocean_assert(source != nullptr);
3214 ocean_assert(targetPositionPixels != nullptr);
3215
3216 using PixelType = typename DataType<uint8_t, 4u>::Type;
3217
3218 // as we do not initialize the following intermediate data,
3219 // we hopefully will not allocate memory on the stack each time this function is called
3220 OCEAN_ALIGN_DATA(16) PixelType topLeftPixels[4];
3221 OCEAN_ALIGN_DATA(16) PixelType topRightPixels[4];
3222 OCEAN_ALIGN_DATA(16) PixelType bottomLeftPixels[4];
3223 OCEAN_ALIGN_DATA(16) PixelType bottomRightPixels[4];
3224
3225 // we gather the individual source pixel values from the source image,
3226 // based on the calculated pixel locations
3227
3228 for (unsigned int i = 0u; i < 4u; ++i)
3229 {
3230 if (validPixels[i])
3231 {
3232 topLeftPixels[i] = *((PixelType*)(source + offsetsTopLeft[i]));
3233 topRightPixels[i] = *((PixelType*)(source + offsetsTopRight[i]));
3234 bottomLeftPixels[i] = *((PixelType*)(source + offsetsBottomLeft[i]));
3235 bottomRightPixels[i] = *((PixelType*)(source + offsetsBottomRight[i]));
3236 }
3237 else
3238 {
3239 topLeftPixels[i] = borderColor;
3240 topRightPixels[i] = borderColor;
3241 bottomLeftPixels[i] = borderColor;
3242 bottomRightPixels[i] = borderColor;
3243 }
3244 }
3245
3246 static_assert(sizeof(__m128i) == sizeof(topLeftPixels), "Invalid data type!");
3247
3248 const __m128i m128_topLeftPixels = _mm_load_si128((const __m128i*)topLeftPixels);
3249 const __m128i m128_topRightPixels = _mm_load_si128((const __m128i*)topRightPixels);
3250 const __m128i m128_bottomLeftPixels = _mm_load_si128((const __m128i*)bottomLeftPixels);
3251 const __m128i m128_bottomRightPixels = _mm_load_si128((const __m128i*)bottomRightPixels);
3252
3253
3254 // factorLeft = 128 - factorRight
3255 // factorTop = 128 - factorBottom
3256
3257 const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3258 const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3259
3260 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3261 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3262
3263 const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3264 const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3265 const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3266 const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3267
3268
3269 const __m128i m128_interpolationResult = interpolate4Pixels8BitPerChannelSSE<4u>(m128_topLeftPixels, m128_topRightPixels, m128_bottomLeftPixels, m128_bottomRightPixels, m128_factorsTopLeft, m128_factorsTopRight, m128_factorsBottomLeft, m128_factorsBottomRight);
3270
3271 _mm_storeu_si128((__m128i*)targetPositionPixels, m128_interpolationResult);
3272}
3273
3274template <unsigned int tChannels>
3275OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelSSE(const uint8_t* source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const typename DataType<uint8_t, tChannels>::Type& borderColor, const __m128i& m128_factorsRight, const __m128i& m128_factorsBottom, typename DataType<uint8_t, tChannels>::Type* targetPositionPixels)
3276{
3277 ocean_assert(source != nullptr);
3278 ocean_assert(targetPositionPixels != nullptr);
3279
3280 // as we do not initialize the following intermediate data,
3281 // we hopefully will not allocate memory on the stack each time this function is called
3282 OCEAN_ALIGN_DATA(16) unsigned int factorsTopLeft[4];
3283 OCEAN_ALIGN_DATA(16) unsigned int factorsTopRight[4];
3284 OCEAN_ALIGN_DATA(16) unsigned int factorsBottomLeft[4];
3285 OCEAN_ALIGN_DATA(16) unsigned int factorsBottomRight[4];
3286
3287
3288 // factorLeft = 128 - factorRight
3289 // factorTop = 128 - factorBottom
3290
3291 const __m128i m128_factorsLeft = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsRight);
3292 const __m128i m128_factorsTop = _mm_sub_epi32(_mm_set1_epi32(128), m128_factorsBottom);
3293
3294 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3295 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3296
3297 const __m128i m128_factorsTopLeft = _mm_mullo_epi32(m128_factorsTop, m128_factorsLeft);
3298 const __m128i m128_factorsTopRight = _mm_mullo_epi32(m128_factorsTop, m128_factorsRight);
3299 const __m128i m128_factorsBottomLeft = _mm_mullo_epi32(m128_factorsBottom, m128_factorsLeft);
3300 const __m128i m128_factorsBottomRight = _mm_mullo_epi32(m128_factorsBottom, m128_factorsRight);
3301
3302
3303 // we store the interpolation factors
3304 _mm_store_si128((__m128i*)factorsTopLeft, m128_factorsTopLeft);
3305 _mm_store_si128((__m128i*)factorsTopRight, m128_factorsTopRight);
3306 _mm_store_si128((__m128i*)factorsBottomLeft, m128_factorsBottomLeft);
3307 _mm_store_si128((__m128i*)factorsBottomRight, m128_factorsBottomRight);
3308
3309 for (unsigned int i = 0u; i < 4u; ++i)
3310 {
3311 if (validPixels[i])
3312 {
3313 const uint8_t* topLeft = source + offsetsTopLeft[i];
3314 const uint8_t* topRight = source + offsetsTopRight[i];
3315
3316 const uint8_t* bottomLeft = source + offsetsBottomLeft[i];
3317 const uint8_t* bottomRight = source + offsetsBottomRight[i];
3318
3319 const unsigned int& factorTopLeft = factorsTopLeft[i];
3320 const unsigned int& factorTopRight = factorsTopRight[i];
3321 const unsigned int& factorBottomLeft = factorsBottomLeft[i];
3322 const unsigned int& factorBottomRight = factorsBottomRight[i];
3323
3324 for (unsigned int n = 0u; n < tChannels; ++n)
3325 {
3326 ((uint8_t*)targetPositionPixels)[n] = (uint8_t)((topLeft[n] * factorTopLeft + topRight[n] * factorTopRight + bottomLeft[n] * factorBottomLeft + bottomRight[n] * factorBottomRight + 8192u) >> 14u);
3327 }
3328 }
3329 else
3330 {
3331 *targetPositionPixels = borderColor;
3332 }
3333
3334 targetPositionPixels++;
3335 }
3336}
3337
3338#endif // OCEAN_HARDWARE_SSE_VERSION
3339
3340#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3341
3342template <unsigned int tChannels>
3343void FrameInterpolatorBilinear::affine8BitPerChannelNEONSubset(const uint8_t* source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3* source_A_target, const uint8_t* borderColor, uint8_t* target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
3344{
3345 static_assert(tChannels >= 1u, "Invalid channel number!");
3346
3347 ocean_assert(source && target);
3348 ocean_assert(sourceWidth > 0u && sourceHeight > 0u);
3349 ocean_assert_and_suppress_unused(targetWidth >= 4u && targetHeight > 0u, targetHeight);
3350 ocean_assert(source_A_target);
3351 ocean_assert(!source_A_target->isNull() && Numeric::isEqualEps((*source_A_target)[2]) && Numeric::isEqualEps((*source_A_target)[5]));
3352
3353 ocean_assert(firstTargetRow + numberTargetRows <= targetHeight);
3354
3355 const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
3356 const unsigned int targetStrideElements = targetWidth * tChannels + targetPaddingElements;
3357
3358 using PixelType = typename DataType<uint8_t, tChannels>::Type;
3359
3360 uint8_t zeroColor[tChannels] = {uint8_t(0)};
3361 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
3362
3363 unsigned int validPixels[4];
3364
3365 unsigned int topLeftOffsetsElements[4];
3366 unsigned int topRightOffsetsElements[4];
3367 unsigned int bottomLeftOffsetsElements[4];
3368 unsigned int bottomRightOffsetsElements[4];
3369
3370 const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(tChannels);
3371
3372 // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
3373 const float32x4_t m128_f_X0 = vdupq_n_f32(float((*source_A_target)(0, 0)));
3374 const float32x4_t m128_f_X1 = vdupq_n_f32(float((*source_A_target)(1, 0)));
3375
3376 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
3377 {
3378 PixelType* targetRow = (PixelType*)(target + y * targetStrideElements);
3379
3380 /*
3381 * We can slightly optimize the 3x3 matrix multiplication:
3382 *
3383 * | X0 Y0 Z0 | | x |
3384 * | X1 Y1 Z1 | * | y |
3385 * | 0 0 1 | | 1 |
3386 *
3387 * | xx | | X0 * x | | Y0 * y + Z0 |
3388 * | yy | = | X1 * x | + | Y1 * y + Z1 |
3389 *
3390 * As y is constant within the inner loop, the two terms on the right side in the above equations can be pre-calculated:
3391 *
3392 * C0 = Y0 * y + Z0
3393 * C1 = Y1 * y + Z1
3394 *
3395 * So the computation becomes:
3396 *
3397 * | x' | | X0 * x | | C0 |
3398 * | y' | = | X1 * x | + | C1 |
3399 */
3400
3401 // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
3402 const float32x4_t m128_f_C0 = vdupq_n_f32(float((*source_A_target)(0, 1) * Scalar(y) + (*source_A_target)(0, 2)));
3403 const float32x4_t m128_f_C1 = vdupq_n_f32(float((*source_A_target)(1, 1) * Scalar(y) + (*source_A_target)(1, 2)));
3404
3405 // we store 4 floats: [0.0f, 0.0f, 0.0f, 0.0f]
3406 const float32x4_t m128_f_zero = vdupq_n_f32(0.0f);
3407
3408 // we store 4 integers: [sourceStrideElements, sourceStrideElements, sourceStrideElements, sourceStrideElements]
3409 const uint32x4_t m128_u_sourceStrideElements = vdupq_n_u32(sourceStrideElements);
3410
3411 // we store 4 integers: [sourceWidth - 1, sourceWidth - 1, sourceWidth - 1, sourceWidth - 1], and same with sourceHeight
3412 const uint32x4_t m128_u_sourceWidth_1 = vdupq_n_u32(sourceWidth - 1u);
3413 const uint32x4_t m128_u_sourceHeight_1 = vdupq_n_u32(sourceHeight - 1u);
3414
3415 // we store 4 floats: [sourceWidth - 1, sourceWidth - 1, sourceWidth - 1, sourceWidth - 1], and same with sourceHeight
3416 const float32x4_t m128_f_sourceWidth_1 = vdupq_n_f32(float(sourceWidth - 1u));
3417 const float32x4_t m128_f_sourceHeight_1 = vdupq_n_f32(float(sourceHeight - 1u));
3418
3419 for (unsigned int x = 0u; x < targetWidth; x += 4u)
3420 {
3421 if (x + 4u > targetWidth)
3422 {
3423 // the last iteration will not fit into the target frame,
3424 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
3425
3426 ocean_assert(x >= 4u && targetWidth > 4u);
3427 const unsigned int newX = targetWidth - 4u;
3428
3429 ocean_assert(x > newX);
3430 targetRow -= x - newX;
3431
3432 x = newX;
3433
3434 // the for loop will stop after this iteration
3435 ocean_assert(!(x + 4u < targetWidth));
3436 }
3437
3438
3439 // we need four successive x coordinate floats:
3440 // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
3441 float x_0123[4] = {float(x + 0u), float(x + 1u), float(x + 2u), float(x + 3u)};
3442 const float32x4_t m128_f_x_0123 = vld1q_f32(x_0123);
3443
3444 // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
3445 const float32x4_t m128_f_sourceX = vmlaq_f32(m128_f_C0, m128_f_X0, m128_f_x_0123);
3446 const float32x4_t m128_f_sourceY = vmlaq_f32(m128_f_C1, m128_f_X1, m128_f_x_0123);
3447
3448
3449 // now we check whether we are inside the source frame
3450 const uint32x4_t m128_u_validPixelX = vandq_u32(vcleq_f32(m128_f_sourceX, m128_f_sourceWidth_1), vcgeq_f32(m128_f_sourceX, m128_f_zero)); // sourcePosition.x() <= (sourceWidth - 1) && sourcePosition.x() >= 0 ? 0xFFFFFFFF : 0x00000000
3451 const uint32x4_t m128_u_validPixelY = vandq_u32(vcleq_f32(m128_f_sourceY, m128_f_sourceHeight_1), vcgeq_f32(m128_f_sourceY, m128_f_zero)); // sourcePosition.y() <= (sourceHeight - 1) && sourcePosition.y() >= 0 ? 0xFFFFFFFF : 0x00000000
3452
3453 const uint32x4_t m128_u_validPixel = vandq_u32(m128_u_validPixelX, m128_u_validPixelY); // is_inside_source_frame(sourcePosition) ? 0xFFFFFFFF : 0x00000000
3454
3455
3456 // we can stop here if all pixels are invalid
3457 const uint32x2_t m64_u_validPixel = vorr_u32(vget_low_u32(m128_u_validPixel), vget_high_u32(m128_u_validPixel));
3458 if ((vget_lane_u32(m64_u_validPixel, 0) | vget_lane_u32(m64_u_validPixel, 1)) == 0x00000000u)
3459 {
3460#ifdef OCEAN_DEBUG
3461 OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
3462 vst1q_u32(debugValidPixels, m128_u_validPixel);
3463 ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
3464#endif
3465
3466 targetRow[0] = *bColor;
3467 targetRow[1] = *bColor;
3468 targetRow[2] = *bColor;
3469 targetRow[3] = *bColor;
3470
3471 targetRow += 4;
3472
3473 continue;
3474 }
3475
3476
3477 // we store the result
3478 vst1q_u32(validPixels, m128_u_validPixel);
3479 ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
3480
3481
3482 // now we determine the left, top, right and bottom pixel used for the interpolation
3483 // left = floor(x); top = floor(y)
3484 const uint32x4_t m128_u_left = vcvtq_u32_f32(m128_f_sourceX);
3485 const uint32x4_t m128_u_top = vcvtq_u32_f32(m128_f_sourceY);
3486
3487 // right = min(left + 1, width - 1); bottom = min(top + 1; height - 1)
3488 const uint32x4_t m128_u_right = vminq_u32(vaddq_u32(m128_u_left, vdupq_n_u32(1u)), m128_u_sourceWidth_1);
3489 const uint32x4_t m128_u_bottom = vminq_u32(vaddq_u32(m128_u_top, vdupq_n_u32(1u)), m128_u_sourceHeight_1);
3490
3491 // offset = y * stride + x * channels
3492 const uint32x4_t m128_u_topLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_top, m128_u_sourceStrideElements); // topLeftOffset = top * strideElements + left * channels
3493 const uint32x4_t m128_u_topRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_top, m128_u_sourceStrideElements); // topRightOffset = top * strideElements + right * channels
3494 const uint32x4_t m128_u_bottomLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_bottom, m128_u_sourceStrideElements); // ...
3495 const uint32x4_t m128_u_bottomRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_bottom, m128_u_sourceStrideElements);
3496
3497 // we store the offsets
3498 vst1q_u32(topLeftOffsetsElements, m128_u_topLeftOffsetElements);
3499 vst1q_u32(topRightOffsetsElements, m128_u_topRightOffsetElements);
3500 vst1q_u32(bottomLeftOffsetsElements, m128_u_bottomLeftOffsetElements);
3501 vst1q_u32(bottomRightOffsetsElements, m128_u_bottomRightOffsetElements);
3502
3503
3504 // now we need to determine the interpolation factors tx, tx_ and ty, ty_: (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
3505
3506 // we determine the fractional portions of the x' and y':
3507 float32x4_t m128_f_tx = vsubq_f32(m128_f_sourceX, vcvtq_f32_u32(m128_u_left));
3508 float32x4_t m128_f_ty = vsubq_f32(m128_f_sourceY, vcvtq_f32_u32(m128_u_top));
3509
3510 // we use integer interpolation [0.0, 1.0] -> [0, 128]
3511 m128_f_tx = vmulq_f32(m128_f_tx, vdupq_n_f32(128.0f));
3512 m128_f_ty = vmulq_f32(m128_f_ty, vdupq_n_f32(128.0f));
3513
3514 const uint32x4_t m128_u_tx = vcvtq_u32_f32(vaddq_f32(m128_f_tx, vdupq_n_f32(0.5)));
3515 const uint32x4_t m128_u_ty = vcvtq_u32_f32(vaddq_f32(m128_f_ty, vdupq_n_f32(0.5)));
3516
3517 if constexpr (tChannels > 4u)
3518 {
3519 // normally we would simply call instead of copying the code of the function to this location
3520 // however, if calling the function instead of applying the code here directly
3521 // clang ends with code approx. 20% slower
3522 // thus we make a copy of the code and keep the function for demonstration purposes
3523
3524 //interpolate4Pixels8BitPerChannelNEON<tChannels>(source, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, targetPixelData);
3525 //targetPixelData += 4;
3526
3527 const uint32x4_t m128_u_tx_ = vsubq_u32(vdupq_n_u32(128u), m128_u_tx);
3528 const uint32x4_t m128_u_ty_ = vsubq_u32(vdupq_n_u32(128u), m128_u_ty);
3529
3530 // (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
3531 // == top_left * tx_ty_ + top_right * txty_ + bottom_left * tx_ty + bottom_right * txty
3532 const uint32x4_t m128_u_tx_ty_ = vmulq_u32(m128_u_tx_, m128_u_ty_);
3533 const uint32x4_t m128_u_txty_ = vmulq_u32(m128_u_tx, m128_u_ty_);
3534 const uint32x4_t m128_u_tx_ty = vmulq_u32(m128_u_tx_, m128_u_ty);
3535 const uint32x4_t m128_u_txty = vmulq_u32(m128_u_tx, m128_u_ty);
3536
3537 unsigned int tx_ty_s[4];
3538 unsigned int txty_s[4];
3539 unsigned int tx_tys[4];
3540 unsigned int txtys[4];
3541
3542 // we store the interpolation factors
3543 vst1q_u32(tx_ty_s, m128_u_tx_ty_);
3544 vst1q_u32(txty_s, m128_u_txty_);
3545 vst1q_u32(tx_tys, m128_u_tx_ty);
3546 vst1q_u32(txtys, m128_u_txty);
3547
3548 for (unsigned int i = 0u; i < 4u; ++i)
3549 {
3550 if (validPixels[i])
3551 {
3552 ocean_assert(topLeftOffsetsElements[i] < sourceStrideElements * sourceHeight);
3553 ocean_assert(topRightOffsetsElements[i] < sourceStrideElements * sourceHeight);
3554 ocean_assert(bottomLeftOffsetsElements[i] < sourceStrideElements * sourceHeight);
3555 ocean_assert(bottomRightOffsetsElements[i] < sourceStrideElements * sourceHeight);
3556
3557 const uint8_t* topLeft = source + topLeftOffsetsElements[i];
3558 const uint8_t* topRight = source + topRightOffsetsElements[i];
3559
3560 const uint8_t* bottomLeft = source + bottomLeftOffsetsElements[i];
3561 const uint8_t* bottomRight = source + bottomRightOffsetsElements[i];
3562
3563 const unsigned int tx_ty_ = tx_ty_s[i];
3564 const unsigned int txty_ = txty_s[i];
3565 const unsigned int tx_ty = tx_tys[i];
3566 const unsigned int txty = txtys[i];
3567
3568 ocean_assert(tx_ty_ + txty_ + tx_ty + txty == 128u * 128u);
3569
3570 for (unsigned int n = 0u; n < tChannels; ++n)
3571 {
3572 ((uint8_t*)targetRow)[n] = uint8_t((topLeft[n] * tx_ty_ + topRight[n] * txty_ + bottomLeft[n] * tx_ty + bottomRight[n] * txty + 8192u) >> 14u);
3573 }
3574 }
3575 else
3576 {
3577 *targetRow = *bColor;
3578 }
3579
3580 targetRow++;
3581 }
3582 }
3583 else
3584 {
3585 interpolate4Pixels8BitPerChannelNEON<tChannels>(source, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, targetRow);
3586 targetRow += 4;
3587 }
3588 }
3589 }
3590}
3591
3592template <unsigned int tChannels>
3593void FrameInterpolatorBilinear::homography8BitPerChannelNEONSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, const uint8_t* borderColor, uint8_t* output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
3594{
3595 static_assert(tChannels >= 1u, "Invalid channel number!");
3596
3597 ocean_assert(input != nullptr && output != nullptr);
3598 ocean_assert(inputWidth > 0u && inputHeight > 0u);
3599 ocean_assert_and_suppress_unused(outputWidth >= 4u && outputHeight > 0u, outputHeight);
3600 ocean_assert(input_H_output != nullptr);
3601
3602 ocean_assert(firstOutputRow + numberOutputRows <= outputHeight);
3603
3604 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
3605 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
3606
3607 using PixelType = typename DataType<uint8_t, tChannels>::Type;
3608
3609 uint8_t zeroColor[tChannels] = {uint8_t(0)};
3610 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
3611
3612 unsigned int validPixels[4];
3613
3614 unsigned int topLeftOffsetsElements[4];
3615 unsigned int topRightOffsetsElements[4];
3616 unsigned int bottomLeftOffsetsElements[4];
3617 unsigned int bottomRightOffsetsElements[4];
3618
3619 const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(tChannels);
3620
3621 // we store 4 floats: [X0, X0, X0, X0], and same with X1 and X2
3622 const float32x4_t m128_f_X0 = vdupq_n_f32(float((*input_H_output)(0, 0)));
3623 const float32x4_t m128_f_X1 = vdupq_n_f32(float((*input_H_output)(1, 0)));
3624 const float32x4_t m128_f_X2 = vdupq_n_f32(float((*input_H_output)(2, 0)));
3625
3626 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
3627 {
3628 PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
3629
3630 /*
3631 * We can slightly optimize the 3x3 matrix multiplication:
3632 *
3633 * | X0 Y0 Z0 | | x |
3634 * | X1 Y1 Z1 | * | y |
3635 * | X2 Y2 Z2 | | 1 |
3636 *
3637 * | xx | | X0 * x | | Y0 * y + Z0 |
3638 * | yy | = | X1 * x | + | Y1 * y + Z1 |
3639 * | zz | | X2 * x | | Y2 * y + Z2 |
3640 *
3641 * | xx | | X0 * x | | C0 |
3642 * | yy | = | X1 * x | + | C1 |
3643 * | zz | | X2 * x | | C3 |
3644 *
3645 * As y is constant within the inner loop, we can pre-calculate the following terms:
3646 *
3647 * | x' | | (X0 * x + C0) / (X2 * x + C2) |
3648 * | y' | = | (X1 * x + C1) / (X2 * x + C2) |
3649 */
3650
3651 // we store 4 floats: [C0, C0, C0, C0], and same with C1 and C2
3652 const float32x4_t m128_f_C0 = vdupq_n_f32(float((*input_H_output)(0, 1) * Scalar(y) + (*input_H_output)(0, 2)));
3653 const float32x4_t m128_f_C1 = vdupq_n_f32(float((*input_H_output)(1, 1) * Scalar(y) + (*input_H_output)(1, 2)));
3654 const float32x4_t m128_f_C2 = vdupq_n_f32(float((*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2)));
3655
3656 // we store 4 floats: [0.0f, 0.0f, 0.0f, 0.0f]
3657 const float32x4_t m128_f_zero = vdupq_n_f32(0.0f);
3658
3659 // we store 4 integers: [inputStrideElements, inputStrideElements, inputStrideElements, inputStrideElements]
3660 const uint32x4_t m128_u_inputStrideElements = vdupq_n_u32(inputStrideElements);
3661
3662 // we store 4 integers: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
3663 const uint32x4_t m128_u_inputWidth_1 = vdupq_n_u32(inputWidth - 1u);
3664 const uint32x4_t m128_u_inputHeight_1 = vdupq_n_u32(inputHeight - 1u);
3665
3666 // we store 4 floats: [inputWidth - 1, inputWidth - 1, inputWidth - 1, inputWidth - 1], and same with inputHeight
3667 const float32x4_t m128_f_inputWidth_1 = vdupq_n_f32(float(inputWidth - 1u));
3668 const float32x4_t m128_f_inputHeight_1 = vdupq_n_f32(float(inputHeight - 1u));
3669
3670 for (unsigned int x = 0u; x < outputWidth; x += 4u)
3671 {
3672 if (x + 4u > outputWidth)
3673 {
3674 // the last iteration will not fit into the output frame,
3675 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
3676
3677 ocean_assert(x >= 4u && outputWidth > 4u);
3678 const unsigned int newX = outputWidth - 4u;
3679
3680 ocean_assert(x > newX);
3681 outputPixelData -= x - newX;
3682
3683 x = newX;
3684
3685 // the for loop will stop after this iteration
3686 ocean_assert(!(x + 4u < outputWidth));
3687 }
3688
3689
3690 // we need four successive x coordinate floats:
3691 // [x + 3.0f, x + 2.0f, x + 1.0f; x + 0.0f]
3692 float x_0123[4] = {float(x + 0u), float(x + 1u), float(x + 2u), float(x + 3u)};
3693 const float32x4_t m128_f_x_0123 = vld1q_f32(x_0123);
3694
3695 // we calculate xx and yy and zz for [x + 3.0f, x + 2.0f, x + 1.0f, x + 0.0f]
3696 const float32x4_t m128_f_xx = vmlaq_f32(m128_f_C0, m128_f_X0, m128_f_x_0123);
3697 const float32x4_t m128_f_yy = vmlaq_f32(m128_f_C1, m128_f_X1, m128_f_x_0123);
3698 const float32x4_t m128_f_zz = vmlaq_f32(m128_f_C2, m128_f_X2, m128_f_x_0123);
3699
3700#ifdef USE_DIVISION_ARM64_ARCHITECTURE
3701
3702 // using the division available from ARM64 is more precise
3703 const float32x4_t m128_f_inputX = vdivq_f32(m128_f_xx, m128_f_zz);
3704 const float32x4_t m128_f_inputY = vdivq_f32(m128_f_yy, m128_f_zz);
3705
3706#else
3707
3708 // we calculate the (approximated) inverse of zz
3709 // [1/zz3, 1/zz2, 1/zz1, 1/zz0]
3710 float32x4_t inv_zz_128 = vrecpeq_f32(m128_f_zz);
3711 inv_zz_128 = vmulq_f32(vrecpsq_f32(m128_f_zz, inv_zz_128), inv_zz_128); // improving the accuracy of the approx. inverse by Newton/Raphson
3712
3713 // we determine the normalized coordinates x' and y' for for x + 3.0f, x + 2.0f, ...)
3714 const float32x4_t m128_f_inputX = vmulq_f32(m128_f_xx, inv_zz_128);
3715 const float32x4_t m128_f_inputY = vmulq_f32(m128_f_yy, inv_zz_128);
3716
3717#endif // USE_DIVISION_ARM64_ARCHITECTURE
3718
3719
3720 // now we check whether we are inside the input frame
3721 const uint32x4_t m128_u_validPixelX = vandq_u32(vcleq_f32(m128_f_inputX, m128_f_inputWidth_1), vcgeq_f32(m128_f_inputX, m128_f_zero)); // inputPosition.x() >= 0 && inputPosition.x() <= (inputWidth - 1) ? 0xFFFFFF : 0x000000
3722 const uint32x4_t m128_u_validPixelY = vandq_u32(vcleq_f32(m128_f_inputY, m128_f_inputHeight_1), vcgeq_f32(m128_f_inputY, m128_f_zero)); // inputPosition.y() >= 0 && inputPosition.y() <= (inputHeight - 1) ? 0xFFFFFF : 0x000000
3723
3724 const uint32x4_t m128_u_validPixel = vandq_u32(m128_u_validPixelX, m128_u_validPixelY); // is_inside_input_frame(inputPosition) ? 0xFFFFFF : 0x000000
3725
3726
3727 // we can stop here if all pixels are invalid
3728 const uint32x2_t m64_u_validPixel = vorr_u32(vget_low_u32(m128_u_validPixel), vget_high_u32(m128_u_validPixel));
3729 if ((vget_lane_u32(m64_u_validPixel, 0) | vget_lane_u32(m64_u_validPixel, 1)) == 0x00000000u)
3730 {
3731#ifdef OCEAN_DEBUG
3732 OCEAN_ALIGN_DATA(16) unsigned int debugValidPixels[4];
3733 vst1q_u32(debugValidPixels, m128_u_validPixel);
3734 ocean_assert(!(debugValidPixels[0] || debugValidPixels[1] || debugValidPixels[2] || debugValidPixels[3]));
3735#endif
3736
3737 outputPixelData[0] = *bColor;
3738 outputPixelData[1] = *bColor;
3739 outputPixelData[2] = *bColor;
3740 outputPixelData[3] = *bColor;
3741
3742 outputPixelData += 4;
3743
3744 continue;
3745 }
3746
3747
3748 // we store the result
3749 vst1q_u32(validPixels, m128_u_validPixel);
3750 ocean_assert(validPixels[0] || validPixels[1] || validPixels[2] || validPixels[3]);
3751
3752
3753 // now we determine the left, top, right and bottom pixel used for the interpolation
3754 // left = floor(x); top = floor(y)
3755 const uint32x4_t m128_u_left = vcvtq_u32_f32(m128_f_inputX);
3756 const uint32x4_t m128_u_top = vcvtq_u32_f32(m128_f_inputY);
3757
3758 // right = min(left + 1, width - 1); bottom = min(top + 1; height - 1)
3759 const uint32x4_t m128_u_right = vminq_u32(vaddq_u32(m128_u_left, vdupq_n_u32(1u)), m128_u_inputWidth_1);
3760 const uint32x4_t m128_u_bottom = vminq_u32(vaddq_u32(m128_u_top, vdupq_n_u32(1u)), m128_u_inputHeight_1);
3761
3762 // offset = y * stride + x * channels
3763 const uint32x4_t m128_u_topLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_top, m128_u_inputStrideElements); // topLeftOffset = top * strideElements + left * channels
3764 const uint32x4_t m128_u_topRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_top, m128_u_inputStrideElements); // topRightOffset = top * strideElements + right * channels
3765 const uint32x4_t m128_u_bottomLeftOffsetElements = vmlaq_u32(vmulq_u32(m128_u_left, constantChannels_u_32x4), m128_u_bottom, m128_u_inputStrideElements); // ...
3766 const uint32x4_t m128_u_bottomRightOffsetElements = vmlaq_u32(vmulq_u32(m128_u_right, constantChannels_u_32x4), m128_u_bottom, m128_u_inputStrideElements);
3767
3768 // we store the offsets
3769 vst1q_u32(topLeftOffsetsElements, m128_u_topLeftOffsetElements);
3770 vst1q_u32(topRightOffsetsElements, m128_u_topRightOffsetElements);
3771 vst1q_u32(bottomLeftOffsetsElements, m128_u_bottomLeftOffsetElements);
3772 vst1q_u32(bottomRightOffsetsElements, m128_u_bottomRightOffsetElements);
3773
3774
3775 // now we need to determine the interpolation factors tx, tx_ and ty, ty_: (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
3776
3777 // we determine the fractional portions of the x' and y':
3778 float32x4_t m128_f_tx = vsubq_f32(m128_f_inputX, vcvtq_f32_u32(m128_u_left));
3779 float32x4_t m128_f_ty = vsubq_f32(m128_f_inputY, vcvtq_f32_u32(m128_u_top));
3780
3781 // we use integer interpolation [0.0, 1.0] -> [0, 128]
3782 m128_f_tx = vmulq_f32(m128_f_tx, vdupq_n_f32(128.0f));
3783 m128_f_ty = vmulq_f32(m128_f_ty, vdupq_n_f32(128.0f));
3784
3785 const uint32x4_t m128_u_tx = vcvtq_u32_f32(vaddq_f32(m128_f_tx, vdupq_n_f32(0.5)));
3786 const uint32x4_t m128_u_ty = vcvtq_u32_f32(vaddq_f32(m128_f_ty, vdupq_n_f32(0.5)));
3787
3788 if constexpr (tChannels > 4u)
3789 {
3790 // normally we would simply call instead of copying the code of the function to this location
3791 // however, if calling the function instead of applying the code here directly
3792 // clang ends with code approx. 20% slower
3793 // thus we make a copy of the code and keep the function for demonstration purposes
3794
3795 //interpolate4Pixels8BitPerChannelNEON<tChannels>(input, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, outputPixelData);
3796 //outputPixelData += 4;
3797
3798 const uint32x4_t m128_u_tx_ = vsubq_u32(vdupq_n_u32(128u), m128_u_tx);
3799 const uint32x4_t m128_u_ty_ = vsubq_u32(vdupq_n_u32(128u), m128_u_ty);
3800
3801 // (top_left * tx_ + top_right * tx) * ty_ + (bottom_left * tx_ + bottom_right * tx) * ty
3802 // == top_left * tx_ty_ + top_right * txty_ + bottom_left * tx_ty + bottom_right * txty
3803 const uint32x4_t m128_u_tx_ty_ = vmulq_u32(m128_u_tx_, m128_u_ty_);
3804 const uint32x4_t m128_u_txty_ = vmulq_u32(m128_u_tx, m128_u_ty_);
3805 const uint32x4_t m128_u_tx_ty = vmulq_u32(m128_u_tx_, m128_u_ty);
3806 const uint32x4_t m128_u_txty = vmulq_u32(m128_u_tx, m128_u_ty);
3807
3808 unsigned int tx_ty_s[4];
3809 unsigned int txty_s[4];
3810 unsigned int tx_tys[4];
3811 unsigned int txtys[4];
3812
3813 // we store the interpolation factors
3814 vst1q_u32(tx_ty_s, m128_u_tx_ty_);
3815 vst1q_u32(txty_s, m128_u_txty_);
3816 vst1q_u32(tx_tys, m128_u_tx_ty);
3817 vst1q_u32(txtys, m128_u_txty);
3818
3819 for (unsigned int i = 0u; i < 4u; ++i)
3820 {
3821 if (validPixels[i])
3822 {
3823 ocean_assert(topLeftOffsetsElements[i] < inputStrideElements * inputHeight);
3824 ocean_assert(topRightOffsetsElements[i] < inputStrideElements * inputHeight);
3825 ocean_assert(bottomLeftOffsetsElements[i] < inputStrideElements * inputHeight);
3826 ocean_assert(bottomRightOffsetsElements[i] < inputStrideElements * inputHeight);
3827
3828 const uint8_t* topLeft = input + topLeftOffsetsElements[i];
3829 const uint8_t* topRight = input + topRightOffsetsElements[i];
3830
3831 const uint8_t* bottomLeft = input + bottomLeftOffsetsElements[i];
3832 const uint8_t* bottomRight = input + bottomRightOffsetsElements[i];
3833
3834 const unsigned int tx_ty_ = tx_ty_s[i];
3835 const unsigned int txty_ = txty_s[i];
3836 const unsigned int tx_ty = tx_tys[i];
3837 const unsigned int txty = txtys[i];
3838
3839 ocean_assert(tx_ty_ + txty_ + tx_ty + txty == 128u * 128u);
3840
3841 for (unsigned int n = 0u; n < tChannels; ++n)
3842 {
3843 ((uint8_t*)outputPixelData)[n] = uint8_t((topLeft[n] * tx_ty_ + topRight[n] * txty_ + bottomLeft[n] * tx_ty + bottomRight[n] * txty + 8192u) >> 14u);
3844 }
3845 }
3846 else
3847 {
3848 *outputPixelData = *bColor;
3849 }
3850
3851 outputPixelData++;
3852 }
3853 }
3854 else
3855 {
3856 interpolate4Pixels8BitPerChannelNEON<tChannels>(input, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, m128_u_tx, m128_u_ty, outputPixelData);
3857 outputPixelData += 4;
3858 }
3859 }
3860 }
3861}
3862
3863template <>
3864OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<1u>(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const DataType<uint8_t, 1u>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, 1u>::Type* targetPositionPixels)
3865{
3866 ocean_assert(source != nullptr);
3867 ocean_assert(targetPositionPixels != nullptr);
3868
3869 // as we do not initialize the following intermediate data,
3870 // we hopefully will not allocate memory on the stack each time this function is called
3871 DataType<uint8_t, 1u>::Type pixels[16];
3872
3873 // we will store the pixel information in the following pattern:
3874 // F E D C B A 9 8 7 6 5 4 3 2 1 0
3875 // BR3 BL3 TR3 TL3 BR2 BL2 TR2 TL2 BR1 BL1 TR1 TL1 BR0 BL0 TR0 TL0
3876
3877 // we gather the individual source pixel values from the source image,
3878 // based on the calculated pixel locations
3879 for (unsigned int i = 0u; i < 4u; ++i)
3880 {
3881 if (validPixels[i])
3882 {
3883 pixels[i * 4u + 0u] = *((const DataType<uint8_t, 1u>::Type*)(source + offsetsTopLeftElements[i]));
3884 pixels[i * 4u + 1u] = *((const DataType<uint8_t, 1u>::Type*)(source + offsetsTopRightElements[i]));
3885 pixels[i * 4u + 2u] = *((const DataType<uint8_t, 1u>::Type*)(source + offsetsBottomLeftElements[i]));
3886 pixels[i * 4u + 3u] = *((const DataType<uint8_t, 1u>::Type*)(source + offsetsBottomRightElements[i]));
3887 }
3888 else
3889 {
3890 pixels[i * 4u + 0u] = borderColor;
3891 pixels[i * 4u + 1u] = borderColor;
3892 pixels[i * 4u + 2u] = borderColor;
3893 pixels[i * 4u + 3u] = borderColor;
3894 }
3895 }
3896
3897 static_assert(sizeof(uint8x16_t) == sizeof(pixels), "Invalid data type!");
3898
3899 const uint8x16_t m128_pixels = vld1q_u8((const uint8_t*)pixels);
3900
3901
3902 // factorLeft = 128 - factorRight
3903 // factorTop = 128 - factorBottom
3904
3905 const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
3906 const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
3907
3908 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
3909 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
3910
3911 const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
3912 const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
3913 const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
3914 const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
3915
3916 // pixels stores the four interpolation grascale pixel values (top left, top right, bottom left, bottom right) for 4 (independent) pixels:
3917 // F E D C B A 9 8 7 6 5 4 3 2 1 0
3918 // BR BL TR TL BR BL TR TL BR BL TR TL BR BL TR TL
3919
3920 // factorsTopLeft stores the 32 bit interpolation values for 4 pixels:
3921 // FEDC BA98 7654 3210
3922 // 3 2 1 0 (32 bit interpolation values, fitting into 16 bit)
3923
3924
3925 // we will simply extract each channel from the source pixels,
3926 // each extracted channel will be multiplied by the corresponding interpolation factor
3927 // and all interpolation results will be accumulated afterwards
3928
3929 const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
3930
3931 const uint32x4_t m128_muliplicationA = vmulq_u32(vandq_u32(vreinterpretq_u32_u8(m128_pixels), m128_maskFirstByte), m128_factorsTopLeft);
3932 const uint32x4_t m128_muliplicationB = vmulq_u32(vandq_u32(vshrq_n_u32(vreinterpretq_u32_u8(m128_pixels), 8), m128_maskFirstByte), m128_factorsTopRight);
3933 const uint32x4_t m128_muliplicationC = vmulq_u32(vandq_u32(vshrq_n_u32(vreinterpretq_u32_u8(m128_pixels), 16), m128_maskFirstByte), m128_factorsBottomLeft);
3934 const uint32x4_t m128_muliplicationD = vmulq_u32(vandq_u32(vshrq_n_u32(vreinterpretq_u32_u8(m128_pixels), 24), m128_maskFirstByte), m128_factorsBottomRight);
3935
3936 const uint32x4_t m128_multiplication = vaddq_u32(vaddq_u32(m128_muliplicationA, m128_muliplicationB), vaddq_u32(m128_muliplicationC, m128_muliplicationD));
3937
3938 // we add 8192 and shift by 14 bits
3939
3940 const uint8x16_t m128_interpolation = vreinterpretq_u8_u32(vshrq_n_u32(vaddq_u32(m128_multiplication, vdupq_n_u32(8192u)), 14));
3941
3942 // finally we have the following result:
3943 // ---C ---8 ---4 ---0
3944 // and we need to extract the four pixel values:
3945 //
3946 // NOTE: Because of a possible bug in Clang affecting ARMv7, vget_lane_u32()
3947 // seems to assume 32-bit memory alignment for output location, which cannot
3948 // be guaranteed. This results in bus errors and crashes the application.
3949 // ARM64 is not affected.
3950#if defined(__aarch64__)
3951
3952 constexpr uint8x8_t m64_mask0 = NEON::create_uint8x8(0, 4, 1, 1, 1, 1, 1, 1);
3953 constexpr uint8x8_t m64_mask1 = NEON::create_uint8x8(1, 1, 0, 4, 1, 1, 1, 1);
3954
3955 const uint8x8_t m64_interpolation01 = vtbl1_u8(vget_low_u8(m128_interpolation), m64_mask0);
3956 const uint8x8_t m64_interpolation23 = vtbl1_u8(vget_high_u8(m128_interpolation), m64_mask1);
3957
3958 const uint8x8_t m64_interpolation0123 = vorr_u8(m64_interpolation01, m64_interpolation23);
3959
3960 const uint32_t result = vget_lane_u32(vreinterpret_u32_u8(m64_interpolation0123), 0);
3961 memcpy(targetPositionPixels, &result, sizeof(uint32_t));
3962
3963#else
3964
3965 *((uint8_t*)targetPositionPixels + 0) = vgetq_lane_u8(m128_interpolation, 0);
3966 *((uint8_t*)targetPositionPixels + 1) = vgetq_lane_u8(m128_interpolation, 4);
3967 *((uint8_t*)targetPositionPixels + 2) = vgetq_lane_u8(m128_interpolation, 8);
3968 *((uint8_t*)targetPositionPixels + 3) = vgetq_lane_u8(m128_interpolation, 12);
3969
3970#endif
3971}
3972
3973OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate8Pixels1Channel8BitNEON(const uint8x8_t& topLeft_u_8x8, const uint8x8_t& topRight_u_8x8, const uint8x8_t& bottomLeft_u_8x8, const uint8x8_t& bottomRight_u_8x8, const uint8x16_t& factorsRight_factorsBottom_128_u_8x16, uint8_t* targetPositionPixels)
3974{
3975 const uint8x16_t factorsLeft_factorsTop_128_u_8x16 = vsubq_u8(vdupq_n_u8(128u), factorsRight_factorsBottom_128_u_8x16); // factorLeft = 128 - factorRight, factorTop = 128 - factorBottomv
3976
3977 const uint8x8_t factorsRight_u_8x8 = vget_low_u8(factorsRight_factorsBottom_128_u_8x16);
3978 const uint16x8_t factorsBottom_u_16x8 = vmovl_u8(vget_high_u8(factorsRight_factorsBottom_128_u_8x16));
3979
3980 const uint8x8_t factorsLeft_u_8x8 = vget_low_u8(factorsLeft_factorsTop_128_u_8x16);
3981 const uint16x8_t factorsTop_u_16x8 = vmovl_u8(vget_high_u8(factorsLeft_factorsTop_128_u_8x16));
3982
3983 const uint16x8_t intermediateTop_u_16x8 = vmlal_u8(vmull_u8(topLeft_u_8x8, factorsLeft_u_8x8), topRight_u_8x8, factorsRight_u_8x8); // intermediateTop = topLeft * factorLeft + topRight * factorRight
3984 const uint16x8_t intermediateBottom_u_16x8 = vmlal_u8(vmull_u8(bottomLeft_u_8x8, factorsLeft_u_8x8), bottomRight_u_8x8, factorsRight_u_8x8); // intermediateBottom = bottomLeft * factorLeft + bottomRight * factorRight
3985
3986 const uint32x4_t resultA_32x4 = vmlal_u16(vmull_u16(vget_low_u16(intermediateTop_u_16x8), vget_low_u16(factorsTop_u_16x8)), vget_low_u16(intermediateBottom_u_16x8), vget_low_u16(factorsBottom_u_16x8)); // result = intermediateTop * factorTop + intermediateBottom + factorBottom
3987 const uint32x4_t resultB_32x4 = vmlal_u16(vmull_u16(vget_high_u16(intermediateTop_u_16x8), vget_high_u16(factorsTop_u_16x8)), vget_high_u16(intermediateBottom_u_16x8), vget_high_u16(factorsBottom_u_16x8));
3988
3989 const uint16x8_t result_16x8 = vcombine_u16(vrshrn_n_u32(resultA_32x4, 14), vrshrn_n_u32(resultB_32x4, 14)); // round(result / 16384.0)
3990
3991 const uint8x8_t result_8x8 = vmovn_u16(result_16x8);
3992
3993 vst1_u8(targetPositionPixels, result_8x8);
3994}
3995
3996template <>
3997OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<2u>(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const DataType<uint8_t, 2u>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, 2u>::Type* targetPositionPixels)
3998{
3999 ocean_assert(source != nullptr);
4000 ocean_assert(targetPositionPixels != nullptr);
4001
4002 using PixelType = typename DataType<uint8_t, 2u>::Type;
4003
4004 // as we do not initialize the following intermediate data,
4005 // we hopefully will not allocate memory on the stack each time this function is called
4006 PixelType topPixels[8];
4007 PixelType bottomPixels[8];
4008
4009 // we will store the pixel information in the following pattern (here for YA):
4010 // FE DC BA 98 76 54 32 10
4011 // YA YA YA YA YA YA YA YA
4012 // TR TL TR TL TR TL TR TL
4013
4014 // we gather the individual source pixel values from the source image,
4015 // based on the calculated pixel locations
4016 for (unsigned int i = 0u; i < 4u; ++i)
4017 {
4018 if (validPixels[i])
4019 {
4020 *(topPixels + i * 2u + 0u) = *((const PixelType*)(source + offsetsTopLeftElements[i]));
4021 *(topPixels + i * 2u + 1u) = *((const PixelType*)(source + offsetsTopRightElements[i]));
4022 *(bottomPixels + i * 2u + 0u) = *((const PixelType*)(source + offsetsBottomLeftElements[i]));
4023 *(bottomPixels + i * 2u + 1u) = *((const PixelType*)(source + offsetsBottomRightElements[i]));
4024 }
4025 else
4026 {
4027 *(topPixels + i * 2u + 0u) = borderColor;
4028 *(topPixels + i * 2u + 1u) = borderColor;
4029 *(bottomPixels + i * 2u + 0u) = borderColor;
4030 *(bottomPixels + i * 2u + 1u) = borderColor;
4031 }
4032 }
4033
4034 static_assert(sizeof(uint32x4_t) == sizeof(topPixels), "Invalid data type!");
4035
4036 const uint32x4_t m128_topPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)topPixels));
4037 const uint32x4_t m128_bottomPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)bottomPixels));
4038
4039
4040 // factorLeft = 128 - factorRight
4041 // factorTop = 128 - factorBottom
4042
4043 const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4044 const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
4045
4046 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
4047 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
4048
4049 const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4050 const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4051 const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4052 const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
4053
4054
4055 const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
4056
4057 uint32x4_t m128_muliplicationChannel0 = vmulq_u32(vandq_u32(m128_topPixels, m128_maskFirstByte), m128_factorsTopLeft);
4058 uint32x4_t m128_muliplicationChannel1 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topPixels, 8), m128_maskFirstByte), m128_factorsTopLeft);
4059
4060 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topPixels, 16), m128_maskFirstByte), m128_factorsTopRight));
4061 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topPixels, 24), m128_maskFirstByte), m128_factorsTopRight));
4062
4063 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomPixels, m128_maskFirstByte), m128_factorsBottomLeft));
4064 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomPixels, 8), m128_maskFirstByte), m128_factorsBottomLeft));
4065
4066 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomPixels, 16), m128_maskFirstByte), m128_factorsBottomRight));
4067 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomPixels, 24), m128_maskFirstByte), m128_factorsBottomRight));
4068
4069
4070 // we add 8192 and shift by 14 bits
4071
4072 const uint32x4_t m128_interpolation0 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel0, vdupq_n_u32(8192u)), 14);
4073 const uint32x4_t m128_interpolation1 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel1, vdupq_n_u32(8192u)), 14);
4074
4075 // finaly we blend the interpolation results together to get the following pattern:
4076 // FE DC BA 98 76 54 32 10
4077 // 00 YA 00 YA 00 YA 00 YA
4078
4079 const uint32x4_t m128_interpolation = vorrq_u32(m128_interpolation0, vshlq_n_u32(m128_interpolation1, 8));
4080
4081 // we shuffle the 128 bit register to a 64 bit register:
4082
4083 const uint8x8_t m64_mask0 = NEON::create_uint8x8(0, 1, 4, 5, 2, 2, 2, 2);
4084 const uint8x8_t m64_mask1 = NEON::create_uint8x8(2, 2, 2, 2, 0, 1, 4, 5);
4085
4086 const uint8x8_t m64_interpolation_low = vtbl1_u8(vget_low_u8(vreinterpretq_u8_u32(m128_interpolation)), m64_mask0);
4087 const uint8x8_t m64_interpolation_high = vtbl1_u8(vget_high_u8(vreinterpretq_u8_u32(m128_interpolation)), m64_mask1);
4088
4089 const uint8x8_t m64_interpolation = vorr_u8(m64_interpolation_low, m64_interpolation_high);
4090
4091 // no we can store the following pattern as one block:
4092
4093 // 76 54 32 10
4094 // YA YA YA YA
4095
4096 vst1_u8((uint8_t*)targetPositionPixels, m64_interpolation);
4097}
4098
4099template <>
4100OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<3u>(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const DataType<uint8_t, 3u>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, 3u>::Type* targetPositionPixels)
4101{
4102 ocean_assert(source != nullptr);
4103 ocean_assert(targetPositionPixels != nullptr);
4104
4105 // as we do not initialize the following intermediate data,
4106 // we hopefully will not allocate memory on the stack each time this function is called
4107 uint32_t topLeftPixels[4];
4108 uint32_t topRightPixels[4];
4109 uint32_t bottomLeftPixels[4];
4110 uint32_t bottomRightPixels[4];
4111
4112 // we will store the pixel information in the following pattern, note the padding byte after each pixel (here for RGB):
4113 // FEDCBA9876543210
4114 // BGR BGR BGR BGR
4115
4116 // we gather the individual source pixel values from the source image,
4117 // based on the calculated pixel locations
4118 for (unsigned int i = 0u; i < 4u; ++i)
4119 {
4120 if (validPixels[i])
4121 {
4122 memcpy(topLeftPixels + i, source + offsetsTopLeftElements[i], sizeof(uint8_t) * 3);
4123 memcpy(topRightPixels + i, source + offsetsTopRightElements[i], sizeof(uint8_t) * 3);
4124 memcpy(bottomLeftPixels + i, source + offsetsBottomLeftElements[i], sizeof(uint8_t) * 3);
4125 memcpy(bottomRightPixels + i, source + offsetsBottomRightElements[i], sizeof(uint8_t) * 3);
4126 }
4127 else
4128 {
4129 memcpy(topLeftPixels + i, &borderColor, sizeof(uint8_t) * 3);
4130 memcpy(topRightPixels + i, &borderColor, sizeof(uint8_t) * 3);
4131 memcpy(bottomLeftPixels + i, &borderColor, sizeof(uint8_t) * 3);
4132 memcpy(bottomRightPixels + i, &borderColor, sizeof(uint8_t) * 3);
4133 }
4134 }
4135
4136 static_assert(sizeof(uint32x4_t) == sizeof(topLeftPixels), "Invalid data type!");
4137
4138 const uint32x4_t m128_topLeftPixels = vld1q_u32(topLeftPixels);
4139 const uint32x4_t m128_topRightPixels = vld1q_u32(topRightPixels);
4140 const uint32x4_t m128_bottomLeftPixels = vld1q_u32(bottomLeftPixels);
4141 const uint32x4_t m128_bottomRightPixels = vld1q_u32(bottomRightPixels);
4142
4143
4144 // factorLeft = 128 - factorRight
4145 // factorTop = 128 - factorBottom
4146
4147 const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4148 const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
4149
4150 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
4151 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
4152
4153 const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4154 const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4155 const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4156 const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
4157
4158
4159 const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
4160
4161 uint32x4_t m128_muliplicationChannel0 = vmulq_u32(vandq_u32(m128_topLeftPixels, m128_maskFirstByte), m128_factorsTopLeft);
4162 uint32x4_t m128_muliplicationChannel1 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 8), m128_maskFirstByte), m128_factorsTopLeft);
4163 uint32x4_t m128_muliplicationChannel2 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 16), m128_maskFirstByte), m128_factorsTopLeft);
4164
4165 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_topRightPixels, m128_maskFirstByte), m128_factorsTopRight));
4166 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 8), m128_maskFirstByte), m128_factorsTopRight));
4167 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 16), m128_maskFirstByte), m128_factorsTopRight));
4168
4169 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomLeftPixels, m128_maskFirstByte), m128_factorsBottomLeft));
4170 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 8), m128_maskFirstByte), m128_factorsBottomLeft));
4171 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 16), m128_maskFirstByte), m128_factorsBottomLeft));
4172
4173 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomRightPixels, m128_maskFirstByte), m128_factorsBottomRight));
4174 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 8), m128_maskFirstByte), m128_factorsBottomRight));
4175 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 16), m128_maskFirstByte), m128_factorsBottomRight));
4176
4177
4178 // we add 8192 and shift by 14 bits
4179
4180 const uint32x4_t m128_interpolation0 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel0, vdupq_n_u32(8192u)), 14);
4181 const uint32x4_t m128_interpolation1 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel1, vdupq_n_u32(8192u)), 14);
4182 const uint32x4_t m128_interpolation2 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel2, vdupq_n_u32(8192u)), 14);
4183
4184 // finaly we blend the interpolation results together
4185
4186 const uint32x4_t m128_interpolation = vorrq_u32(vorrq_u32(m128_interpolation0, vshlq_n_u32(m128_interpolation1, 8)), vshlq_n_u32(m128_interpolation2, 16));
4187
4188 // we have to extract the get rid of the padding byte:
4189 // FEDCBA9876543210
4190 // BGR BGR BGR BGR
4191
4192 uint32_t intermediateBuffer[4];
4193 vst1q_u32(intermediateBuffer, m128_interpolation);
4194
4195 for (unsigned int i = 0u; i < 4u; ++i)
4196 {
4197 memcpy(targetPositionPixels + i, intermediateBuffer + i, sizeof(uint8_t) * 3);
4198 }
4199}
4200
4201template <>
4202OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON<4u>(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const DataType<uint8_t, 4u>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, 4u>::Type* targetPositionPixels)
4203{
4204 ocean_assert(source != nullptr);
4205 ocean_assert(targetPositionPixels != nullptr);
4206
4207 using PixelType = typename DataType<uint8_t, 4u>::Type;
4208
4209 // as we do not initialize the following intermediate data,
4210 // we hopefully will not allocate memory on the stack each time this function is called
4211 PixelType topLeftPixels[4];
4212 PixelType topRightPixels[4];
4213 PixelType bottomLeftPixels[4];
4214 PixelType bottomRightPixels[4];
4215
4216 // we will store the pixel information in the following pattern (here for RGBA):
4217 // FEDC BA98 7654 3210
4218 // ABGR ABGR ABGR ABGR
4219
4220 // we gather the individual source pixel values from the source image,
4221 // based on the calculated pixel locations
4222 for (unsigned int i = 0u; i < 4u; ++i)
4223 {
4224 if (validPixels[i])
4225 {
4226 *(topLeftPixels + i) = *((const PixelType*)(source + offsetsTopLeftElements[i]));
4227 *(topRightPixels + i) = *((const PixelType*)(source + offsetsTopRightElements[i]));
4228 *(bottomLeftPixels + i) = *((const PixelType*)(source + offsetsBottomLeftElements[i]));
4229 *(bottomRightPixels + i) = *((const PixelType*)(source + offsetsBottomRightElements[i]));
4230 }
4231 else
4232 {
4233 *(topLeftPixels + i) = borderColor;
4234 *(topRightPixels + i) = borderColor;
4235 *(bottomLeftPixels + i) = borderColor;
4236 *(bottomRightPixels + i) = borderColor;
4237 }
4238 }
4239
4240 static_assert(sizeof(uint32x4_t) == sizeof(topLeftPixels), "Invalid data type!");
4241
4242 const uint32x4_t m128_topLeftPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)topLeftPixels));
4243 const uint32x4_t m128_topRightPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)topRightPixels));
4244 const uint32x4_t m128_bottomLeftPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)bottomLeftPixels));
4245 const uint32x4_t m128_bottomRightPixels = vreinterpretq_u32_u8(vld1q_u8((const uint8_t*)bottomRightPixels));
4246
4247
4248 // factorLeft = 128 - factorRight
4249 // factorTop = 128 - factorBottom
4250
4251 const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4252 const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
4253
4254 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
4255 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
4256
4257 const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4258 const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4259 const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4260 const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
4261
4262
4263 const uint32x4_t m128_maskFirstByte = vdupq_n_u32(0x000000FFu);
4264
4265 uint32x4_t m128_muliplicationChannel0 = vmulq_u32(vandq_u32(m128_topLeftPixels, m128_maskFirstByte), m128_factorsTopLeft);
4266 uint32x4_t m128_muliplicationChannel1 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 8), m128_maskFirstByte), m128_factorsTopLeft);
4267 uint32x4_t m128_muliplicationChannel2 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 16), m128_maskFirstByte), m128_factorsTopLeft);
4268 uint32x4_t m128_muliplicationChannel3 = vmulq_u32(vandq_u32(vshrq_n_u32(m128_topLeftPixels, 24), m128_maskFirstByte), m128_factorsTopLeft);
4269
4270 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_topRightPixels, m128_maskFirstByte), m128_factorsTopRight));
4271 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 8), m128_maskFirstByte), m128_factorsTopRight));
4272 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 16), m128_maskFirstByte), m128_factorsTopRight));
4273 m128_muliplicationChannel3 = vaddq_u32(m128_muliplicationChannel3, vmulq_u32(vandq_u32(vshrq_n_u32(m128_topRightPixels, 24), m128_maskFirstByte), m128_factorsTopRight));
4274
4275 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomLeftPixels, m128_maskFirstByte), m128_factorsBottomLeft));
4276 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 8), m128_maskFirstByte), m128_factorsBottomLeft));
4277 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 16), m128_maskFirstByte), m128_factorsBottomLeft));
4278 m128_muliplicationChannel3 = vaddq_u32(m128_muliplicationChannel3, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomLeftPixels, 24), m128_maskFirstByte), m128_factorsBottomLeft));
4279
4280 m128_muliplicationChannel0 = vaddq_u32(m128_muliplicationChannel0, vmulq_u32(vandq_u32(m128_bottomRightPixels, m128_maskFirstByte), m128_factorsBottomRight));
4281 m128_muliplicationChannel1 = vaddq_u32(m128_muliplicationChannel1, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 8), m128_maskFirstByte), m128_factorsBottomRight));
4282 m128_muliplicationChannel2 = vaddq_u32(m128_muliplicationChannel2, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 16), m128_maskFirstByte), m128_factorsBottomRight));
4283 m128_muliplicationChannel3 = vaddq_u32(m128_muliplicationChannel3, vmulq_u32(vandq_u32(vshrq_n_u32(m128_bottomRightPixels, 24), m128_maskFirstByte), m128_factorsBottomRight));
4284
4285
4286 // we add 8192 and shift by 14 bits
4287
4288 const uint32x4_t m128_interpolation0 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel0, vdupq_n_u32(8192u)), 14);
4289 const uint32x4_t m128_interpolation1 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel1, vdupq_n_u32(8192u)), 14);
4290 const uint32x4_t m128_interpolation2 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel2, vdupq_n_u32(8192u)), 14);
4291 const uint32x4_t m128_interpolation3 = vshrq_n_u32(vaddq_u32(m128_muliplicationChannel3, vdupq_n_u32(8192u)), 14);
4292
4293 // finaly we blend the interpolation results together
4294
4295 const uint32x4_t m128_interpolation = vorrq_u32(vorrq_u32(m128_interpolation0, vshlq_n_u32(m128_interpolation1, 8)), vorrq_u32(vshlq_n_u32(m128_interpolation2, 16), vshlq_n_u32(m128_interpolation3, 24)));
4296
4297 vst1q_u8((uint8_t*)targetPositionPixels, vreinterpretq_u8_u32(m128_interpolation));
4298}
4299
4300template <unsigned int tChannels>
4301OCEAN_FORCE_INLINE void FrameInterpolatorBilinear::interpolate4Pixels8BitPerChannelNEON(const uint8_t* source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const typename DataType<uint8_t, tChannels>::Type& borderColor, const uint32x4_t& m128_factorsRight, const uint32x4_t& m128_factorsBottom, typename DataType<uint8_t, tChannels>::Type* targetPositionPixels)
4302{
4303 ocean_assert(source != nullptr);
4304 ocean_assert(targetPositionPixels != nullptr);
4305
4306 // as we do not initialize the following intermediate data,
4307 // we hopefully will not allocate memory on the stack each time this function is called
4308 unsigned int factorsTopLeft[4];
4309 unsigned int factorsTopRight[4];
4310 unsigned int factorsBottomLeft[4];
4311 unsigned int factorsBottomRight[4];
4312
4313
4314 // factorLeft = 128 - factorRight
4315 // factorTop = 128 - factorBottom
4316
4317 const uint32x4_t m128_factorsLeft = vsubq_u32(vdupq_n_u32(128u), m128_factorsRight);
4318 const uint32x4_t m128_factorsTop = vsubq_u32(vdupq_n_u32(128u), m128_factorsBottom);
4319
4320 // (top_left * factorLeft + top_right * factorRight) * factorTop + (bottom_left * factorLeft + bottom_right * factorRight) * factorBottom
4321 // == top_left * factorTopLeft + top_right * factorTopRight + bottom_left * factorBottomLeft + bottom_right * factorBottomRight
4322
4323 const uint32x4_t m128_factorsTopLeft = vmulq_u32(m128_factorsTop, m128_factorsLeft);
4324 const uint32x4_t m128_factorsTopRight = vmulq_u32(m128_factorsTop, m128_factorsRight);
4325 const uint32x4_t m128_factorsBottomLeft = vmulq_u32(m128_factorsBottom, m128_factorsLeft);
4326 const uint32x4_t m128_factorsBottomRight = vmulq_u32(m128_factorsBottom, m128_factorsRight);
4327
4328
4329 // we store the interpolation factors
4330 vst1q_u32(factorsTopLeft, m128_factorsTopLeft);
4331 vst1q_u32(factorsTopRight, m128_factorsTopRight);
4332 vst1q_u32(factorsBottomLeft, m128_factorsBottomLeft);
4333 vst1q_u32(factorsBottomRight, m128_factorsBottomRight);
4334
4335 for (unsigned int i = 0u; i < 4u; ++i)
4336 {
4337 if (validPixels[i])
4338 {
4339 const uint8_t* topLeft = source + offsetsTopLeftElements[i];
4340 const uint8_t* topRight = source + offsetsTopRightElements[i];
4341
4342 const uint8_t* bottomLeft = source + offsetsBottomLeftElements[i];
4343 const uint8_t* bottomRight = source + offsetsBottomRightElements[i];
4344
4345 const unsigned int& factorTopLeft = factorsTopLeft[i];
4346 const unsigned int& factorTopRight = factorsTopRight[i];
4347 const unsigned int& factorBottomLeft = factorsBottomLeft[i];
4348 const unsigned int& factorBottomRight = factorsBottomRight[i];
4349
4350 for (unsigned int n = 0u; n < tChannels; ++n)
4351 {
4352 ((uint8_t*)targetPositionPixels)[n] = (topLeft[n] * factorTopLeft + topRight[n] * factorTopRight + bottomLeft[n] * factorBottomLeft + bottomRight[n] * factorBottomRight + 8192u) >> 14u;
4353 }
4354 }
4355 else
4356 {
4357 *targetPositionPixels = borderColor;
4358 }
4359
4360 targetPositionPixels++;
4361 }
4362}
4363
4364#endif // OCEAN_HARDWARE_NEON_VERSION
4365
4366template <unsigned int tChannels>
4367inline void FrameInterpolatorBilinear::homographies8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* homographies, const uint8_t* borderColor, uint8_t* output, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
4368{
4369 static_assert(tChannels >= 1u, "Invalid channel number!");
4370
4371 ocean_assert(input && output);
4372 ocean_assert(inputWidth > 0u && inputHeight > 0u);
4373 ocean_assert(outputWidth > 0u && outputHeight > 0u);
4374
4375 ocean_assert(outputQuadrantCenterX >= 0 && outputQuadrantCenterX < Scalar(outputWidth));
4376 ocean_assert(outputQuadrantCenterY >= 0 && outputQuadrantCenterY < Scalar(outputHeight));
4377 ocean_assert(homographies);
4378
4379 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
4380
4381 const Scalar scalarInputWidth_1 = Scalar(inputWidth - 1u);
4382 const Scalar scalarInputHeight_1 = Scalar(inputHeight - 1u);
4383
4384 constexpr uint8_t zeroColor[tChannels] = {uint8_t(0)};
4385 const uint8_t* const bColor = borderColor ? borderColor : zeroColor;
4386
4387 uint8_t* outputData = output + firstOutputRow * outputStrideElements;
4388
4389 const Scalar left = Scalar(outputQuadrantCenterX) * Scalar(0.5);
4390 const Scalar right = (Scalar(outputWidth) + Scalar(outputQuadrantCenterX)) * Scalar(0.5);
4391
4392 const Scalar top = Scalar(outputQuadrantCenterY) * Scalar(0.5);
4393 const Scalar bottom = (Scalar(outputHeight) + Scalar(outputQuadrantCenterY)) * Scalar(0.5);
4394
4395 ocean_assert(right - left > Numeric::eps());
4396 ocean_assert(bottom - top > Numeric::eps());
4397
4398 const Scalar invWidth = Scalar(1) / Scalar(right - left);
4399 const Scalar invHeight = Scalar(1) / Scalar(bottom - top);
4400
4401 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
4402 {
4403 for (unsigned int x = 0; x < outputWidth; ++x)
4404 {
4405 Vector2 outputPosition = Vector2(Scalar(int(x)), Scalar(int(y)));
4406
4407 const Scalar _tx = minmax<Scalar>(0, (outputPosition.x() - left) * invWidth, 1);
4408 const Scalar _ty = minmax<Scalar>(0, (outputPosition.y() - top) * invHeight, 1);
4409
4410 outputPosition += Vector2(Scalar(outputOriginX), Scalar(outputOriginY));
4411
4412 const Scalar tx = 1 - _tx;
4413 const Scalar ty = 1 - _ty;
4414
4415 const Vector2 inputPositionTopLeft(homographies[0] * outputPosition);
4416 const Vector2 inputPositionTopRight(homographies[1] * outputPosition);
4417 const Vector2 inputPositionBottomLeft(homographies[2] * outputPosition);
4418 const Vector2 inputPositionBottomRight(homographies[3] * outputPosition);
4419
4420 const Scalar tTopLeft = tx * ty;
4421 const Scalar tTopRight = _tx * ty;
4422 const Scalar tBottomLeft = tx * _ty;
4423 const Scalar tBottomRight = _tx * _ty;
4424
4425 const Vector2 inputPosition = inputPositionTopLeft * tTopLeft + inputPositionTopRight * tTopRight
4426 + inputPositionBottomLeft * tBottomLeft + inputPositionBottomRight * tBottomRight;
4427
4428 if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4429 {
4430 for (unsigned int c = 0u; c < tChannels; ++c)
4431 {
4432 outputData[c] = bColor[c];
4433 }
4434 }
4435 else
4436 {
4437 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, outputData);
4438 }
4439
4440 outputData += tChannels;
4441 }
4442
4443 outputData += outputPaddingElements;
4444 }
4445}
4446
4447template <unsigned int tChannels>
4448void FrameInterpolatorBilinear::homographyMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* input_H_output, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, unsigned int firstOutputRow, const unsigned int numberOutputRows)
4449{
4450 static_assert(tChannels >= 1u, "Invalid channel number!");
4451
4452 ocean_assert(input != nullptr && output != nullptr);
4453 ocean_assert(inputWidth > 0u && inputHeight > 0u);
4454 ocean_assert(outputWidth > 0u && outputHeight > 0u);
4455 ocean_assert(input_H_output != nullptr);
4456
4457 ocean_assert_and_suppress_unused(firstOutputRow + numberOutputRows <= outputHeight, outputHeight);
4458
4459 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
4460 const unsigned int outputMaskStrideElements = outputWidth + outputMaskPaddingElements;
4461
4462 const Scalar scalarInputWidth_1 = Scalar(inputWidth - 1u);
4463 const Scalar scalarInputHeight_1 = Scalar(inputHeight - 1u);
4464
4465 using PixelType = typename DataType<uint8_t, tChannels>::Type;
4466
4467 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
4468 {
4469 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
4470 uint8_t* outputMaskData = outputMask + y * outputMaskStrideElements;
4471
4472 /*
4473 * We can slightly optimize the 3x3 matrix multiplication:
4474 *
4475 * | X0 Y0 Z0 | | x |
4476 * | X1 Y1 Z1 | * | y |
4477 * | X2 Y2 Z2 | | 1 |
4478 *
4479 * | x' | | X0 * x | | Y0 * y + Z0 |
4480 * | y' | = | X1 * x | + | Y1 * y + Z1 |
4481 * | z' | | X2 * x | | Y2 * y + Z2 |
4482 *
4483 * As y is constant within the inner loop, we can pre-calculate the following terms:
4484 *
4485 * | x' | | (X0 * x + constValue0) / (X2 * x + constValue2) |
4486 * | y' | = | (X1 * x + constValue1) / (X2 * x + constValue2) |
4487 *
4488 * | p | = | (X * x + c) / (X2 * x + constValue2) |
4489 */
4490
4491 const Vector2 X(input_H_output->data() + 0);
4492 const Vector2 c(Vector2(input_H_output->data() + 3) * Scalar(y) + Vector2(input_H_output->data() + 6));
4493
4494 const Scalar X2 = (*input_H_output)(2, 0);
4495 const Scalar constValue2 = (*input_H_output)(2, 1) * Scalar(y) + (*input_H_output)(2, 2);
4496
4497 for (unsigned int x = 0; x < outputWidth; ++x)
4498 {
4499 const Vector2 inputPosition((X * Scalar(x) + c) / (X2 * Scalar(x) + constValue2));
4500
4501#ifdef OCEAN_DEBUG
4502 const Vector2 debugInputPosition(*input_H_output * Vector2(Scalar(x), Scalar(y)));
4503 ocean_assert(inputPosition.isEqual(debugInputPosition, Scalar(0.01)));
4504#endif
4505
4506 if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4507 {
4508 *outputMaskData = 0xFF - maskValue;
4509 }
4510 else
4511 {
4512 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputData));
4513 *outputMaskData = maskValue;
4514 }
4515
4516 outputData++;
4517 outputMaskData++;
4518 }
4519 }
4520}
4521
4522template <unsigned int tChannels>
4523inline void FrameInterpolatorBilinear::homographiesMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3* homographies, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
4524{
4525 static_assert(tChannels >= 1u, "Invalid channel number!");
4526
4527 ocean_assert(input && output);
4528 ocean_assert(inputWidth > 0u && inputHeight > 0u);
4529 ocean_assert(outputWidth > 0u && outputHeight > 0u);
4530
4531 ocean_assert(outputQuadrantCenterX >= 0 && outputQuadrantCenterX < Scalar(outputWidth));
4532 ocean_assert(outputQuadrantCenterY >= 0 && outputQuadrantCenterY < Scalar(outputHeight));
4533 ocean_assert(homographies);
4534
4535 const unsigned int outputStrideElements = tChannels * outputWidth + outputPaddingElements;
4536 const unsigned int outputMaskStrideElements = outputWidth + outputMaskPaddingElements;
4537
4538 const Scalar scalarInputWidth_1 = Scalar(inputWidth - 1u);
4539 const Scalar scalarInputHeight_1 = Scalar(inputHeight - 1u);
4540
4541 uint8_t* outputData = output + firstOutputRow * outputStrideElements;
4542 outputMask += firstOutputRow * outputMaskStrideElements;
4543
4544 const Scalar left = Scalar(outputQuadrantCenterX) * Scalar(0.5);
4545 const Scalar right = (Scalar(outputWidth) + Scalar(outputQuadrantCenterX)) * Scalar(0.5);
4546
4547 const Scalar top = Scalar(outputQuadrantCenterY) * Scalar(0.5);
4548 const Scalar bottom = (Scalar(outputHeight) + Scalar(outputQuadrantCenterY)) * Scalar(0.5);
4549
4550 ocean_assert(right - left > Numeric::eps());
4551 ocean_assert(bottom - top > Numeric::eps());
4552
4553 const Scalar invWidth = Scalar(1) / Scalar(right - left);
4554 const Scalar invHeight = Scalar(1) / Scalar(bottom - top);
4555
4556 for (unsigned int y = firstOutputRow; y < firstOutputRow + numberOutputRows; ++y)
4557 {
4558 for (unsigned int x = 0u; x < outputWidth; ++x)
4559 {
4560 Vector2 outputPosition = Vector2(Scalar(int(x)), Scalar(int(y)));
4561
4562 const Scalar _tx = minmax<Scalar>(0, (outputPosition.x() - left) * invWidth, 1);
4563 const Scalar _ty = minmax<Scalar>(0, (outputPosition.y() - top) * invHeight, 1);
4564
4565 outputPosition += Vector2(Scalar(outputOriginX), Scalar(outputOriginY));
4566
4567 const Scalar tx = 1 - _tx;
4568 const Scalar ty = 1 - _ty;
4569
4570 const Vector2 inputPositionTopLeft(homographies[0] * outputPosition);
4571 const Vector2 inputPositionTopRight(homographies[1] * outputPosition);
4572 const Vector2 inputPositionBottomLeft(homographies[2] * outputPosition);
4573 const Vector2 inputPositionBottomRight(homographies[3] * outputPosition);
4574
4575 const Scalar tTopLeft = tx * ty;
4576 const Scalar tTopRight = _tx * ty;
4577 const Scalar tBottomLeft = tx * _ty;
4578 const Scalar tBottomRight = _tx * _ty;
4579
4580 const Vector2 inputPosition = inputPositionTopLeft * tTopLeft + inputPositionTopRight * tTopRight
4581 + inputPositionBottomLeft * tBottomLeft + inputPositionBottomRight * tBottomRight;
4582
4583 if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4584 {
4585 *outputMask = 0xFFu - maskValue;
4586 }
4587 else
4588 {
4589 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, outputData);
4590 *outputMask = maskValue;
4591 }
4592
4593 outputData += tChannels;
4594 outputMask++;
4595 }
4596
4597 outputData += outputPaddingElements;
4598 outputMask += outputMaskPaddingElements;
4599 }
4600}
4601
4602template <unsigned int tChannels>
4603void FrameInterpolatorBilinear::homographyWithCamera8BitPerChannelSubset(const PinholeCamera* inputCamera, const PinholeCamera* outputCamera, const PinholeCamera::DistortionLookup* outputCameraDistortionLookup, const uint8_t* input, const SquareMatrix3* normalizedHomography, const bool useDistortionParameters, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4604{
4605 static_assert(tChannels >= 1u, "Invalid channel number!");
4606
4607 ocean_assert(inputCamera && outputCamera && normalizedHomography);
4608 ocean_assert(input && output);
4609
4610 ocean_assert(firstRow + numberRows <= outputCamera->height());
4611
4612 const unsigned int outputStrideElements = tChannels * outputCamera->width() + outputPaddingElements;
4613
4614 const Scalar scalarInputWidth_1 = Scalar(inputCamera->width() - 1u);
4615 const Scalar scalarInputHeight_1 = Scalar(inputCamera->height() - 1u);
4616
4617 const SquareMatrix3 combinedMatrix(*normalizedHomography * outputCamera->invertedIntrinsic());
4618
4619 using PixelType = typename DataType<uint8_t, tChannels>::Type;
4620
4621 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
4622 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
4623
4624 uint8_t* outputData = output + firstRow * outputStrideElements;
4625
4626 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4627 {
4628 for (unsigned int x = 0; x < outputCamera->width(); ++x)
4629 {
4630 const Vector2 inputPosition(inputCamera->normalizedImagePoint2imagePoint<true>(combinedMatrix * outputCameraDistortionLookup->undistortedImagePoint(Vector2(Scalar(x), Scalar(y))), useDistortionParameters));
4631
4632 if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4633 {
4634 *((PixelType*)outputData) = *bColor;
4635 }
4636 else
4637 {
4638 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputCamera->width(), inputCamera->height(), inputPaddingElements, inputPosition, outputData);
4639 }
4640
4641 outputData += tChannels;
4642 }
4643
4644 outputData += outputPaddingElements;
4645 }
4646}
4647
4648template <unsigned int tChannels>
4649void FrameInterpolatorBilinear::homographyWithCameraMask8BitPerChannelSubset(const PinholeCamera* inputCamera, const PinholeCamera* outputCamera, const PinholeCamera::DistortionLookup* outputCameraDistortionLookup, const uint8_t* input, const unsigned int inputPaddingElements, const SquareMatrix3* normalizedHomography, uint8_t* output, uint8_t* outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows)
4650{
4651 static_assert(tChannels >= 1u, "Invalid channel number!");
4652
4653 ocean_assert(inputCamera != nullptr && outputCamera != nullptr && normalizedHomography != nullptr);
4654 ocean_assert(input != nullptr && output != nullptr);
4655
4656 ocean_assert(firstRow + numberRows <= outputCamera->height());
4657
4658 const unsigned int outputStrideElements = outputCamera->width() * tChannels + outputPaddingElements;
4659 const unsigned int outputMaskStrideElements = outputCamera->width() + outputMaskPaddingElements;
4660
4661 const Scalar scalarInputWidth_1 = Scalar(inputCamera->width() - 1u);
4662 const Scalar scalarInputHeight_1 = Scalar(inputCamera->height() - 1u);
4663
4664 const SquareMatrix3 combinedMatrix(*normalizedHomography * outputCamera->invertedIntrinsic());
4665
4666 uint8_t* outputData = output + firstRow * outputStrideElements;
4667 outputMask += firstRow * outputMaskStrideElements;
4668
4669 constexpr bool useDistortionParameters = true;
4670
4671 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4672 {
4673 for (unsigned int x = 0; x < outputCamera->width(); ++x)
4674 {
4675 const Vector2 inputPosition(inputCamera->normalizedImagePoint2imagePoint<true>(combinedMatrix * outputCameraDistortionLookup->undistortedImagePoint(Vector2(Scalar(x), Scalar(y))), useDistortionParameters));
4676
4677 if (inputPosition.x() < Scalar(0) || inputPosition.x() > scalarInputWidth_1 || inputPosition.y() < Scalar(0) || inputPosition.y() > scalarInputHeight_1)
4678 {
4679 *outputMask = 0xFF - maskValue;
4680 }
4681 else
4682 {
4683 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputCamera->width(), inputCamera->height(), inputPaddingElements, inputPosition, outputData);
4684 *outputMask = maskValue;
4685 }
4686
4687 outputData += tChannels;
4688 ++outputMask;
4689 }
4690
4691 outputData += outputPaddingElements;
4692 outputMask += outputMaskPaddingElements;
4693 }
4694}
4695
4696template <unsigned int tChannels>
4697void FrameInterpolatorBilinear::lookup8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4698{
4699 static_assert(tChannels >= 1u, "Invalid channel number!");
4700
4701 ocean_assert(input_LT_output != nullptr);
4702 ocean_assert(input != nullptr && output != nullptr);
4703
4704 ocean_assert(inputWidth != 0u && inputHeight != 0u);
4705 ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
4706
4707 using PixelType = typename DataType<uint8_t, tChannels>::Type;
4708
4709 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
4710 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
4711
4712 const unsigned int columns = (unsigned int)(input_LT_output->sizeX());
4713
4714 const unsigned int outputStrideElements = tChannels * columns + outputPaddingElements;
4715
4716 static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
4717
4718 const Scalar inputWidth1 = Scalar(inputWidth - 1u);
4719 const Scalar inputHeight1 = Scalar(inputHeight - 1u);
4720
4721 Memory rowLookupMemory = Memory::create<Vector2>(columns);
4722 Vector2* const rowLookupData = rowLookupMemory.data<Vector2>();
4723
4724 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4725 {
4726 input_LT_output->bilinearValues(y, rowLookupData);
4727
4728 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
4729
4730 for (unsigned int x = 0u; x < columns; ++x)
4731 {
4732 const Vector2& lookupValue = rowLookupData[x];
4733
4734 const Vector2 inputPosition = offset ? Vector2(Scalar(x) + lookupValue.x(), Scalar(y) + lookupValue.y()) : lookupValue;
4735
4736 if (inputPosition.x() >= Scalar(0) && inputPosition.y() >= Scalar(0) && inputPosition.x() <= inputWidth1 && inputPosition.y() <= inputHeight1)
4737 {
4738 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputData));
4739 }
4740 else
4741 {
4742 *outputData = *bColor;
4743 }
4744
4745 outputData++;
4746 }
4747 }
4748}
4749
4750template <typename T, unsigned int tChannels>
4751void FrameInterpolatorBilinear::lookupSubset(const T* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const T* borderColor, T* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4752{
4753 static_assert(tChannels >= 1u, "Invalid channel number!");
4754
4755 ocean_assert((!std::is_same<uint8_t, T>::value));
4756
4757 ocean_assert(input_LT_output != nullptr);
4758 ocean_assert(input != nullptr && output != nullptr);
4759
4760 ocean_assert(inputWidth != 0u && inputHeight != 0u);
4761 ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
4762
4763 using PixelType = typename DataType<T, tChannels>::Type;
4764
4765 const T zeroColor[tChannels] = {T(0)};
4766 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
4767
4768 const unsigned int columns = (unsigned int)(input_LT_output->sizeX());
4769
4770 const unsigned int outputStrideElements = tChannels * columns + outputPaddingElements;
4771
4772 static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
4773
4774 const Scalar inputWidth1 = Scalar(inputWidth - 1u);
4775 const Scalar inputHeight1 = Scalar(inputHeight - 1u);
4776
4777 Memory rowLookupMemory = Memory::create<Vector2>(columns);
4778 Vector2* const rowLookupData = rowLookupMemory.data<Vector2>();
4779
4780 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4781 {
4782 input_LT_output->bilinearValues(y, rowLookupData);
4783
4784 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
4785
4786 for (unsigned int x = 0u; x < columns; ++x)
4787 {
4788 const Vector2& lookupValue = rowLookupData[x];
4789
4790 const Vector2 inputPosition = offset ? Vector2(Scalar(x) + lookupValue.x(), Scalar(y) + lookupValue.y()) : lookupValue;
4791
4792 if (inputPosition.x() >= Scalar(0) && inputPosition.y() >= Scalar(0) && inputPosition.x() <= inputWidth1 && inputPosition.y() <= inputHeight1)
4793 {
4794 interpolatePixel<T, T, tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (T*)(outputData));
4795 }
4796 else
4797 {
4798 *outputData = *bColor;
4799 }
4800
4801 outputData++;
4802 }
4803 }
4804}
4805
4806#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
4807
4808template <>
4809inline void FrameInterpolatorBilinear::lookup8BitPerChannelSubsetNEON<1u>(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4810{
4811 ocean_assert(input_LT_output != nullptr);
4812 ocean_assert(input != nullptr && output != nullptr);
4813
4814 ocean_assert(inputWidth != 0u && inputHeight != 0u);
4815 ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
4816
4817 using PixelType = uint8_t;
4818
4819 const uint8x16_t constantBorderColor_u_8x16 = vdupq_n_u8(borderColor ? *borderColor : 0u);
4820
4821 const unsigned int outputWidth = (unsigned int)(input_LT_output->sizeX());
4822 ocean_assert(outputWidth >= 8u);
4823
4824 static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
4825
4826 const unsigned int inputStrideElements = inputWidth + inputPaddingElements;
4827 const unsigned int outputStrideElements = outputWidth + outputPaddingElements;
4828
4829 Memory rowLookupMemory = Memory::create<VectorF2>(outputWidth);
4830 VectorF2* const rowLookupData = rowLookupMemory.data<VectorF2>();
4831
4832 const float32x4_t constantZero_f_32x4 = vdupq_n_f32(0.0f); // [0.0f, 0.0f, 0.0f, 0.0f]
4833 const float32x4_t constantEight_f_32x4 = vdupq_n_f32(8.0f); // [4.0f, 4.0f, 4.0f, 4.0f]
4834
4835 // [0.0f, 1.0f, 2.0f, 3.0f, ...]
4836 const float f_01234567[8] = {0.0f, 1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f};
4837 const float32x4_t conststant0123_f_32x4 = vld1q_f32(f_01234567 + 0);
4838 const float32x4_t conststant4567_f_32x4 = vld1q_f32(f_01234567 + 4);
4839
4840 const float32x4_t constant128_f_32x4 = vdupq_n_f32(128.0f);
4841
4842 const uint32x4_t constantOne_u_32x4 = vdupq_n_u32(1u);
4843
4844 const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(1u);
4845
4846 const float32x4_t constantInputWidth1_f_32x4 = vdupq_n_f32(float(inputWidth - 1u));
4847 const float32x4_t constantInputHeight1_f_32x4 = vdupq_n_f32(float(inputHeight - 1u));
4848
4849 const uint32x4_t constantInputStrideElements_u_32x4 = vdupq_n_u32(inputStrideElements);
4850 const uint32x4_t constantInputHeight1_u_32x4 = vdupq_n_u32(inputHeight - 1u);
4851
4852 unsigned int validPixels[8];
4853
4854 unsigned int topLeftOffsetsElements[8];
4855 unsigned int bottomLeftOffsetsElements[8];
4856
4857 uint8_t pixels[32];
4858
4859 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4860 {
4861 PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
4862
4863 input_LT_output->bilinearValues<VectorF2>(y, rowLookupData);
4864
4865 float32x4_t additionalInputOffsetX0123_f_32x4 = conststant0123_f_32x4;
4866 float32x4_t additionalInputOffsetX4567_f_32x4 = conststant4567_f_32x4;
4867
4868 const float32x4_t additionalInputOffsetY_f_32x4 = vdupq_n_f32(float(y));
4869
4870 for (unsigned int x = 0u; x < outputWidth; x += 8u)
4871 {
4872 if (x + 8u > outputWidth)
4873 {
4874 // the last iteration will not fit into the output frame,
4875 // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
4876
4877 ocean_assert(x >= 8u && outputWidth > 8u);
4878 const unsigned int newX = outputWidth - 8u;
4879
4880 ocean_assert(x > newX);
4881 const unsigned int xOffset = x - newX;
4882
4883 outputPixelData -= xOffset;
4884
4885 if (offset)
4886 {
4887 additionalInputOffsetX0123_f_32x4 = vsubq_f32(additionalInputOffsetX0123_f_32x4, vdupq_n_f32(float(xOffset)));
4888 additionalInputOffsetX4567_f_32x4 = vsubq_f32(additionalInputOffsetX4567_f_32x4, vdupq_n_f32(float(xOffset)));
4889 }
4890
4891 x = newX;
4892
4893 // the for loop will stop after this iteration
4894 ocean_assert(!(x + 8u < outputWidth));
4895 }
4896
4897 const float32x4x2_t inputPositions0123_f_32x4x2 = vld2q_f32((const float*)(rowLookupData + x + 0u));
4898 const float32x4x2_t inputPositions4567_f_32x4x2 = vld2q_f32((const float*)(rowLookupData + x + 4u));
4899
4900 float32x4_t inputPositionsX0123_f_32x4 = inputPositions0123_f_32x4x2.val[0];
4901 float32x4_t inputPositionsY0123_f_32x4 = inputPositions0123_f_32x4x2.val[1];
4902
4903 float32x4_t inputPositionsX4567_f_32x4 = inputPositions4567_f_32x4x2.val[0];
4904 float32x4_t inputPositionsY4567_f_32x4 = inputPositions4567_f_32x4x2.val[1];
4905
4906 if (offset)
4907 {
4908 inputPositionsX0123_f_32x4 = vaddq_f32(inputPositionsX0123_f_32x4, additionalInputOffsetX0123_f_32x4);
4909 inputPositionsY0123_f_32x4 = vaddq_f32(inputPositionsY0123_f_32x4, additionalInputOffsetY_f_32x4);
4910
4911 inputPositionsX4567_f_32x4 = vaddq_f32(inputPositionsX4567_f_32x4, additionalInputOffsetX4567_f_32x4);
4912 inputPositionsY4567_f_32x4 = vaddq_f32(inputPositionsY4567_f_32x4, additionalInputOffsetY_f_32x4);
4913
4914 additionalInputOffsetX0123_f_32x4 = vaddq_f32(additionalInputOffsetX0123_f_32x4, constantEight_f_32x4);
4915 additionalInputOffsetX4567_f_32x4 = vaddq_f32(additionalInputOffsetX4567_f_32x4, constantEight_f_32x4);
4916 }
4917
4918 // now we check whether we are inside the input frame
4919 const uint32x4_t validPixelsX0123_u_32x4 = vandq_u32(vcltq_f32(inputPositionsX0123_f_32x4, constantInputWidth1_f_32x4), vcgeq_f32(inputPositionsX0123_f_32x4, constantZero_f_32x4)); // inputPosition.x() >= 0 && inputPosition.x() < (inputWidth - 1) ? 0xFFFFFF : 0x000000
4920 const uint32x4_t validPixelsX4567_u_32x4 = vandq_u32(vcltq_f32(inputPositionsX4567_f_32x4, constantInputWidth1_f_32x4), vcgeq_f32(inputPositionsX4567_f_32x4, constantZero_f_32x4));
4921
4922 const uint32x4_t validPixelsY0123_u_32x4 = vandq_u32(vcltq_f32(inputPositionsY0123_f_32x4, constantInputHeight1_f_32x4), vcgeq_f32(inputPositionsY0123_f_32x4, constantZero_f_32x4)); // inputPosition.y() >= 0 && inputPosition.y() < (inputHeight - 1) ? 0xFFFFFF : 0x000000
4923 const uint32x4_t validPixelsY4567_u_32x4 = vandq_u32(vcltq_f32(inputPositionsY4567_f_32x4, constantInputHeight1_f_32x4), vcgeq_f32(inputPositionsY4567_f_32x4, constantZero_f_32x4));
4924
4925 const uint32x4_t validPixels0123_u_32x4 = vandq_u32(validPixelsX0123_u_32x4, validPixelsY0123_u_32x4); // is_inside_input_frame(inputPosition) ? 0xFFFFFF : 0x000000
4926 const uint32x4_t validPixels4567_u_32x4 = vandq_u32(validPixelsX4567_u_32x4, validPixelsY4567_u_32x4);
4927
4928 vst1q_u32(validPixels + 0, validPixels0123_u_32x4);
4929 vst1q_u32(validPixels + 4, validPixels4567_u_32x4);
4930
4931
4932 const uint32x4_t inputPositionsLeft0123_u_32x4 = vcvtq_u32_f32(inputPositionsX0123_f_32x4);
4933 const uint32x4_t inputPositionsLeft4567_u_32x4 = vcvtq_u32_f32(inputPositionsX4567_f_32x4);
4934
4935 const uint32x4_t inputPositionsTop0123_u_32x4 = vcvtq_u32_f32(inputPositionsY0123_f_32x4);
4936 const uint32x4_t inputPositionsTop4567_u_32x4 = vcvtq_u32_f32(inputPositionsY4567_f_32x4);
4937
4938 const uint32x4_t inputPositionsBottom0123_u_32x4 = vminq_u32(vaddq_u32(inputPositionsTop0123_u_32x4, constantOne_u_32x4), constantInputHeight1_u_32x4);
4939 const uint32x4_t inputPositionsBottom4567_u_32x4 = vminq_u32(vaddq_u32(inputPositionsTop4567_u_32x4, constantOne_u_32x4), constantInputHeight1_u_32x4);
4940
4941
4942 const uint32x4_t topLeftOffsetsElements0123_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft0123_u_32x4, constantChannels_u_32x4), inputPositionsTop0123_u_32x4, constantInputStrideElements_u_32x4); // topLeftOffset = top * strideElements + left * channels
4943 vst1q_u32(topLeftOffsetsElements + 0, topLeftOffsetsElements0123_u_32x4);
4944 const uint32x4_t topLeftOffsetsElements4567_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft4567_u_32x4, constantChannels_u_32x4), inputPositionsTop4567_u_32x4, constantInputStrideElements_u_32x4);
4945 vst1q_u32(topLeftOffsetsElements + 4, topLeftOffsetsElements4567_u_32x4);
4946
4947 const uint32x4_t bottomLeftOffsetsElements0123_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft0123_u_32x4, constantChannels_u_32x4), inputPositionsBottom0123_u_32x4, constantInputStrideElements_u_32x4);
4948 vst1q_u32(bottomLeftOffsetsElements + 0, bottomLeftOffsetsElements0123_u_32x4);
4949 const uint32x4_t bottomLeftOffsetsElements4567_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft4567_u_32x4, constantChannels_u_32x4), inputPositionsBottom4567_u_32x4, constantInputStrideElements_u_32x4);
4950 vst1q_u32(bottomLeftOffsetsElements + 4, bottomLeftOffsetsElements4567_u_32x4);
4951
4952
4953 // we determine the fractional portions of the x' and y' and [0.0, 1.0] -> [0, 128]
4954 float32x4_t tx0123_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsX0123_f_32x4, vcvtq_f32_u32(inputPositionsLeft0123_u_32x4)), constant128_f_32x4);
4955 float32x4_t tx4567_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsX4567_f_32x4, vcvtq_f32_u32(inputPositionsLeft4567_u_32x4)), constant128_f_32x4);
4956
4957 float32x4_t ty0123_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsY0123_f_32x4, vcvtq_f32_u32(inputPositionsTop0123_u_32x4)), constant128_f_32x4);
4958 float32x4_t ty4567_f_32x4 = vmulq_f32(vsubq_f32(inputPositionsY4567_f_32x4, vcvtq_f32_u32(inputPositionsTop4567_u_32x4)), constant128_f_32x4);
4959
4960 const uint32x4_t tx0123_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(tx0123_f_32x4, vdupq_n_f32(0.5)));
4961 const uint32x4_t tx4567_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(tx4567_f_32x4, vdupq_n_f32(0.5)));
4962
4963 const uint32x4_t ty0123_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(ty0123_f_32x4, vdupq_n_f32(0.5)));
4964 const uint32x4_t ty4567_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(ty4567_f_32x4, vdupq_n_f32(0.5)));
4965
4966 const uint16x8_t tx01234567_128_u_16x8 = vcombine_u16(vmovn_u32(tx0123_128_u_32x4), vmovn_u32(tx4567_128_u_32x4));
4967 const uint16x8_t ty01234567_128_u_16x8 = vcombine_u16(vmovn_u32(ty0123_128_u_32x4), vmovn_u32(ty4567_128_u_32x4));
4968
4969 const uint8x16_t tx_ty_128_u_8x16 = vcombine_u8(vmovn_u16(tx01234567_128_u_16x8), vmovn_u16(ty01234567_128_u_16x8));
4970
4971
4972 vst1q_u8(pixels + 0, constantBorderColor_u_8x16); // initialize with border color
4973 vst1q_u8(pixels + 16, constantBorderColor_u_8x16);
4974
4975 struct LeftRightPixel
4976 {
4977 uint8_t left;
4978 uint8_t right;
4979 };
4980
4981 static_assert(sizeof(LeftRightPixel) == 2, "Invalid data type!");
4982
4983 // we gather the individual source pixel values from the source image,
4984 // based on the calculated pixel locations
4985 for (unsigned int i = 0u; i < 8u; ++i)
4986 {
4987 if (validPixels[i])
4988 {
4989 ocean_assert((topLeftOffsetsElements[i] % inputStrideElements) < inputWidth - 1u); // we need to have one additional pixel to the right (as we copy two pixels at once)
4990 ocean_assert((bottomLeftOffsetsElements[i] % inputStrideElements) < inputWidth - 1u);
4991
4992 ((LeftRightPixel*)pixels)[0u + i] = *(LeftRightPixel*)(input + topLeftOffsetsElements[i]);
4993 ((LeftRightPixel*)pixels)[8u + i] = *(LeftRightPixel*)(input + bottomLeftOffsetsElements[i]);
4994 }
4995 }
4996
4997 const uint8x8x2_t topLeft_topRight_u_8x8x2 = vld2_u8(pixels);
4998 const uint8x8x2_t bottomLeft_bottomRight_u_8x8x2 = vld2_u8(pixels + 16);
4999
5000 interpolate8Pixels1Channel8BitNEON(topLeft_topRight_u_8x8x2.val[0], topLeft_topRight_u_8x8x2.val[1], bottomLeft_bottomRight_u_8x8x2.val[0], bottomLeft_bottomRight_u_8x8x2.val[1], tx_ty_128_u_8x16, outputPixelData);
5001
5002 outputPixelData += 8;
5003 }
5004 }
5005}
5006
5007template <unsigned int tChannels>
5008void FrameInterpolatorBilinear::lookup8BitPerChannelSubsetNEON(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, const uint8_t* borderColor, uint8_t* output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5009{
5010 ocean_assert(input_LT_output != nullptr);
5011 ocean_assert(input != nullptr && output != nullptr);
5012
5013 ocean_assert(inputWidth != 0u && inputHeight != 0u);
5014 ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
5015
5016 using PixelType = typename DataType<uint8_t, tChannels>::Type;
5017
5018 const uint8_t zeroColor[tChannels] = {uint8_t(0)};
5019 const PixelType* const bColor = borderColor ? (PixelType*)borderColor : (PixelType*)zeroColor;
5020
5021 const unsigned int outputWidth = (unsigned int)(input_LT_output->sizeX());
5022 ocean_assert(outputWidth >= 4u);
5023
5024 static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
5025
5026 const unsigned int inputStrideElements = inputWidth * tChannels + inputPaddingElements;
5027 const unsigned int outputStrideElements = outputWidth * tChannels + outputPaddingElements;
5028
5029 Memory rowLookupMemory = Memory::create<VectorF2>(outputWidth);
5030 VectorF2* const rowLookupData = rowLookupMemory.data<VectorF2>();
5031
5032 const float32x4_t constantZero_f_32x4 = vdupq_n_f32(0.0f); // [0.0f, 0.0f, 0.0f, 0.0f]
5033 const float32x4_t constantFour_f_32x4 = vdupq_n_f32(4.0f); // [4.0f, 4.0f, 4.0f, 4.0f]
5034
5035 // [0.0f, 1.0f, 2.0f, 3.0f]
5036 const float f_0123[4] = {0.0f, 1.0f, 2.0f, 3.0f};
5037 float32x4_t conststant0123_f_32x4 = vld1q_f32(f_0123);
5038
5039 const uint32x4_t constantOne_u_32x4 = vdupq_n_u32(1u);
5040
5041 const uint32x4_t constantChannels_u_32x4 = vdupq_n_u32(tChannels);
5042
5043 const float32x4_t constantInputWidth1_f_32x4 = vdupq_n_f32(float(inputWidth - 1u));
5044 const float32x4_t constantInputHeight1_f_32x4 = vdupq_n_f32(float(inputHeight - 1u));
5045
5046 const uint32x4_t constantInputStrideElements_u_32x4 = vdupq_n_u32(inputStrideElements);
5047 const uint32x4_t constantInputWidth1_u_32x4 = vdupq_n_u32(inputWidth - 1u);
5048 const uint32x4_t constantInputHeight1_u_32x4 = vdupq_n_u32(inputHeight - 1u);
5049
5050 unsigned int validPixels[4];
5051
5052 unsigned int topLeftOffsetsElements[4];
5053 unsigned int topRightOffsetsElements[4];
5054 unsigned int bottomLeftOffsetsElements[4];
5055 unsigned int bottomRightOffsetsElements[4];
5056
5057 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
5058 {
5059 PixelType* outputPixelData = (PixelType*)(output + y * outputStrideElements);
5060
5061 input_LT_output->bilinearValues<VectorF2>(y, rowLookupData);
5062
5063 float32x4_t additionalInputOffsetX_f_32x4 = conststant0123_f_32x4;
5064 const float32x4_t additionalInputOffsetY_f_32x4 = vdupq_n_f32(float(y));
5065
5066 for (unsigned int x = 0u; x < outputWidth; x += 4u)
5067 {
5068 if (x + 4u > outputWidth)
5069 {
5070 // the last iteration will not fit into the output frame,
5071 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
5072
5073 ocean_assert(x >= 4u && outputWidth > 4u);
5074 const unsigned int newX = outputWidth - 4u;
5075
5076 ocean_assert(x > newX);
5077 const unsigned int xOffset = x - newX;
5078
5079 outputPixelData -= xOffset;
5080
5081 if (offset)
5082 {
5083 additionalInputOffsetX_f_32x4 = vsubq_f32(additionalInputOffsetX_f_32x4, vdupq_n_f32(float(xOffset)));
5084 }
5085
5086 x = newX;
5087
5088 // the for loop will stop after this iteration
5089 ocean_assert(!(x + 4u < outputWidth));
5090 }
5091
5092 const float32x4x2_t inputPositions_f_32x4x2 = vld2q_f32((const float*)(rowLookupData + x));
5093
5094 float32x4_t inputPositionsX_f_32x4 = inputPositions_f_32x4x2.val[0];
5095 float32x4_t inputPositionsY_f_32x4 = inputPositions_f_32x4x2.val[1];
5096
5097 if (offset)
5098 {
5099 inputPositionsX_f_32x4 = vaddq_f32(inputPositionsX_f_32x4, additionalInputOffsetX_f_32x4);
5100 inputPositionsY_f_32x4 = vaddq_f32(inputPositionsY_f_32x4, additionalInputOffsetY_f_32x4);
5101
5102 additionalInputOffsetX_f_32x4 = vaddq_f32(additionalInputOffsetX_f_32x4, constantFour_f_32x4);
5103 }
5104
5105 // now we check whether we are inside the input frame
5106 const uint32x4_t validPixelsX_u_32x4 = vandq_u32(vcleq_f32(inputPositionsX_f_32x4, constantInputWidth1_f_32x4), vcgeq_f32(inputPositionsX_f_32x4, constantZero_f_32x4)); // inputPosition.x() >= 0 && inputPosition.x() <= (inputWidth - 1) ? 0xFFFFFF : 0x000000
5107 const uint32x4_t validPixelsY_u_32x4 = vandq_u32(vcleq_f32(inputPositionsY_f_32x4, constantInputHeight1_f_32x4), vcgeq_f32(inputPositionsY_f_32x4, constantZero_f_32x4)); // inputPosition.y() >= 0 && inputPosition.y() <= (inputHeight - 1) ? 0xFFFFFF : 0x000000
5108
5109 const uint32x4_t validPixels_u_32x4 = vandq_u32(validPixelsX_u_32x4, validPixelsY_u_32x4); // is_inside_input_frame(inputPosition) ? 0xFFFFFF : 0x000000
5110
5111 vst1q_u32(validPixels, validPixels_u_32x4);
5112
5113 const uint32x4_t inputPositionsLeft_u_32x4 = vcvtq_u32_f32(inputPositionsX_f_32x4);
5114 const uint32x4_t inputPositionsTop_u_32x4 = vcvtq_u32_f32(inputPositionsY_f_32x4);
5115
5116 const uint32x4_t inputPositionsRight_u_32x4 = vminq_u32(vaddq_u32(inputPositionsLeft_u_32x4, constantOne_u_32x4), constantInputWidth1_u_32x4);
5117 const uint32x4_t inputPositionsBottom_u_32x4 = vminq_u32(vaddq_u32(inputPositionsTop_u_32x4, constantOne_u_32x4), constantInputHeight1_u_32x4);
5118
5119 const uint32x4_t topLeftOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft_u_32x4, constantChannels_u_32x4), inputPositionsTop_u_32x4, constantInputStrideElements_u_32x4); // topLeftOffset = top * strideElements + left * channels
5120 const uint32x4_t topRightOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsRight_u_32x4, constantChannels_u_32x4), inputPositionsTop_u_32x4, constantInputStrideElements_u_32x4);
5121 const uint32x4_t bottomLeftOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsLeft_u_32x4, constantChannels_u_32x4), inputPositionsBottom_u_32x4, constantInputStrideElements_u_32x4);
5122 const uint32x4_t bottomRightOffsetsElements_u_32x4 = vmlaq_u32(vmulq_u32(inputPositionsRight_u_32x4, constantChannels_u_32x4), inputPositionsBottom_u_32x4, constantInputStrideElements_u_32x4);
5123
5124 vst1q_u32(topLeftOffsetsElements, topLeftOffsetsElements_u_32x4);
5125 vst1q_u32(topRightOffsetsElements, topRightOffsetsElements_u_32x4);
5126 vst1q_u32(bottomLeftOffsetsElements, bottomLeftOffsetsElements_u_32x4);
5127 vst1q_u32(bottomRightOffsetsElements, bottomRightOffsetsElements_u_32x4);
5128
5129 // we determine the fractional portions of the x' and y':
5130 float32x4_t tx_f_32x4 = vsubq_f32(inputPositionsX_f_32x4, vcvtq_f32_u32(inputPositionsLeft_u_32x4));
5131 float32x4_t ty_f_32x4 = vsubq_f32(inputPositionsY_f_32x4, vcvtq_f32_u32(inputPositionsTop_u_32x4));
5132
5133 // we use integer interpolation [0.0, 1.0] -> [0, 128]
5134 tx_f_32x4 = vmulq_f32(tx_f_32x4, vdupq_n_f32(128.0f));
5135 ty_f_32x4 = vmulq_f32(ty_f_32x4, vdupq_n_f32(128.0f));
5136
5137 const uint32x4_t tx_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(tx_f_32x4, vdupq_n_f32(0.5)));
5138 const uint32x4_t ty_128_u_32x4 = vcvtq_u32_f32(vaddq_f32(ty_f_32x4, vdupq_n_f32(0.5)));
5139
5140 interpolate4Pixels8BitPerChannelNEON<tChannels>(input, topLeftOffsetsElements, topRightOffsetsElements, bottomLeftOffsetsElements, bottomRightOffsetsElements, validPixels, *bColor, tx_128_u_32x4, ty_128_u_32x4, outputPixelData);
5141
5142 outputPixelData += 4;
5143 }
5144 }
5145}
5146
5147#endif // defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5148
5149template <unsigned int tChannels>
5150void FrameInterpolatorBilinear::lookupMask8BitPerChannelSubset(const uint8_t* input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable* input_LT_output, const bool offset, uint8_t* output, uint8_t* outputMask, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5151{
5152 ocean_assert(input_LT_output != nullptr);
5153 ocean_assert(input != nullptr && output != nullptr);
5154
5155 ocean_assert(inputWidth != 0u && inputHeight != 0u);
5156 ocean_assert(firstRow + numberRows <= input_LT_output->sizeY());
5157
5158 using PixelType = typename DataType<uint8_t, tChannels>::Type;
5159
5160 const unsigned int columns = (unsigned int)(input_LT_output->sizeX());
5161
5162 const unsigned int outputStrideElements = tChannels * columns + outputPaddingElements;
5163 const unsigned int outputMaskStrideElements = columns + outputMaskPaddingElements;
5164
5165 static_assert(std::is_same<Vector2, LookupTable::Type>::value, "Invalid data type!");
5166
5167 const Scalar inputWidth1 = Scalar(inputWidth - 1u);
5168 const Scalar inputHeight1 = Scalar(inputHeight - 1u);
5169
5170 Memory rowLookupMemory = Memory::create<Vector2>(columns);
5171 Vector2* const rowLookupData = rowLookupMemory.data<Vector2>();
5172
5173 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
5174 {
5175 input_LT_output->bilinearValues(y, rowLookupData);
5176
5177 PixelType* outputData = (PixelType*)(output + y * outputStrideElements);
5178 uint8_t* outputMaskData = outputMask + y * outputMaskStrideElements;
5179
5180 for (unsigned int x = 0u; x < columns; ++x)
5181 {
5182 const Vector2& lookupValue = rowLookupData[x];
5183
5184 const Vector2 inputPosition = offset ? Vector2(Scalar(x) + lookupValue.x(), Scalar(y) + lookupValue.y()) : lookupValue;
5185
5186 if (inputPosition.x() >= 0 && inputPosition.y() >= 0 && inputPosition.x() <= inputWidth1 && inputPosition.y() <= inputHeight1)
5187 {
5188 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(input, inputWidth, inputHeight, inputPaddingElements, inputPosition, (uint8_t*)(outputData));
5189 *outputMaskData = maskValue;
5190 }
5191 else
5192 {
5193 *outputMaskData = 0xFFu - maskValue;
5194 }
5195
5196 outputData++;
5197 outputMaskData++;
5198 }
5199 }
5200}
5201
5202template <unsigned int tChannels>
5203void FrameInterpolatorBilinear::scale8BitPerChannel(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
5204{
5205 ocean_assert(source != nullptr && target != nullptr);
5206 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
5207 ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
5208 ocean_assert(sourceX_s_targetX > 0.0);
5209 ocean_assert(sourceY_s_targetY > 0.0);
5210
5211 if (sourceWidth == targetWidth && sourceHeight == targetHeight)
5212 {
5213 FrameConverter::subFrame<uint8_t>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, 0u, 0u, 0u, 0u, sourceWidth, sourceHeight, sourcePaddingElements, targetPaddingElements);
5214 return;
5215 }
5216
5217 if (worker && sourceWidth * tChannels >= 16u && targetWidth >= 8u)
5218 {
5219#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5220 if (sourceWidth <= 65535u && sourceHeight <= 65535u && targetWidth <= 65535u && targetHeight <= 65535u)
5221 {
5222 worker->executeFunction(Worker::Function::createStatic(&scale8BitPerChannelSubset7BitPrecisionNEON, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
5223 return;
5224 }
5225#else
5226 worker->executeFunction(Worker::Function::createStatic(&scale8BitPerChannelSubset<tChannels>, source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, targetHeight);
5227#endif
5228 }
5229 else
5230 {
5231 if (sourceWidth * tChannels >= 16u && targetWidth >= 8u)
5232 {
5233#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5234 if (sourceWidth <= 65535u && sourceHeight <= 65535u && targetWidth <= 65535u && targetHeight <= 65535u)
5235 {
5236 scale8BitPerChannelSubset7BitPrecisionNEON(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, tChannels, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
5237 return;
5238 }
5239#endif
5240 }
5241
5242 scale8BitPerChannelSubset<tChannels>(source, target, sourceWidth, sourceHeight, targetWidth, targetHeight, sourceX_s_targetX, sourceY_s_targetY, sourcePaddingElements, targetPaddingElements, 0u, targetHeight);
5243 }
5244}
5245
5246template <unsigned int tChannels>
5247void FrameInterpolatorBilinear::scale8BitPerChannelSubset(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
5248{
5249 ocean_assert(source != nullptr && target != nullptr);
5250 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
5251 ocean_assert_and_suppress_unused(targetWidth >= 1u && targetHeight >= 1u, targetHeight);
5252 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5253
5254 const Scalar sourceX_T_targetX = Scalar(sourceX_s_targetX);
5255 const Scalar sourceY_T_targetY = Scalar(sourceY_s_targetY);
5256
5257 /*
5258 * We determine the sub-pixel accurate source location for each target pixel as follows:
5259 *
5260 * Example with a downsampling by factor 4:
5261 * sourceRow with 12 pixels: | 0 1 2 3 4 5 6 7 8 9 A B |
5262 * targetRow with 3 pixels: | 0 1 2 |
5263 *
5264 * Thus, the source row can be separated into three blocks;
5265 * and we want to extract the color information from the center of the blocks:
5266 * sourceRow with 12 pixels: | 0 1 2 3 | 4 5 6 7 | 8 9 A B |
5267 * targetRow with 3 pixels: | 0 | 1 | 2 | (sourceX_s_targetX = 4)
5268 *
5269 * Thus, we add 0.5 to each target coordinate before converting it to a source location;
5270 * and subtract 0.5 again afterwards:
5271 * sourceX = (targetX + 0.5) * sourceX_s_targetX - 0.5
5272 *
5273 * e.g., (0 + 0.5) * 4 - 0.5 = 1.5
5274 * (1 + 0.5) * 4 - 0.5 = 5.5
5275 *
5276 *
5277 * Example with a downsampling by factor 3:
5278 * sourceRow with 9 pixels: | 0 1 2 3 4 5 6 7 8 |
5279 * targetRow with 3 pixels: | 0 1 2 |
5280 *
5281 * sourceRow with 9 pixels: | 0 1 2 | 3 4 5 | 6 7 8 |
5282 * targetRow with 3 pixels: | 0 | 1 | 2 | (sourceX_s_targetX = 3)
5283 *
5284 * e.g., (0 + 0.5) * 3 - 0.5 = 1
5285 * (1 + 0.5) * 3 - 0.5 = 4
5286 *
5287 *
5288 * Example with a downsampling by factor 2:
5289 * sourceRow with 6 pixels: | 0 1 2 3 4 5 |
5290 * targetRow with 3 pixels: | 0 1 2 |
5291 *
5292 * sourceRow with 6 pixels: | 0 1 | 2 3 | 4 5 |
5293 * targetRow with 3 pixels: | 0 | 1 | 2 | (sourceX_s_targetX = 2)
5294 *
5295 * e.g., (0 + 0.5) * 2 - 0.5 = 0.5
5296 * (1 + 0.5) * 2 - 0.5 = 2.5
5297 *
5298 *
5299 * we can simplify the calculation (as we have a constant term):
5300 * sourceX = (sourceX_s_targetX * targetX) + (sourceX_s_targetX * 0.5 - 0.5)
5301 */
5302
5303 const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
5304
5305 const Scalar sourceX_T_targetXOffset = sourceX_T_targetX * Scalar(0.5) - Scalar(0.5);
5306 const Scalar sourceY_T_targetYOffset = sourceY_T_targetY * Scalar(0.5) - Scalar(0.5);
5307
5308 const Scalar sourceWidth_1 = Scalar(sourceWidth - 1u);
5309 const Scalar sourceHeight_1 = Scalar(sourceHeight - 1u);
5310
5311 target += (targetWidth * tChannels + targetPaddingElements) * firstTargetRow;
5312
5313 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5314 {
5315 const Scalar sy = minmax(Scalar(0), sourceY_T_targetYOffset + sourceY_T_targetY * Scalar(y), sourceHeight_1);
5316 ocean_assert(sy >= Scalar(0) && sy < Scalar(sourceHeight));
5317
5318 const unsigned int sTop = (unsigned int)sy;
5319 ocean_assert(sy >= Scalar(sTop));
5320
5321 const Scalar ty = sy - Scalar(sTop);
5322 ocean_assert(ty >= 0 && ty <= 1);
5323
5324 const unsigned int factorBottom = (unsigned int)(ty * Scalar(128) + Scalar(0.5));
5325 const unsigned int factorTop = 128u - factorBottom;
5326
5327 const uint8_t* const sourceTop = source + sourceStrideElements * sTop;
5328 const uint8_t* const sourceBottom = (sTop + 1u < sourceHeight) ? sourceTop + sourceStrideElements : sourceTop;
5329
5330 for (unsigned int x = 0; x < targetWidth; ++x)
5331 {
5332 const Scalar sx = minmax(Scalar(0), sourceX_T_targetXOffset + sourceX_T_targetX * Scalar(x), sourceWidth_1);
5333 ocean_assert(sx >= Scalar(0) && sx < Scalar(sourceWidth));
5334
5335 const unsigned int sLeft = (unsigned int)sx;
5336 ocean_assert(sx >= Scalar(sLeft));
5337
5338 const Scalar tx = sx - Scalar(sLeft);
5339 ocean_assert(tx >= 0 && tx <= 1);
5340
5341 const unsigned int factorRight = (unsigned int)(tx * Scalar(128) + Scalar(0.5));
5342 const unsigned int factorLeft = 128u - factorRight;
5343
5344 const unsigned int sourceRightOffset = sLeft + 1u < sourceWidth ? tChannels : 0u;
5345
5346 const uint8_t* const sourceTopLeft = sourceTop + sLeft * tChannels;
5347 const uint8_t* const sourceBottomLeft = sourceBottom + sLeft * tChannels;
5348
5349 const unsigned int factorTopLeft = factorTop * factorLeft;
5350 const unsigned int factorTopRight = factorTop * factorRight;
5351 const unsigned int factorBottomLeft = factorBottom * factorLeft;
5352 const unsigned int factorBottomRight = factorBottom * factorRight;
5353
5354 for (unsigned int n = 0u; n < tChannels; ++n)
5355 {
5356 target[n] = (uint8_t)((sourceTopLeft[n] * factorTopLeft + sourceTopLeft[sourceRightOffset + n] * factorTopRight
5357 + sourceBottomLeft[n] * factorBottomLeft + sourceBottomLeft[sourceRightOffset + n] * factorBottomRight + 8192u) >> 14u);
5358 }
5359
5360 target += tChannels;
5361 }
5362
5363 target += targetPaddingElements;
5364 }
5365}
5366
5367template <typename T>
5368void FrameInterpolatorBilinear::interpolateRowVertical(const T* sourceRowTop, const T* sourceRowBottom, T* targetRow, const unsigned int elements, const float factorBottom)
5369{
5370 ocean_assert(sourceRowTop != nullptr);
5371 ocean_assert(sourceRowBottom != nullptr);
5372 ocean_assert(targetRow != nullptr);
5373 ocean_assert(elements >= 1u);
5374 ocean_assert(factorBottom >= 0.0f && factorBottom <= 1.0f);
5375
5376 using FloatType = typename FloatTyper<T>::Type;
5377
5378 const FloatType internalFactorBottom = FloatType(factorBottom);
5379 const FloatType internalFactorTop = FloatType(1.0f - factorBottom);
5380
5381 for (unsigned int n = 0u; n < elements; ++n)
5382 {
5383 targetRow[n] = T(FloatType(sourceRowTop[n]) * internalFactorTop + FloatType(sourceRowBottom[n]) * internalFactorBottom);
5384 }
5385}
5386
5387template <typename T, unsigned int tChannels>
5388void FrameInterpolatorBilinear::interpolateRowHorizontal(const T* extendedSourceRow, T* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const float* interpolationFactorsRight)
5389{
5390 static_assert(tChannels != 0u, "Invalid channel number!");
5391
5392 ocean_assert(extendedSourceRow != nullptr);
5393 ocean_assert(targetRow != nullptr);
5394 ocean_assert(targetWidth >= 1u);
5395 ocean_assert(interpolationLocations != nullptr);
5396 ocean_assert(interpolationFactorsRight != nullptr);
5397 ocean_assert_and_suppress_unused(channels == tChannels, channels);
5398
5399 using FloatType = typename FloatTyper<T>::Type;
5400
5401 for (unsigned int x = 0u; x < targetWidth; ++x)
5402 {
5403 const FloatType internalFactorRight = FloatType(interpolationFactorsRight[x]);
5404 ocean_assert(internalFactorRight >= FloatType(0) && internalFactorRight <= FloatType(1));
5405
5406 const FloatType internalFactorLeft = FloatType(1.0f - interpolationFactorsRight[x]);
5407
5408 const unsigned int& leftLocation = interpolationLocations[x];
5409 const unsigned int rightLocation = leftLocation + tChannels; // location is defined in relation to elements, not to pixels
5410
5411 for (unsigned int n = 0u; n < tChannels; ++n)
5412 {
5413 targetRow[x * tChannels + n] = T(FloatType(extendedSourceRow[leftLocation + n]) * internalFactorLeft + FloatType(extendedSourceRow[rightLocation + n]) * internalFactorRight);
5414 }
5415 }
5416}
5417
5418#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5419
5420#ifdef OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION
5421
// Specialization for frames with 2 channels (8 bit per channel) which creates eight target pixels per inner-loop iteration with NEON instructions.
// Source positions are handled as fixed point numbers with 16 bit sub-pixel accuracy; the interpolation factors are reduced to 7 bit precision (values in the range [0, 128]).
// - source, target: both must be valid (asserted); the rows to be written are located via firstTargetRow inside this function
// - sourceWidth: range [2, 65535]; sourceHeight: range [1, 65535]; targetWidth: range [8, 65535]; targetHeight: range [1, 65535] (all asserted)
// - sourceX_s_targetX, sourceY_s_targetY: scale factors converting target coordinates to source coordinates, both must be > 0 (asserted)
// - sourcePaddingElements, targetPaddingElements: must be 0 (asserted), padding is not supported by this implementation
// - firstTargetRow, numberTargetRows: the target rows [firstTargetRow, firstTargetRow + numberTargetRows) are written
5422template <>
5423inline void FrameInterpolatorBilinear::scale8BitPerChannelSubset7BitPrecisionNEON<2u, 8u>(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
5424{
5425 ocean_assert(source != nullptr && target != nullptr);
5426 ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
5427 ocean_assert(sourceHeight >= 1u && sourceHeight <= 65535u);
5428 ocean_assert(targetWidth >= 8u && targetWidth <= 65535u);
5429 ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
5430 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5431
5432 ocean_assert(sourcePaddingElements == 0u); // not supported
5433 ocean_assert(targetPaddingElements == 0u);
5434
5435 using PixelType = typename DataType<uint8_t, 2u>::Type;
5436
 // both frames are addressed pixel-wise (PixelType) instead of element-wise, this relies on the absence of padding elements
5437 PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
5438 const PixelType* const sourcePixelData = (const PixelType*)source;
5439
5440 // our offset values for the eight left pixels in relation to the first pixel of the row
5441 unsigned int leftOffsets[8];
5442
5443 // this function uses fixed point numbers with 16 bit for the calculation of the interpolation positions and factors:
5444 // fixedPointLocation = floatLocation * 2^16
5445 //
5446 // [FEDCBA98, 76543210]
5447 // [pixel , subpixel]
5448 //
5449 // fixedPointLocation = pixel + subpixel / 2^16
5450 //
5451 // Thus, the upper 16 bit represent the location of e.g., the left pixel (for the linear interpolation)
5452 // while the lower 16 bit represent one of both interpolation factors (and 2^16 - subpixel represents the second interpolation factor)
5453
5454 const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
5455 const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
5456
 // the constant offset (scale * 0.5 - 0.5) so that each target pixel samples the center of its source block; the offset is negative for scale factors below 1
5457 const int targetOffsetX_fixed16 = (int)(double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
5458 const int targetOffsetY_fixed16 = (int)(double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
5459
5460 // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
5461 const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
5462
5463 // we store 4 integers: [targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16]
5464 const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
5465
5466 // we store 4 integers: [sourceWidth - 2, sourceWidth - 2, sourceWidth - 2, sourceWidth - 2]
5467 const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
5468
5469 // we store 4 integers: [0, 0, 0, 0]
5470 const int32x4_t m128_s_zero = vdupq_n_s32(0);
5471
5472 const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
5473 const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
5474
5475 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5476 {
 // the vertical source position (fixed point) of this target row, clamped to the range [0, sourceHeight - 1]
 // NOTE(review): the upper bound (sourceHeight - 1u) << 16u is handed to minmax<int>; for sourceHeight > 32768 this value does not fit into a 32 bit signed integer although the assert above allows up to 65535 - confirm the intended limit
5477 const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 + int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
5478
5479 const unsigned int sourceRowTop = sourceY_fixed16 >> 16u; // we must not round here
5480 const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
 // reducing the 16 bit factor to 7 bit (without rounding), resulting range [0, 127]
5481 const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
5482
5483 const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
5484 // factorTop = 128 - factorBottom
5485 const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
5486
 // in the last source row, top and bottom row are identical
5487 const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
5488
5489 const PixelType* const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
5490 const PixelType* const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
5491
5492 for (unsigned int x = 0; x < targetWidth; x += 8u)
5493 {
5494 if (x + 8u > targetWidth)
5495 {
5496 // the last iteration will not fit into the output frame,
5497 // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
5498
5499 ocean_assert(x >= 8u && targetWidth > 8u);
5500 const unsigned int newX = targetWidth - 8u;
5501
5502 ocean_assert(x > newX);
5503 targetPixelData -= x - newX;
5504
5505 x = newX;
5506
5507 // the for loop will stop after this iteration
5508 ocean_assert(!(x + 8u < targetWidth));
5509 }
5510
5511
5512 // we need eight successive x coordinates (as unsigned integers), split into two registers with four values each:
5513 // [x + 3, x + 2, x + 1; x + 0]
5514 const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
5515 const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
5516
5517 // we calculate the four source locations for our four target locations (for both registers), negative locations are clamped to zero
5518 const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
5519 const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
5520
5521 const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
5522 const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
5523
5524 // now we determine the pixel/integer accurate source locations
5525 // m128_u_left = min(floor(sourceX), sourceWidth - 2), so that the right neighbor (left + 1) is always located inside the source row
5526 const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2); // not vrshrq_n_u32 as we must not round here
5527 const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
5528
5529 // we store the offsets we have calculated
5530 vst1q_u32(leftOffsets + 0, m128_u_left_0123);
5531 vst1q_u32(leftOffsets + 4, m128_u_left_4567);
5532
5533
5534
5535 // we load the individual pixels to our four (de-interleaved) 8x8 bit registers (we do this for the top-left and top-right pixels)
5536 // note: loading of each pixel individually is significantly slower than loading two neighboring pixels within one iteration
5537
 // both register pairs are declared without initialization, the lane loads below write all eight lanes (0 to 7) of each of them
5538 uint8x8x2_t topLeftPixels;
5539 uint8x8x2_t topRightPixels;
5540
5541 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 0), topLeftPixels, 0);
5542 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 1), topRightPixels, 0);
5543
5544 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 0), topLeftPixels, 1);
5545 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 1), topRightPixels, 1);
5546
5547 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 0), topLeftPixels, 2);
5548 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 1), topRightPixels, 2);
5549
5550 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 0), topLeftPixels, 3);
5551 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 1), topRightPixels, 3);
5552
5553 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 0), topLeftPixels, 4);
5554 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 1), topRightPixels, 4);
5555
5556 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 0), topLeftPixels, 5);
5557 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 1), topRightPixels, 5);
5558
5559 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 0), topLeftPixels, 6);
5560 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 1), topRightPixels, 6);
5561
5562 topLeftPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 0), topLeftPixels, 7);
5563 topRightPixels = vld2_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 1), topRightPixels, 7);
5564
5565
5566 // we load the individual pixels to our four (de-interleaved) 8x8 bit registers (we do this for the bottom-left and bottom-right pixels)
5567
5568 uint8x8x2_t bottomLeftPixels;
5569 uint8x8x2_t bottomRightPixels;
5570
5571 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 0), bottomLeftPixels, 0);
5572 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 1), bottomRightPixels, 0);
5573
5574 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 0), bottomLeftPixels, 1);
5575 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 1), bottomRightPixels, 1);
5576
5577 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 0), bottomLeftPixels, 2);
5578 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 1), bottomRightPixels, 2);
5579
5580 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 0), bottomLeftPixels, 3);
5581 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 1), bottomRightPixels, 3);
5582
5583 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 0), bottomLeftPixels, 4);
5584 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 1), bottomRightPixels, 4);
5585
5586 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 0), bottomLeftPixels, 5);
5587 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 1), bottomRightPixels, 5);
5588
5589 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 0), bottomLeftPixels, 6);
5590 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 1), bottomRightPixels, 6);
5591
5592 bottomLeftPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 0), bottomLeftPixels, 7);
5593 bottomRightPixels = vld2_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 1), bottomRightPixels, 7);
5594
5595
5596
5597 // we determine the multiplication factors for the right pixels - which are already stored in the lower 16 bits
5598 // we need an accuracy of 7 bits (values between 0 and 128), the shift by 9 bits is applied with rounding:
5599 // 76 54 32 10
5600 // [F3 F2 F1 F0]
5601 const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5602 const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5603
5604 // as we have the pixel information de-interleaved, we can store all 8 interpolation factors together into one 8x8 bit register:
5605 const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
5606 const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
 // factorLeft = 128 - factorRight
5607 const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
5608
5609
5610
5611 // we determine the intermediate interpolation results for the top row (and we narrow down the 16 bit results to 8 bit results)
5612 uint16x8_t m128_muliplicationChannel_0 = vmull_u8(topLeftPixels.val[0], m64_u_factorsLeft);
5613 uint16x8_t m128_muliplicationChannel_1 = vmull_u8(topLeftPixels.val[1], m64_u_factorsLeft);
5614
5615 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, topRightPixels.val[0], m64_u_factorsRight);
5616 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, topRightPixels.val[1], m64_u_factorsRight);
5617
5618 uint8x8_t m64_topRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
5619 uint8x8_t m64_topRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
5620
5621
5622
5623 // we determine the intermediate interpolation results for the bottom row (and we narrow down the 16 bit results to 8 bit results)
5624 m128_muliplicationChannel_0 = vmull_u8(bottomLeftPixels.val[0], m64_u_factorsLeft);
5625 m128_muliplicationChannel_1 = vmull_u8(bottomLeftPixels.val[1], m64_u_factorsLeft);
5626
5627 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, bottomRightPixels.val[0], m64_u_factorsRight);
5628 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, bottomRightPixels.val[1], m64_u_factorsRight);
5629
5630 uint8x8_t m64_bottomRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
5631 uint8x8_t m64_bottomRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
5632
5633
5634
5635 // finally we determine the interpolation result between top and bottom row
5636 m128_muliplicationChannel_0 = vmull_u8(m64_topRowChannel_0, m64_u_factorsTop);
5637 m128_muliplicationChannel_1 = vmull_u8(m64_topRowChannel_1, m64_u_factorsTop);
5638
5639 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, m64_bottomRowChannel_0, m64_u_factorsBottom);
5640 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, m64_bottomRowChannel_1, m64_u_factorsBottom);
5641
5642
5643 // we narrow down the interpolation results and we store them
5644 uint8x8x2_t result;
5645 result.val[0] = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
5646 result.val[1] = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
5647
5648 // we write back the results and interleave them automatically
5649 vst2_u8((uint8_t*)targetPixelData, result);
5650
5651 targetPixelData += 8;
5652 }
5653
5654 // we need to process the last pixel(s) again: the NEON code above clamps the left pixel to sourceWidth - 2 while the interpolation factor is still taken from the unclamped location,
5655 // so every target pixel with a source location of at least sourceWidth - 1 may have received wrong interpolation factors
5656 // **TODO** this is just a temporary solution, check how we can avoid this additional step
5657
 // the first target pixel with a source location of at least sourceWidth - 1 (determined with fixed point precision)
5657 const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
5658
5659 for (unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
5660 {
5661 const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 + int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
5662
5663 const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
5664 ocean_assert(lastSourcePixelLeft < sourceWidth);
5665 const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
5666
5667 const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
5668
5669 const unsigned int factorRight = factorRight_fixed16 >> 9u;
5670 const unsigned int factorLeft = 128u - factorRight;
5671
 // targetPixelData points behind the last pixel of the current target row, thus pixel x is located (targetWidth - x) pixels in front of it
 // the 14 bit result (7 bit horizontal * 7 bit vertical) is rounded via + 8192 before shifting
5672 for (unsigned int c = 0u; c < 2u; ++c)
5673 {
5674 ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * (128u - factorBottom)
5675 + (((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
5676 }
5677 }
5678 }
5679}
5680
5681#endif // OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION
5682
5683#ifdef OCEAN_WE_KEEP_ALSO_THIS_SLOW_IMPLEMENTATION_SHOWING_A_SLIGHTLY_DIFFERENT_APPROACH
5684
5685template <>
5686inline void FrameInterpolatorBilinear::scale8BitPerChannelSubset7BitPrecisionNEON<2u, 8u>(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
5687{
5688 ocean_assert(source != nullptr && target != nullptr);
5689 ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
5690 ocean_assert(sourceHeight >= 0u && sourceHeight <= 65535u);
5691 ocean_assert(targetWidth >= 8u && targetWidth <= 65535u)
5692 ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
5693 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5694
5695 ocean_assert(sourcePaddingElements == 0u); // not supported
5696 ocean_assert(targetPaddingElements == 0u);
5697
5698 using PixelType = typename DataType<uint8_t, 2u>::Type;
5699
5700 PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
5701 const PixelType* const sourcePixelData = (const PixelType*)source;
5702
5703 // our offset values for the eight left pixels in relation to the first pixel of the row
5704 unsigned int leftOffsets[8];
5705
5706 // our color values of the eight top and bottom pixels (32 bit = 16 bit left and 16 bit right)
5707 unsigned int topPixels[8];
5708 unsigned int bottomPixels[8];
5709
5710 // this function uses fixed point numbers with 16 bit for the calculation of the interpolation positions and factors:
5711 // fixedPointLocation = floatLocation * 2^16
5712 //
5713 // [FEDCBA98, 76543210]
5714 // [pixel , subpixel]
5715 //
5716 // fixedPointLocation = pixel + subpixel / 2^16
5717 //
5718 // Thus, the upper 16 bit represent the location of e.g., the left pixel (for the linear interpolation)
5719 // while the lower 16 bit represent one of both interpolation factors (and 2^16 - subpixel represents the second interpolation factor)
5720
5721 const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
5722 const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
5723
5724 const int targetOffsetX_fixed16 = (int)(double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
5725 const int targetOffsetY_fixed16 = (int)(double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
5726
5727 // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
5728 const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
5729
5730 // we store 4 integers: [targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16]
5731 const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
5732
5733 // we store 4 integers: [sourceWidth - 2, sourceWidth - 2, sourceWidth - 2, sourceWidth - 2]
5734 const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
5735
5736 // we store 4 integers: [0, 0, 0, 0]
5737 const int32x4_t m128_s_zero = vdupq_n_s32(0);
5738
5739 const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
5740 const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
5741
5742 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
5743 {
5744 const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 + int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
5745
5746 const unsigned int sourceRowTop = sourceY_fixed16 >> 16u; // we must not round here
5747 const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
5748 const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
5749
5750 const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
5751 // factorTop = 128 - factorBottom
5752 const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
5753
5754 const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
5755
5756 const PixelType* const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
5757 const PixelType* const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
5758
5759 for (unsigned int x = 0; x < targetWidth; x += 8u)
5760 {
5761 if (x + 8u > targetWidth)
5762 {
5763 // the last iteration will not fit into the output frame,
5764 // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
5765
5766 ocean_assert(x >= 8u && targetWidth > 8u);
5767 const unsigned int newX = targetWidth - 8u;
5768
5769 ocean_assert(x > newX);
5770 targetPixelData -= x - newX;
5771
5772 x = newX;
5773
5774 // the for loop will stop after this iteration
5775 ocean_assert(!(x + 8u < targetWidth));
5776 }
5777
5778
5779 // we need eight successive x coordinates (as unsigned integers), split into two registers with four values each:
5780 // [x + 3, x + 2, x + 1; x + 0]
5781 const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
5782 const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
5783
5784 // we calculate the four source locations for our four target locations
5785 const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
5786 const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
5787
5788 const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
5789 const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
5790
5791 // now we determine the pixel/integer accurate source locations
5792 // m128_u_left = min(floor(m128_f_sourceX), sourceWidth - 2)
5793 const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2); // not vrshrq_n_u32 as we must not round here
5794 const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
5795
5796 // we store the offsets we have calculated
5797 vst1q_u32(leftOffsets + 0, m128_u_left_0123);
5798 vst1q_u32(leftOffsets + 4, m128_u_left_4567);
5799
5800
5801
5802 // we load the left and the right pixels into an intermediate buffer
5803 // with following pattern (with top-left TL, and top-right TR):
5804 // F E D C B A 9 8 7 6 5 4 3 2 1 0
5805 // [TR3 TR3 TL3 TL3 TR2 TR2 TL2 TL2 TR1 TR1 TL1 TL1 TR0 TR0 TL0 TL0]
5806 // [TR7 TR7 TL7 TL7 TR6 TR6 TL6 TL6 TR5 TR5 TL5 TL5 TR4 TR4 TL4 TL4]
5807
5808 for (unsigned int n = 0u; n < 8u; ++n)
5809 {
5810 topPixels[n] = *(unsigned int*)(sourceTopRowPixelData + leftOffsets[n]);
5811 }
5812
5813 const uint16x8_t m128_topPixels_0123 = vreinterpretq_u16_u32(vld1q_u32(topPixels + 0));
5814 const uint16x8_t m128_topPixels_4567 = vreinterpretq_u16_u32(vld1q_u32(topPixels + 4));
5815
5816 for (unsigned int n = 0u; n < 8u; ++n)
5817 {
5818 bottomPixels[n] = *(unsigned int*)(sourceBottomRowPixelData + leftOffsets[n]);
5819 }
5820
5821 const uint16x8_t m128_bottomPixels_0123 = vreinterpretq_u16_u32(vld1q_u32(bottomPixels + 0));
5822 const uint16x8_t m128_bottomPixels_4567 = vreinterpretq_u16_u32(vld1q_u32(bottomPixels + 4));
5823
5824
5825 // we determine the multiplication factors for the right pixels - which are already stored in the lower 16 bits
5826 // we need an accuracy of 7 bits (values between 0 and 128):
5827 // 76 54 32 10
5828 // [F3 F2 F1 F0]
5829 const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5830 const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
5831
5832 // as we will have the pixel information de-interleaved, we can store all 8 interpolation factors together into one 8x8 bit register:
5833 const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
5834 const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
5835
5836 // now we have the interpolation factors for 8 left and 8 right pixels:
5837 // 7 6 5 4 3 2 1 0
5838 // [F7 F6 F5 F4 F3 F2 F1 F0]
5839 const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
5840
5841
5842 // we de-interleave the top pixels to left and right pixels:
5843 // F E D C B A 9 8 7 6 5 4 3 2 1 0
5844 // [TL7 TL7 TL6 TL6 TL5 TL5 TL4 TL4 TL3 TL3 TL2 TL2 TL1 TL1 TL0 TL0]
5845 // [TR7 TR7 TR6 TR6 TR5 TR5 TR4 TR4 TR3 TR3 TR2 TR2 TR1 TR1 TR0 TR0]
5846 const uint16x8x2_t m2_128_topPixelsLeftRight = vuzpq_u16(m128_topPixels_0123, m128_topPixels_4567);
5847
5848 // we de-interleave the pixels again to separate channel 0 and channel 1:
5849 // 7 6 5 4 3 2 1 0
5850 // channel 0: [TL7 TL6 TL5 TL4 TL3 TL2 TL1 TL0]
5851 // channel 1: [TL7 TL6 TL5 TL4 TL3 TL2 TL1 TL0]
5852 const uint8x8x2_t m2_64_topPixelsLeft_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[0])), vget_high_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[0])));
5853 const uint8x8x2_t m2_64_topPixelsRight_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[1])), vget_high_u8(vreinterpretq_u8_u16(m2_128_topPixelsLeftRight.val[1])));
5854
5855 const uint8x8_t& m64_topPixelsLeft_channel_0 = m2_64_topPixelsLeft_channels_01.val[0];
5856 const uint8x8_t& m64_topPixelsLeft_channel_1 = m2_64_topPixelsLeft_channels_01.val[1];
5857
5858 const uint8x8_t& m64_topPixelsRight_channel_0 = m2_64_topPixelsRight_channels_01.val[0];
5859 const uint8x8_t& m64_topPixelsRight_channel_1 = m2_64_topPixelsRight_channels_01.val[1];
5860
5861
5862 // we determine the intermediate interpolation results for the top row (and we narrow down the 16 bit results to 8 bit results)
5863 uint16x8_t m128_muliplication_channel_0 = vmull_u8(m64_topPixelsLeft_channel_0, m64_u_factorsLeft);
5864 uint16x8_t m128_muliplication_channel_1 = vmull_u8(m64_topPixelsLeft_channel_1, m64_u_factorsLeft);
5865
5866 m128_muliplication_channel_0 = vmlal_u8(m128_muliplication_channel_0, m64_topPixelsRight_channel_0, m64_u_factorsRight);
5867 m128_muliplication_channel_1 = vmlal_u8(m128_muliplication_channel_1, m64_topPixelsRight_channel_1, m64_u_factorsRight);
5868
5869 const uint8x8_t m64_topRow_channel_0 = vrshrn_n_u16(m128_muliplication_channel_0, 7);
5870 const uint8x8_t m64_topRow_channel_1 = vrshrn_n_u16(m128_muliplication_channel_1, 7);
5871
5872
5873 // we proceed with the bottom pixels (as we did with the top pixels)
5874 const uint16x8x2_t m2_128_bottomPixelsLeftRight = vuzpq_u16(m128_bottomPixels_0123, m128_bottomPixels_4567);
5875
5876 const uint8x8x2_t m2_64_bottomPixelsLeft_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[0])), vget_high_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[0])));
5877 const uint8x8x2_t m2_64_bottomPixelsRight_channels_01 = vuzp_u8(vget_low_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[1])), vget_high_u8(vreinterpretq_u8_u16(m2_128_bottomPixelsLeftRight.val[1])));
5878
5879 const uint8x8_t& m64_bottomPixelsLeft_channel_0 = m2_64_bottomPixelsLeft_channels_01.val[0];
5880 const uint8x8_t& m64_bottomPixelsLeft_channel_1 = m2_64_bottomPixelsLeft_channels_01.val[1];
5881
5882 const uint8x8_t& m64_bottomPixelsRight_channel_0 = m2_64_bottomPixelsRight_channels_01.val[0];
5883 const uint8x8_t& m64_bottomPixelsRight_channel_1 = m2_64_bottomPixelsRight_channels_01.val[1];
5884
5885
5886 // we determine the intermediate interpolation results for the bottom row (and we narrow down the 16 bit results 8 bit results)
5887 m128_muliplication_channel_0 = vmull_u8(m64_bottomPixelsLeft_channel_0, m64_u_factorsLeft);
5888 m128_muliplication_channel_1 = vmull_u8(m64_bottomPixelsLeft_channel_1, m64_u_factorsLeft);
5889
5890 m128_muliplication_channel_0 = vmlal_u8(m128_muliplication_channel_0, m64_bottomPixelsRight_channel_0, m64_u_factorsRight);
5891 m128_muliplication_channel_1 = vmlal_u8(m128_muliplication_channel_1, m64_bottomPixelsRight_channel_1, m64_u_factorsRight);
5892
5893 const uint8x8_t m64_bottomRow_channel_0 = vrshrn_n_u16(m128_muliplication_channel_0, 7);
5894 const uint8x8_t m64_bottomRow_channel_1 = vrshrn_n_u16(m128_muliplication_channel_1, 7);
5895
5896
5897 // finnally we determine the interpolation result between top and bottom row
5898 m128_muliplication_channel_0 = vmull_u8(m64_topRow_channel_0, m64_u_factorsTop);
5899 m128_muliplication_channel_1 = vmull_u8(m64_topRow_channel_1, m64_u_factorsTop);
5900
5901 m128_muliplication_channel_0 = vmlal_u8(m128_muliplication_channel_0, m64_bottomRow_channel_0, m64_u_factorsBottom);
5902 m128_muliplication_channel_1 = vmlal_u8(m128_muliplication_channel_1, m64_bottomRow_channel_1, m64_u_factorsBottom);
5903
5904
5905 // we narrow down the interpolation results and we store them
5906 uint8x8x2_t m2_64_result;
5907 m2_64_result.val[0] = vrshrn_n_u16(m128_muliplication_channel_0, 7);
5908 m2_64_result.val[1] = vrshrn_n_u16(m128_muliplication_channel_1, 7);
5909
5910 // we write back the results and interleave them automatically
5911 vst2_u8((uint8_t*)targetPixelData, m2_64_result);
5912
5913 targetPixelData += 8;
5914 }
5915
5916 // we need to process the last pixel again, as this pixel may have received wrong interpolation factors as we always load two successive pixels into our NEON registers
5917 // **TODO** this is just a temporary solution, check how we can avoid this additional step
5918
5919 const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
5920
5921 for (unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
5922 {
5923 const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 + int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
5924
5925 const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
5926 ocean_assert(lastSourcePixelLeft < sourceWidth);
5927 const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
5928
5929 const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
5930
5931 const unsigned int factorRight = factorRight_fixed16 >> 9u;
5932 const unsigned int factorLeft = 128u - factorRight;
5933
5934 for (unsigned int c = 0u; c < 2u; ++c)
5935 {
5936 ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * (128u - factorBottom)
5937 + (((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
5938 }
5939 }
5940 }
5941}
5942
5943#endif // OCEAN_WE_KEEP_ALSO_THIS_SLOW_IMPLEMENTATION_SHOWING_A_SLIGHTLY_DIFFERENT_APPROACH
5944
5945#ifdef OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION
5946
/**
 * Specialization <3u, 8u>: bilinear rescaling of a subset of target rows, with pixels handled as three interleaved uint8_t values (DataType<uint8_t, 3u>).
 * Eight target pixels are computed per NEON iteration, the interpolation factors have 7 bit precision (both factors of a pair sum up to 128).
 * Source locations are determined with 16.16 fixed-point arithmetic: sourceX = sourceX_s_targetX * (targetX + 0.5) - 0.5 (and accordingly for y).
 * Target pixels starting at 'firstInvalidTargetX' (the source location reaches the last source column, so that the clamped left pixel does not match the interpolation factor anymore) are re-computed by a scalar loop at the end of each row.
 * NOTE(review): the meaning of the second template argument (8u) is not visible within this body - confirm against the primary template.
 * @param source The source frame data, must be valid, padding is not supported
 * @param target The target frame data receiving the interpolation result, must be valid, padding is not supported
 * @param sourceWidth The width of the source frame in pixel, with range [2, 65535]
 * @param sourceHeight The height of the source frame in pixel, with range [1, 65535]
 * @param targetWidth The width of the target frame in pixel, with range [8, 65535]
 * @param targetHeight The height of the target frame in pixel, with range [1, 65535], only used within the asserts
 * @param sourceX_s_targetX The horizontal factor converting a target location to a source location, with range (0, infinity)
 * @param sourceY_s_targetY The vertical factor converting a target location to a source location, with range (0, infinity)
 * @param sourcePaddingElements Must be 0 (asserted), source padding is not supported
 * @param targetPaddingElements Must be 0 (asserted)
 * @param firstTargetRow The first target row to be handled
 * @param numberTargetRows The number of target rows to be handled
 */
5947template <>
5948inline void FrameInterpolatorBilinear::scale8BitPerChannelSubset7BitPrecisionNEON<3u, 8u>(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
5949{
5950 ocean_assert(source != nullptr && target != nullptr);
5951 ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
5952 ocean_assert(sourceHeight >= 1u && sourceHeight <= 65535u);
5953 ocean_assert(targetWidth >= 8u && targetWidth <= 65535u);
5954 ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
5955 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
5956
5957 ocean_assert(sourcePaddingElements == 0u); // not supported
5958 ocean_assert(targetPaddingElements == 0u);
5959
5960 using PixelType = typename DataType<uint8_t, 3u>::Type;
5961
 // the target pointer starts at the first row of this subset and is advanced pixel by pixel while the rows are written
5962 PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
5963 const PixelType* const sourcePixelData = (const PixelType*)source;
5964
5965 // the source column indices of the left pixels for the eight target pixels of the current iteration (relative to the first pixel of the row)
5966 unsigned int leftOffsets[8];
5967
5968 // this function uses fixed point numbers with 16 bit for the calculation of the interpolation positions and factors:
5969 // fixedPointLocation = floatLocation * 2^16
5970 //
5971 // [FEDCBA98, 76543210]
5972 // [pixel , subpixel]
5973 //
5974 // fixedPointLocation = pixel + subpixel / 2^16
5975 //
5976 // Thus, the upper 16 bit represent the location of e.g., the left pixel (for the linear interpolation)
5977 // while the lower 16 bit represent one of both interpolation factors (and 2^16 - subpixel represents the second interpolation factor)
5978
 // the scale factors as 16.16 fixed point values (rounded to the nearest representable value)
5979 const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
5980 const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
5981
 // the fixed point source location of target location 0, based on: source = scale * (target + 0.5) - 0.5, the offset is negative for scale factors below 1
5982 const int targetOffsetX_fixed16 = (int)(double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
5983 const int targetOffsetY_fixed16 = (int)(double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
5984
5985 // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
5986 const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
5987
5988 // we store 4 integers: [targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16, targetOffsetX_fixed16]
5989 const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
5990
5991 // we store 4 integers: [sourceWidth - 2, sourceWidth - 2, sourceWidth - 2, sourceWidth - 2]
5992 const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
5993
5994 // we store 4 integers: [0, 0, 0, 0]
5995 const int32x4_t m128_s_zero = vdupq_n_s32(0);
5996
5997 const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
5998 const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
5999
6000 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6001 {
 // the vertical fixed point source location of this target row, presumably clamped to [0, (sourceHeight - 1) << 16] by minmax() - TODO confirm the helper's semantics
 // NOTE(review): the product is calculated with 32 bit unsigned integers and then converted to 'int', locations of 2^15 source pixels or more do not fit into a positive 'int' although the asserts allow frame sizes up to 65535 - confirm that callers do not use such frames
6002 const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 + int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
6003
6004 const unsigned int sourceRowTop = sourceY_fixed16 >> 16u; // we must not round here
6005 const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
 // we reduce the 16 bit factor to 7 bit precision (without rounding), so that the bottom factor has range [0, 127]
6006 const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
6007
6008 const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
6009 // factorTop = 128 - factorBottom
6010 const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
6011
 // for the last source row, the bottom row is identical to the top row
6012 const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
6013
6014 const PixelType* const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
6015 const PixelType* const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
6016
6017 for (unsigned int x = 0; x < targetWidth; x += 8u)
6018 {
6019 if (x + 8u > targetWidth)
6020 {
6021 // the last iteration will not fit into the output frame,
6022 // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
6023
6024 ocean_assert(x >= 8u && targetWidth > 8u);
6025 const unsigned int newX = targetWidth - 8u;
6026
6027 ocean_assert(x > newX);
6028 targetPixelData -= x - newX;
6029
6030 x = newX;
6031
6032 // the for loop will stop after this iteration
6033 ocean_assert(!(x + 8u < targetWidth));
6034 }
6035
6036
6037 // we need eight successive integer x coordinates, stored in two registers with four coordinates each:
6038 // [x + 3, x + 2, x + 1, x + 0] and [x + 7, x + 6, x + 5, x + 4]
6039 const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
6040 const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
6041
6042 // we calculate the fixed point source locations for our target locations (four per register), negative locations are set to zero
6043 const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
6044 const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
6045
6046 const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
6047 const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
6048
6049 // now we determine the pixel/integer accurate source locations
6050 // m128_u_left = min(m128_u_sourceX_fixed16 >> 16, sourceWidth - 2), so that the right pixel (left + 1) is always located inside the source row
6051 const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2); // not vrshrq_n_u32 as we must not round here
6052 const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
6053
6054 // we store the offsets we have calculated
6055 vst1q_u32(leftOffsets + 0, m128_u_left_0123);
6056 vst1q_u32(leftOffsets + 4, m128_u_left_4567);
6057
6058
6059
6060 // we load the individual pixels to our (de-interleaved) 8x8 bit registers, one register per channel (we do this for the top-left and top-right pixels)
6061 // note: loading of each pixel individually is significantly slower than loading two neighboring pixels within one iteration
 // NOTE(review): both register triples are passed to the first lane-load while still uninitialized, each of the eight lanes is written by one lane-load before the registers are read by the multiplications below
6062
6063 uint8x8x3_t topLeftPixels;
6064 uint8x8x3_t topRightPixels;
6065
6066 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 0), topLeftPixels, 0);
6067 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 1), topRightPixels, 0);
6068
6069 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 0), topLeftPixels, 1);
6070 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 1), topRightPixels, 1);
6071
6072 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 0), topLeftPixels, 2);
6073 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 1), topRightPixels, 2);
6074
6075 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 0), topLeftPixels, 3);
6076 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 1), topRightPixels, 3);
6077
6078 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 0), topLeftPixels, 4);
6079 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 1), topRightPixels, 4);
6080
6081 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 0), topLeftPixels, 5);
6082 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 1), topRightPixels, 5);
6083
6084 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 0), topLeftPixels, 6);
6085 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 1), topRightPixels, 6);
6086
6087 topLeftPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 0), topLeftPixels, 7);
6088 topRightPixels = vld3_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 1), topRightPixels, 7);
6089
6090
6091 // we load the individual pixels to our (de-interleaved) 8x8 bit registers, one register per channel (we do this for the bottom-left and bottom-right pixels)
6092
6093 uint8x8x3_t bottomLeftPixels;
6094 uint8x8x3_t bottomRightPixels;
6095
6096 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 0), bottomLeftPixels, 0);
6097 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 1), bottomRightPixels, 0);
6098
6099 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 0), bottomLeftPixels, 1);
6100 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 1), bottomRightPixels, 1);
6101
6102 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 0), bottomLeftPixels, 2);
6103 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 1), bottomRightPixels, 2);
6104
6105 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 0), bottomLeftPixels, 3);
6106 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 1), bottomRightPixels, 3);
6107
6108 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 0), bottomLeftPixels, 4);
6109 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 1), bottomRightPixels, 4);
6110
6111 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 0), bottomLeftPixels, 5);
6112 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 1), bottomRightPixels, 5);
6113
6114 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 0), bottomLeftPixels, 6);
6115 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 1), bottomRightPixels, 6);
6116
6117 bottomLeftPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 0), bottomLeftPixels, 7);
6118 bottomRightPixels = vld3_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 1), bottomRightPixels, 7);
6119
6120
6121
6122 // we determine the multiplication factors for the right pixels - which are already stored in the lower 16 bits
6123 // we need an accuracy of 7 bits (values between 0 and 128), the shift applies rounding (in contrast to the vertical factor above and to the scalar loop at the end of the row):
6124 // 76 54 32 10
6125 // [F3 F2 F1 F0]
6126 const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6127 const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6128
6129 // as we have the pixel information de-interleaved, we can store all 8 interpolation factors together into one 8x8 bit register:
6130 const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
6131 const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
6132 const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
6133
6134
6135
6136 // we determine the intermediate interpolation results for the top row (and we narrow down the 16 bit results to 8 bit results)
 // left * factorLeft + right * factorRight is at most 255 * 128 and thus fits into 16 bit
6137 uint16x8_t m128_muliplicationChannel_0 = vmull_u8(topLeftPixels.val[0], m64_u_factorsLeft);
6138 uint16x8_t m128_muliplicationChannel_1 = vmull_u8(topLeftPixels.val[1], m64_u_factorsLeft);
6139 uint16x8_t m128_muliplicationChannel_2 = vmull_u8(topLeftPixels.val[2], m64_u_factorsLeft);
6140
6141 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, topRightPixels.val[0], m64_u_factorsRight);
6142 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, topRightPixels.val[1], m64_u_factorsRight);
6143 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, topRightPixels.val[2], m64_u_factorsRight);
6144
6145 uint8x8_t m64_topRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6146 uint8x8_t m64_topRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6147 uint8x8_t m64_topRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6148
6149
6150
6151 // we determine the intermediate interpolation results for the bottom row (and we narrow down the 16 bit results to 8 bit results)
6152 m128_muliplicationChannel_0 = vmull_u8(bottomLeftPixels.val[0], m64_u_factorsLeft);
6153 m128_muliplicationChannel_1 = vmull_u8(bottomLeftPixels.val[1], m64_u_factorsLeft);
6154 m128_muliplicationChannel_2 = vmull_u8(bottomLeftPixels.val[2], m64_u_factorsLeft);
6155
6156 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, bottomRightPixels.val[0], m64_u_factorsRight);
6157 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, bottomRightPixels.val[1], m64_u_factorsRight);
6158 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, bottomRightPixels.val[2], m64_u_factorsRight);
6159
6160 uint8x8_t m64_bottomRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6161 uint8x8_t m64_bottomRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6162 uint8x8_t m64_bottomRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6163
6164
6165
6166 // finally we determine the interpolation result between top and bottom row
6167 m128_muliplicationChannel_0 = vmull_u8(m64_topRowChannel_0, m64_u_factorsTop);
6168 m128_muliplicationChannel_1 = vmull_u8(m64_topRowChannel_1, m64_u_factorsTop);
6169 m128_muliplicationChannel_2 = vmull_u8(m64_topRowChannel_2, m64_u_factorsTop);
6170
6171 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, m64_bottomRowChannel_0, m64_u_factorsBottom);
6172 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, m64_bottomRowChannel_1, m64_u_factorsBottom);
6173 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, m64_bottomRowChannel_2, m64_u_factorsBottom);
6174
6175
6176 // we narrow down the interpolation results and we store them
6177 uint8x8x3_t result;
6178 result.val[0] = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6179 result.val[1] = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6180 result.val[2] = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6181
6182 // we write back the results and interleave them automatically
6183 vst3_u8((uint8_t*)targetPixelData, result);
6184
6185 targetPixelData += 8;
6186 }
6187
6188 // we need to process the last pixels of the row again (all pixels starting at 'firstInvalidTargetX'), as these pixels may have received wrong interpolation factors as we always load two successive pixels into our NEON registers
6189 // **TODO** this is just a temporary solution, check how we can avoid this additional step
 // at this point 'targetPixelData' points behind the last pixel of the current target row, so that 'targetPixelData - (targetWidth - x)' is the target pixel with column x
6190
6191 const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
6192
6193 for (unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
6194 {
6195 const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 + int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
6196
6197 const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
6198 ocean_assert(lastSourcePixelLeft < sourceWidth);
6199 const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
6200
6201 const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
6202
6203 const unsigned int factorRight = factorRight_fixed16 >> 9u;
6204 const unsigned int factorLeft = 128u - factorRight;
6205
6206 for (unsigned int c = 0u; c < 3u; ++c)
6207 {
 // scalar bilinear interpolation with one final rounding step: the sum of the four weighted pixels is normalized by 128 * 128 = 2^14 (+ 8192 for rounding), while the NEON code above rounds after each of both interpolation steps
6208 ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * (128u - factorBottom)
6209 + (((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
6210 }
6211 }
6212 }
6213}
6214
6215#endif // OCEAN_WE_KEEP_THIS_IMPLEMENTATION_AS_WE_NEED_THIS_TO_FOR_A_NEW_NEON_IMPLEMENTATION
6216
6217#ifdef OCEAN_WE_KEEP_ALSO_THIS_SLOW_IMPLEMENTATION_SHOWING_A_MORE_GENERIC_APPROACH
6218
6219/// \cond DOXYGEN_DO_NOT_DOCUMENT
6220
6221template <>
6222inline void FrameInterpolatorBilinear::resize8BitPerChannelSubset7BitPrecisionNEON<4u, 8u>(const uint8_t* source, uint8_t* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
6223{
6224 ocean_assert(source != nullptr && target != nullptr);
6225 ocean_assert(sourceWidth >= 2u && sourceWidth <= 65535u);
6226 ocean_assert(sourceHeight >= 1u && sourceHeight <= 65535u);
6227 ocean_assert(targetWidth >= 8u && targetWidth <= 65535u);
6228 ocean_assert(targetHeight >= 1u && targetHeight <= 65535u);
6229 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
6230
6231 ocean_assert(sourcePaddingElements == 0u); // not supported
6232 ocean_assert(targetPaddingElements == 0u);
6233
6234 using PixelType = typename DataType<uint8_t, 4u>::Type;
6235
6236 PixelType* targetPixelData = (PixelType*)target + firstTargetRow * targetWidth;
6237 const PixelType* const sourcePixelData = (const PixelType*)source;
6238
6239 // our offset values for the eight left pixels in relation to the first pixel of the row
6240 unsigned int leftOffsets[8];
6241
6242 // this function uses fixed point numbers with 16 bit for the calculation of const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
6243 const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
6244
6245 // this function uses fixed point numbers with 16 bit for the calculation of the interpolation positions and factors:
6246 // fixedPointLocation = floatLocation * 2^16
6247 //
6248 // [FEDCBA98, 76543210]
6249 // [pixel , subpixel]
6250 //
6251 // fixedPointLocation = pixel + subpixel / 2^16
6252 //
6253 // Thus, the upper 16 bit represent the location of e.g., the left pixel (for the linear interpolation)
6254 // while the lower 16 bit represent one of both interpolation factors (and 2^16 - subpixel represents the second interpolation factor)
6255
6256 const unsigned int sourceX_T_targetX_fixed16 = (unsigned int)(double(0x10000u) * sourceX_s_targetX + 0.5);
6257 const unsigned int sourceY_T_targetY_fixed16 = (unsigned int)(double(0x10000u) * sourceY_s_targetY + 0.5);
6258
6259 const int targetOffsetX_fixed16 = (int)(double(0x10000u) * ((sourceX_s_targetX * 0.5) - 0.5) + 0.5);
6260 const int targetOffsetY_fixed16 = (int)(double(0x10000u) * ((sourceY_s_targetY * 0.5) - 0.5) + 0.5);
6261
6262 // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
6263 const uint32x4_t m128_u_sourceX_T_targetX_fixed16 = vdupq_n_u32(sourceX_T_targetX_fixed16);
6264
6265 // we store 4 integers: [sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16, sourceX_T_targetX_fixed16]
6266 const int32x4_t m128_s_targetOffsetX_fixed16 = vdupq_n_s32(targetOffsetX_fixed16);
6267
6268 // we store 4 integers: [sourceWidth - 2, sourceWidth - 2, sourceWidth - 2, sourceWidth - 2]
6269 const uint32x4_t m128_u_sourceWidth_2 = vdupq_n_u32(sourceWidth - 2u);
6270
6271 // we store 4 integers: [0, 0, 0, 0]
6272 const int32x4_t m128_s_zero = vdupq_n_s32(0);
6273
6274 const unsigned int u_0123[4] = {0u, 1u, 2u, 3u};
6275 const uint32x4_t m128_u_0123 = vld1q_u32(u_0123);
6276
6277 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6278 {
6279 const unsigned int sourceY_fixed16 = minmax<int>(0, targetOffsetY_fixed16 + int(sourceY_T_targetY_fixed16 * y), (sourceHeight - 1u) << 16u);
6280
6281 const unsigned int sourceRowTop = sourceY_fixed16 >> 16u; // we must not round here
6282 const unsigned int factorBottom_fixed16 = sourceY_fixed16 & 0x0000FFFFu;
6283 const unsigned int factorBottom = factorBottom_fixed16 >> 9u;
6284
6285 const uint8x8_t m64_u_factorsBottom = vdup_n_u8(factorBottom);
6286 // factorTop = 128 - factorBottom
6287 const uint8x8_t m64_u_factorsTop = vdup_n_u8(128u - factorBottom);
6288
6289 const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
6290
6291 const PixelType* const sourceTopRowPixelData = sourcePixelData + sourceRowTop * sourceWidth;
6292 const PixelType* const sourceBottomRowPixelData = sourcePixelData + sourceRowBottom * sourceWidth;
6293
6294 for (unsigned int x = 0; x < targetWidth; x += 8u)
6295 {
6296 if (x + 8u > targetWidth)
6297 {
6298 // the last iteration will not fit into the output frame,
6299 // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
6300
6301 ocean_assert(x >= 8u && targetWidth > 8u);
6302 const unsigned int newX = targetWidth - 8u;
6303
6304 ocean_assert(x > newX);
6305 targetPixelData -= x - newX;
6306
6307 x = newX;
6308
6309 // the for loop will stop after this iteration
6310 ocean_assert(!(x + 8u < targetWidth));
6311 }
6312
6313
6314 // we need four successive x coordinate floats:
6315 // [x + 3, x + 2, x + 1; x + 0]
6316 const uint32x4_t m128_u_x_0123 = vaddq_u32(vdupq_n_u32(x), m128_u_0123);
6317 const uint32x4_t m128_u_x_4567 = vaddq_u32(vdupq_n_u32(x + 4u), m128_u_0123);
6318
6319 // we calculate the four source locations for our four target locations
6320 const int32x4_t m128_s_sourceX_0123_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_0123))));
6321 const uint32x4_t m128_u_sourceX_0123_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_0123_fixed16);
6322
6323 const int32x4_t m128_s_sourceX_4567_fixed16 = vmaxq_s32(m128_s_zero, vaddq_s32(m128_s_targetOffsetX_fixed16, vreinterpretq_s32_u32(vmulq_u32(m128_u_sourceX_T_targetX_fixed16, m128_u_x_4567))));
6324 const uint32x4_t m128_u_sourceX_4567_fixed16 = vreinterpretq_u32_s32(m128_s_sourceX_4567_fixed16);
6325
6326 // now we determine the pixel/integer accurate source locations
6327 // m128_u_left = min(floor(m128_f_sourceX), sourceWidth - 2)
6328 const uint32x4_t m128_u_left_0123 = vminq_u32(vshrq_n_u32(m128_u_sourceX_0123_fixed16, 16), m128_u_sourceWidth_2); // not vrshrq_n_u32 as we must not round here
6329 const uint32x4_t m128_u_left_4567 = vminq_u32(vshrq_n_u32(m128_u_sourceX_4567_fixed16, 16), m128_u_sourceWidth_2);
6330
6331 // we store the offsets we have calculated
6332 vst1q_u32(leftOffsets + 0, m128_u_left_0123);
6333 vst1q_u32(leftOffsets + 4, m128_u_left_4567);
6334
6335
6336
6337 // we load the individal pixels to our four (de-interleaved) 8x8 bit registers (we do this for the top-left and top-right pixels)
6338 // note: loading of each pixel individually is significantly slower than loading two neighboring pixels within one iteration
6339
6340 uint8x8x4_t topLeftPixels;
6341 uint8x8x4_t topRightPixels;
6342
6343 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 0), topLeftPixels, 0);
6344 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[0u] + 1), topRightPixels, 0);
6345
6346 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 0), topLeftPixels, 1);
6347 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[1u] + 1), topRightPixels, 1);
6348
6349 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 0), topLeftPixels, 2);
6350 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[2u] + 1), topRightPixels, 2);
6351
6352 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 0), topLeftPixels, 3);
6353 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[3u] + 1), topRightPixels, 3);
6354
6355 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 0), topLeftPixels, 4);
6356 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[4u] + 1), topRightPixels, 4);
6357
6358 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 0), topLeftPixels, 5);
6359 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[5u] + 1), topRightPixels, 5);
6360
6361 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 0), topLeftPixels, 6);
6362 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[6u] + 1), topRightPixels, 6);
6363
6364 topLeftPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 0), topLeftPixels, 7);
6365 topRightPixels = vld4_lane_u8((uint8_t*)(sourceTopRowPixelData + leftOffsets[7u] + 1), topRightPixels, 7);
6366
6367
6368 // we load the individal pixels to our four (de-interleaved) 8x8 bit registers (we do this for the bottom-left and bottom-right pixels)
6369
6370 uint8x8x4_t bottomLeftPixels;
6371 uint8x8x4_t bottomRightPixels;
6372
6373 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 0), bottomLeftPixels, 0);
6374 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[0u] + 1), bottomRightPixels, 0);
6375
6376 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 0), bottomLeftPixels, 1);
6377 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[1u] + 1), bottomRightPixels, 1);
6378
6379 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 0), bottomLeftPixels, 2);
6380 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[2u] + 1), bottomRightPixels, 2);
6381
6382 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 0), bottomLeftPixels, 3);
6383 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[3u] + 1), bottomRightPixels, 3);
6384
6385 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 0), bottomLeftPixels, 4);
6386 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[4u] + 1), bottomRightPixels, 4);
6387
6388 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 0), bottomLeftPixels, 5);
6389 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[5u] + 1), bottomRightPixels, 5);
6390
6391 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 0), bottomLeftPixels, 6);
6392 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[6u] + 1), bottomRightPixels, 6);
6393
6394 bottomLeftPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 0), bottomLeftPixels, 7);
6395 bottomRightPixels = vld4_lane_u8((uint8_t*)(sourceBottomRowPixelData + leftOffsets[7u] + 1), bottomRightPixels, 7);
6396
6397
6398
6399 // we determine the multiplication factors for the right pixels - which are already stored in the lower 16 bits
6400 // we need an accuracy of 7 bits (values between 0 and 128):
6401 // 76 54 32 10
6402 // [F3 F2 F1 F0]
6403 const uint16x4_t m64_u_factorsRight_0123 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_0123_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6404 const uint16x4_t m64_u_factorsRight_4567 = vrshrn_n_u32(vandq_u32(m128_u_sourceX_4567_fixed16, vdupq_n_u32(0x0000FFFFu)), 9);
6405
6406 // as we have the pixel information de-interleaved, we can store all 8 interpolation factors together into one 8x8 bit register:
6407 const uint16x8_t m128_u_factorsRight = vcombine_u16(m64_u_factorsRight_0123, m64_u_factorsRight_4567);
6408 const uint8x8_t m64_u_factorsRight = vqmovn_u16(m128_u_factorsRight);
6409 const uint8x8_t m64_u_factorsLeft = vsub_u8(vdup_n_u8(128u), m64_u_factorsRight);
6410
6411
6412
6413 // we determine the intermediate interpolation results for the top row (and we narrow down the 16 bit results to 8 bit results)
6414 uint16x8_t m128_muliplicationChannel_0 = vmull_u8(topLeftPixels.val[0], m64_u_factorsLeft);
6415 uint16x8_t m128_muliplicationChannel_1 = vmull_u8(topLeftPixels.val[1], m64_u_factorsLeft);
6416 uint16x8_t m128_muliplicationChannel_2 = vmull_u8(topLeftPixels.val[2], m64_u_factorsLeft);
6417 uint16x8_t m128_muliplicationChannel_3 = vmull_u8(topLeftPixels.val[3], m64_u_factorsLeft);
6418
6419 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, topRightPixels.val[0], m64_u_factorsRight);
6420 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, topRightPixels.val[1], m64_u_factorsRight);
6421 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, topRightPixels.val[2], m64_u_factorsRight);
6422 m128_muliplicationChannel_3 = vmlal_u8(m128_muliplicationChannel_3, topRightPixels.val[3], m64_u_factorsRight);
6423
6424 uint8x8_t m64_topRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6425 uint8x8_t m64_topRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6426 uint8x8_t m64_topRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6427 uint8x8_t m64_topRowChannel_3 = vrshrn_n_u16(m128_muliplicationChannel_3, 7);
6428
6429
6430
6431 // we determine the intermediate interpolation results for the bottom row (and we narrow down the 16 bit results to 8 bit results)
6432 m128_muliplicationChannel_0 = vmull_u8(bottomLeftPixels.val[0], m64_u_factorsLeft);
6433 m128_muliplicationChannel_1 = vmull_u8(bottomLeftPixels.val[1], m64_u_factorsLeft);
6434 m128_muliplicationChannel_2 = vmull_u8(bottomLeftPixels.val[2], m64_u_factorsLeft);
6435 m128_muliplicationChannel_3 = vmull_u8(bottomLeftPixels.val[3], m64_u_factorsLeft);
6436
6437 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, bottomRightPixels.val[0], m64_u_factorsRight);
6438 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, bottomRightPixels.val[1], m64_u_factorsRight);
6439 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, bottomRightPixels.val[2], m64_u_factorsRight);
6440 m128_muliplicationChannel_3 = vmlal_u8(m128_muliplicationChannel_3, bottomRightPixels.val[3], m64_u_factorsRight);
6441
6442 uint8x8_t m64_bottomRowChannel_0 = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6443 uint8x8_t m64_bottomRowChannel_1 = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6444 uint8x8_t m64_bottomRowChannel_2 = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6445 uint8x8_t m64_bottomRowChannel_3 = vrshrn_n_u16(m128_muliplicationChannel_3, 7);
6446
6447
6448
6449 // finally we determine the interpolation result between top and bottom row
6450 m128_muliplicationChannel_0 = vmull_u8(m64_topRowChannel_0, m64_u_factorsTop);
6451 m128_muliplicationChannel_1 = vmull_u8(m64_topRowChannel_1, m64_u_factorsTop);
6452 m128_muliplicationChannel_2 = vmull_u8(m64_topRowChannel_2, m64_u_factorsTop);
6453 m128_muliplicationChannel_3 = vmull_u8(m64_topRowChannel_3, m64_u_factorsTop);
6454
6455 m128_muliplicationChannel_0 = vmlal_u8(m128_muliplicationChannel_0, m64_bottomRowChannel_0, m64_u_factorsBottom);
6456 m128_muliplicationChannel_1 = vmlal_u8(m128_muliplicationChannel_1, m64_bottomRowChannel_1, m64_u_factorsBottom);
6457 m128_muliplicationChannel_2 = vmlal_u8(m128_muliplicationChannel_2, m64_bottomRowChannel_2, m64_u_factorsBottom);
6458 m128_muliplicationChannel_3 = vmlal_u8(m128_muliplicationChannel_3, m64_bottomRowChannel_3, m64_u_factorsBottom);
6459
6460
6461 // we narrow down the interpolation results and we store them
6462 uint8x8x4_t result;
6463 result.val[0] = vrshrn_n_u16(m128_muliplicationChannel_0, 7);
6464 result.val[1] = vrshrn_n_u16(m128_muliplicationChannel_1, 7);
6465 result.val[2] = vrshrn_n_u16(m128_muliplicationChannel_2, 7);
6466 result.val[3] = vrshrn_n_u16(m128_muliplicationChannel_3, 7);
6467
6468 // we write back the results and interleave them automatically
6469 vst4_u8((uint8_t*)targetPixelData, result);
6470
6471 targetPixelData += 8;
6472 }
6473
6474 // we need to process the last pixel again, as this pixel may have received wrong interpolation factors as we always load two successive pixels into our NEON registers
6475 // **TODO** this is just a temporary solution, check how we can avoid this additional step
6476
6477 const unsigned int firstInvalidTargetX = (((sourceWidth - 1u) << 16u) - targetOffsetX_fixed16) / sourceX_T_targetX_fixed16;
6478
6479 for (unsigned int x = firstInvalidTargetX; x < targetWidth; ++x)
6480 {
6481 const unsigned int lastSourcePixelPosition_fixed16 = minmax<int>(0, targetOffsetX_fixed16 + int(sourceX_T_targetX_fixed16 * x), (sourceWidth - 1u) << 16u);
6482
6483 const unsigned int lastSourcePixelLeft = lastSourcePixelPosition_fixed16 >> 16u;
6484 ocean_assert(lastSourcePixelLeft < sourceWidth);
6485 const unsigned int lastSourcePixelRight = min(lastSourcePixelLeft + 1u, sourceWidth - 1u);
6486
6487 const unsigned int factorRight_fixed16 = lastSourcePixelPosition_fixed16 & 0x0000FFFFu;
6488
6489 const unsigned int factorRight = factorRight_fixed16 >> 9u;
6490 const unsigned int factorLeft = 128u - factorRight;
6491
6492 for (unsigned int c = 0u; c < 4u; ++c)
6493 {
6494 ((uint8_t*)(targetPixelData - (targetWidth - x)))[c] = ((((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceTopRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorTop
6495 + (((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelLeft))[c] * factorLeft + ((const uint8_t*)(sourceBottomRowPixelData + lastSourcePixelRight))[c] * factorRight) * factorBottom + 8192u) >> 14u;
6496 }
6497 }
6498 }
6499}
6500
6501/// \endcond
6502
6503#endif // OCEAN_WE_KEEP_ALSO_THIS_SLOW_IMPLEMENTATION_SHOWING_A_MORE_GENERIC_APPROACH
6504
6505template <>
6506inline void FrameInterpolatorBilinear::interpolateRowVerticalNEON<float>(const float* sourceRowTop, const float* sourceRowBottom, float* targetRow, const unsigned int elements, const float factorBottom)
6507{
6508 ocean_assert(sourceRowTop != nullptr);
6509 ocean_assert(sourceRowBottom != nullptr);
6510 ocean_assert(targetRow != nullptr);
6511 ocean_assert(elements >= 16u);
6512 ocean_assert(factorBottom >= 0.0f && factorBottom <= 1.0f);
6513
6514 // [1.0f, 1.0f, 1.0f, 1.0f]
6515 const float32x4_t constant_1_f_32x4 = vdupq_n_f32(1.0f);
6516
6517 const float32x4_t factorsBottom_f_32x4 = vdupq_n_f32(factorBottom);
6518 const float32x4_t factorsTop_f_32x4 = vsubq_f32(constant_1_f_32x4, factorsBottom_f_32x4); // factorTop = 1 - factorBottom
6519
6520 for (unsigned int n = 0u; n < elements; n += 16u)
6521 {
6522 if (n + 16u > elements)
6523 {
6524 // the last iteration will not fit into the output frame,
6525 // so we simply shift x left by some elements (at most 15) and we will calculate some elements again
6526
6527 ocean_assert(n >= 16u && elements > 16u);
6528 const unsigned int offset = n - (elements - 16u);
6529 ocean_assert(offset < 16u);
6530
6531 sourceRowTop -= offset;
6532 sourceRowBottom -= offset;
6533 targetRow -= offset;
6534
6535 // the for loop will stop after this iteration
6536 ocean_assert(!(n + 16u < elements));
6537 }
6538
6539 // loading the next four 32 bit values from the top and bottom row
6540 const float32x4_t top_03_32x4 = vld1q_f32(sourceRowTop + 0);
6541 const float32x4_t top_47_32x4 = vld1q_f32(sourceRowTop + 4);
6542 const float32x4_t top_8B_32x4 = vld1q_f32(sourceRowTop + 8);
6543 const float32x4_t top_CF_32x4 = vld1q_f32(sourceRowTop + 12);
6544
6545 const float32x4_t bottom_03_32x4 = vld1q_f32(sourceRowBottom + 0);
6546 const float32x4_t bottom_47_32x4 = vld1q_f32(sourceRowBottom + 4);
6547 const float32x4_t bottom_8B_32x4 = vld1q_f32(sourceRowBottom + 8);
6548 const float32x4_t bottom_CF_32x4 = vld1q_f32(sourceRowBottom + 12);
6549
6550 // interpolatedRow_32x4 = top_32x4 * factorsTop + bottom_32x4 * factorsBottom
6551 float32x4_t interpolatedRow_03_32x4 = vmulq_f32(top_03_32x4, factorsTop_f_32x4);
6552 float32x4_t interpolatedRow_47_32x4 = vmulq_f32(top_47_32x4, factorsTop_f_32x4);
6553 float32x4_t interpolatedRow_8B_32x4 = vmulq_f32(top_8B_32x4, factorsTop_f_32x4);
6554 float32x4_t interpolatedRow_CF_32x4 = vmulq_f32(top_CF_32x4, factorsTop_f_32x4);
6555
6556 interpolatedRow_03_32x4 = vmlaq_f32(interpolatedRow_03_32x4, bottom_03_32x4, factorsBottom_f_32x4);
6557 interpolatedRow_47_32x4 = vmlaq_f32(interpolatedRow_47_32x4, bottom_47_32x4, factorsBottom_f_32x4);
6558 interpolatedRow_8B_32x4 = vmlaq_f32(interpolatedRow_8B_32x4, bottom_8B_32x4, factorsBottom_f_32x4);
6559 interpolatedRow_CF_32x4 = vmlaq_f32(interpolatedRow_CF_32x4, bottom_CF_32x4, factorsBottom_f_32x4);
6560
6561 // writing back the four interpolated 32 bit results
6562 vst1q_f32(targetRow + 0, interpolatedRow_03_32x4);
6563 vst1q_f32(targetRow + 4, interpolatedRow_47_32x4);
6564 vst1q_f32(targetRow + 8, interpolatedRow_8B_32x4);
6565 vst1q_f32(targetRow + 12, interpolatedRow_CF_32x4);
6566
6567 sourceRowTop += 16;
6568 sourceRowBottom += 16;
6569 targetRow += 16;
6570 }
6571}
6572
6573template <>
6574inline void FrameInterpolatorBilinear::interpolateRowHorizontalNEON<float, 1u>(const float* extendedSourceRow, float* targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int* interpolationLocations, const float* interpolationFactorsRight)
6575{
6576 ocean_assert(extendedSourceRow != nullptr);
6577 ocean_assert(targetRow != nullptr);
6578 ocean_assert(targetWidth >= 8u);
6579 ocean_assert(interpolationLocations != nullptr);
6580 ocean_assert(interpolationFactorsRight != nullptr);
6581
6582 ocean_assert_and_suppress_unused(channels == 1u, channels);
6583
6584 // [1.0f, 1.0f, 1.0f, 1.0f]
6585 const float32x4_t constant_1_f_32x4 = vdupq_n_f32(1.0f);
6586
6587 for (unsigned int x = 0; x < targetWidth; x += 8u)
6588 {
6589 if (x + 8u > targetWidth)
6590 {
6591 // the last iteration will not fit into the output frame,
6592 // so we simply shift x left by some pixels (at most 7) and we will calculate some pixels again
6593
6594 ocean_assert(x >= 8u && targetWidth > 8u);
6595 const unsigned int newX = targetWidth - 8u;
6596
6597 ocean_assert(x > newX);
6598 const unsigned int offset = x - newX;
6599
6600 targetRow -= offset;
6601 interpolationLocations -= offset;
6602 interpolationFactorsRight -= offset;
6603
6604 x = newX;
6605
6606 // the for loop will stop after this iteration
6607 ocean_assert(!(x + 8u < targetWidth));
6608 }
6609
6610 // we load the left and the right pixels (for four resulting target pixels)
6611
6612 const float32x2_t pixel_0_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[0]);
6613 const float32x2_t pixel_1_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[1]);
6614 const float32x4_t pixel_01_f_32x4 = vcombine_f32(pixel_0_f_32x2, pixel_1_f_32x2);
6615
6616 const float32x2_t pixel_2_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[2]);
6617 const float32x2_t pixel_3_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[3]);
6618 const float32x4_t pixel_23_f_32x4 = vcombine_f32(pixel_2_f_32x2, pixel_3_f_32x2);
6619
6620 const float32x2_t pixel_4_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[4]);
6621 const float32x2_t pixel_5_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[5]);
6622 const float32x4_t pixel_45_f_32x4 = vcombine_f32(pixel_4_f_32x2, pixel_5_f_32x2);
6623
6624 const float32x2_t pixel_6_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[6]);
6625 const float32x2_t pixel_7_f_32x2 = vld1_f32(extendedSourceRow + interpolationLocations[7]);
6626 const float32x4_t pixel_67_f_32x4 = vcombine_f32(pixel_6_f_32x2, pixel_7_f_32x2);
6627
6628 const float32x4_t factorsRight_0123_f_32x4 = vld1q_f32(interpolationFactorsRight + 0);
6629 const float32x4_t factorsLeft_0123_f_32x4 = vsubq_f32(constant_1_f_32x4, factorsRight_0123_f_32x4);
6630 const float32x4x2_t factorsLeftRight_0123_f_32x4_2 = vzipq_f32(factorsLeft_0123_f_32x4, factorsRight_0123_f_32x4);
6631
6632 const float32x4_t factorsRight_4567_f_32x4 = vld1q_f32(interpolationFactorsRight + 4);
6633 const float32x4_t factorsLeft_4567_f_32x4 = vsubq_f32(constant_1_f_32x4, factorsRight_4567_f_32x4);
6634 const float32x4x2_t factorsLeftRight_4567_f_32x4_2 = vzipq_f32(factorsLeft_4567_f_32x4, factorsRight_4567_f_32x4);
6635
6636 const float32x4_t multiplied_01_f_32x4 = vmulq_f32(pixel_01_f_32x4, factorsLeftRight_0123_f_32x4_2.val[0]);
6637 const float32x4_t multiplied_23_f_32x4 = vmulq_f32(pixel_23_f_32x4, factorsLeftRight_0123_f_32x4_2.val[1]);
6638
6639 const float32x4_t multiplied_45_f_32x4 = vmulq_f32(pixel_45_f_32x4, factorsLeftRight_4567_f_32x4_2.val[0]);
6640 const float32x4_t multiplied_67_f_32x4 = vmulq_f32(pixel_67_f_32x4, factorsLeftRight_4567_f_32x4_2.val[1]);
6641
6642 const float32x2_t result_01_f_32x2 = vpadd_f32(vget_low_f32(multiplied_01_f_32x4), vget_high_f32(multiplied_01_f_32x4));
6643 const float32x2_t result_23_f_32x2 = vpadd_f32(vget_low_f32(multiplied_23_f_32x4), vget_high_f32(multiplied_23_f_32x4));
6644
6645 const float32x2_t result_45_f_32x2 = vpadd_f32(vget_low_f32(multiplied_45_f_32x4), vget_high_f32(multiplied_45_f_32x4));
6646 const float32x2_t result_67_f_32x2 = vpadd_f32(vget_low_f32(multiplied_67_f_32x4), vget_high_f32(multiplied_67_f_32x4));
6647
6648 const float32x4_t result_0123_f_32x4 = vcombine_f32(result_01_f_32x2, result_23_f_32x2);
6649 const float32x4_t result_4567_f_32x4 = vcombine_f32(result_45_f_32x2, result_67_f_32x2);
6650
6651 vst1q_f32(targetRow + 0, result_0123_f_32x4);
6652 vst1q_f32(targetRow + 4, result_4567_f_32x4);
6653
6654 targetRow += 8;
6655 interpolationLocations += 8;
6656 interpolationFactorsRight += 8;
6657 }
6658}
6659
6660template <>
6661inline void FrameInterpolatorBilinear::scaleSubset<float, float, 1u>(const float* source, float* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
6662{
6663 ocean_assert(source != nullptr && target != nullptr);
6664 ocean_assert(sourceWidth >= 1u && sourceHeight >= 1u);
6665 ocean_assert(targetWidth >= 1u && targetHeight >= 1u);
6666 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
6667
6668 ocean_assert(sourceWidth != targetWidth || sourceHeight != targetHeight);
6669
6670 const unsigned int sourceStrideElements = sourceWidth * 1u + sourcePaddingElements;
6671 const unsigned int targetStrideElements = targetWidth * 1u + targetPaddingElements;
6672
6673 using InterpolateRowVerticalFunction = void (*)(const float*, const float*, float*, const unsigned int, const float);
6674 using InterpolateRowHorizontalFunction = void (*)(const float*, float*, const unsigned int, const unsigned int, const unsigned int*, const float*);
6675
6676 InterpolateRowVerticalFunction interpolateRowVerticalFunction = interpolateRowVertical<float>;
6677 InterpolateRowHorizontalFunction interpolateRowHorizontalFunction = interpolateRowHorizontal<float, 1u>;
6678
6679 if (sourceWidth * 1u >= 16u)
6680 {
6681 interpolateRowVerticalFunction = interpolateRowVerticalNEON<float>;
6682 }
6683
6684 if (targetWidth >= 8u)
6685 {
6686 interpolateRowHorizontalFunction = interpolateRowHorizontalNEON<float, 1u>;
6687 }
6688
6689 target += targetStrideElements * firstTargetRow;
6690
6691 const float sourceX_T_targetX = float(sourceX_s_targetX);
6692 const float sourceY_T_targetY = float(sourceY_s_targetY);
6693
6694 // See the generic template function for a detailed documentation regarding interpolation factors.
6695
6696 Memory memoryIntermediateExtendedRow;
6697 Memory memoryHorizontalInterpolationLocations;
6698 Memory memoryHorizontalInterpolationFactorsRight;
6699
6700 if (sourceWidth != targetWidth)
6701 {
6702 // in case we are scaling the width of the frame, we use an intermediate buffer and pre-calculated interpolation locations and factors
6703
6704 memoryIntermediateExtendedRow = Memory::create<float>(sourceWidth + 1u); // one additional pixel
6705
6706 memoryHorizontalInterpolationLocations = Memory::create<unsigned int>(targetWidth); // one offset for each target pixel
6707
6708 memoryHorizontalInterpolationFactorsRight = Memory::create<float>(targetWidth); // one factors (right) for each target pixel
6709 }
6710
6711 if (memoryHorizontalInterpolationLocations)
6712 {
6713 ocean_assert(memoryHorizontalInterpolationFactorsRight);
6714
6715 if (targetWidth >= 4u)
6716 {
6717 const float32x4_t sourceX_T_targetX_f_32x4 = vdupq_n_f32(sourceX_T_targetX);
6718 const float32x4_t targetOffsetX_f_32x4 = vdupq_n_f32(sourceX_T_targetX * 0.5f - 0.5f);
6719
6720 // [0.0f, 0.0f, 0.0f, 0.0f]
6721 const float32x4_t constant_0_f_32x4 = vdupq_n_f32(0);
6722
6723 // [4.0f, 4.0f, 4.0f, 4.0f]
6724 const float32x4_t constant_4_f_32x4 = vdupq_n_f32(4.0f);
6725
6726 // we store 4 integers: [sourceWidth - 1, sourceWidth - 1, sourceWidth - 1, sourceWidth - 1]
6727 const uint32x4_t sourceWidth_1_u_32x4 = vdupq_n_u32(sourceWidth - 1u);
6728
6729 // [0.0f, 1.0f, 2.0f, 3.0f]
6730 const float f_0123[4] = {0.0f, 1.0f, 2.0f, 3.0f};
6731 float32x4_t x_0123_f_32x4 = vld1q_f32(f_0123);
6732
6733 // we pre-calculate the interpolation factors and pixel locations in horizontal direction
6734
6735 for (unsigned int x = 0u; x < targetWidth; x += 4u)
6736 {
6737 if (x + 4u > targetWidth)
6738 {
6739 // the last iteration will not fit into the output frame,
6740 // so we simply shift x left by some pixels (at most 3) and we will calculate some pixels again
6741
6742 ocean_assert(x >= 4u && targetWidth > 4u);
6743 const unsigned int newX = targetWidth - 4u;
6744
6745 ocean_assert(x > newX);
6746 const unsigned int offset = x - newX;
6747
6748 x = newX;
6749
6750 x_0123_f_32x4 = vsubq_f32(x_0123_f_32x4, vdupq_n_f32(float(offset)));
6751
6752 // the for loop will stop after this iteration
6753 ocean_assert(!(x + 4u < targetWidth));
6754 }
6755
6756 // we calculate the four source locations for our four target locations
6757 const float32x4_t sourceX_0123_f_32x4 = vmaxq_f32(constant_0_f_32x4, vaddq_f32(targetOffsetX_f_32x4, vmulq_f32(sourceX_T_targetX_f_32x4, x_0123_f_32x4)));
6758
6759 // now we determine the pixel/integer accurate source locations
6760 // left = min(floor(sourceX), sourceWidth - 1)
6761 uint32x4_t left_0123_u_32x4 = vminq_u32(vcvtq_u32_f32(sourceX_0123_f_32x4), sourceWidth_1_u_32x4); // no rounding here
6762
6763 // we store the offsets we have calculated
6764 vst1q_u32(memoryHorizontalInterpolationLocations.data<unsigned int>() + x, left_0123_u_32x4);
6765
6766 // factorRight = sourcceX - float(left)
6767 const float32x4_t factorsRight_f_32x4 = vsubq_f32(sourceX_0123_f_32x4, vcvtq_f32_u32(left_0123_u_32x4));
6768
6769 vst1q_f32(memoryHorizontalInterpolationFactorsRight.data<float>() + x, factorsRight_f_32x4);
6770
6771 // [x + 0, x + 1, x + 2, x + 3] + [4, 4, 4, 4]
6772 x_0123_f_32x4 = vaddq_f32(x_0123_f_32x4, constant_4_f_32x4);
6773 }
6774 }
6775 else
6776 {
6777 const float targetOffsetX = sourceX_T_targetX * 0.5f - 0.5f;
6778
6779 // we pre-calculate the interpolation factors and pixel locations in horizontal direction
6780
6781 for (unsigned int x = 0u; x < targetWidth; ++x)
6782 {
6783 const float sourceX = max(0.0f, targetOffsetX + float(x) * sourceX_T_targetX);
6784
6785 const unsigned int left = min((unsigned int)sourceX, sourceWidth - 1u); // no rounding here
6786
6787 memoryHorizontalInterpolationLocations.data<unsigned int>()[x] = left;
6788
6789 const float factorRight = sourceX - float(left);
6790 ocean_assert(factorRight >= 0.0f && factorRight <= 1.0f);
6791
6792 memoryHorizontalInterpolationFactorsRight.data<float>()[x] = factorRight;
6793 }
6794 }
6795 }
6796
6797 const float targetOffsetY = sourceY_T_targetY * 0.5f - 0.5f;
6798
6799 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6800 {
6801 const float sourceY = minmax<float>(0.0f, targetOffsetY + sourceY_T_targetY * float(y), float(sourceHeight) - 1.0f);
6802
6803 const unsigned int sourceRowTop = (unsigned int)sourceY; // we must not round here
6804 const float factorBottom = sourceY - float(sourceRowTop);
6805 ocean_assert(factorBottom >= 0.0f && factorBottom <= 1.0f);
6806
6807 const unsigned int sourceRowBottom = min(sourceRowTop + 1u, sourceHeight - 1u);
6808
6809 const float* const sourceTopRow = source + sourceStrideElements * sourceRowTop;
6810 const float* const sourceBottomRow = source + sourceStrideElements * sourceRowBottom;
6811
6812 float* targetRow = nullptr;
6813
6814 if (sourceHeight == targetHeight)
6815 {
6816 ocean_assert(sourceWidth != targetWidth);
6817 ocean_assert(memoryIntermediateExtendedRow);
6818
6819 // we do not need to interpolate two lines, thus we simply need to copy the row (as we need an additional pixel at the end)
6820 memcpy(memoryIntermediateExtendedRow.data<float>(), sourceTopRow, sourceWidth * sizeof(float));
6821 }
6822 else
6823 {
6824 // in case we do not scale the width of the frame, we can write the result to the target frame directly
6825 targetRow = memoryIntermediateExtendedRow.isNull() ? target : memoryIntermediateExtendedRow.data<float>();
6826
6827 ocean_assert(targetRow != nullptr);
6828 ocean_assert(interpolateRowVerticalFunction != nullptr);
6829 interpolateRowVerticalFunction(sourceTopRow, sourceBottomRow, targetRow, sourceWidth * 1u, factorBottom);
6830 }
6831
6832 if (memoryIntermediateExtendedRow) // sourceWidth != targetWidth
6833 {
6834 // we use an extended row (with one additional pixel at the end - equal to the last pixel)
6835 // so we have to copy the last pixel
6836 memoryIntermediateExtendedRow.data<float>()[sourceWidth] = memoryIntermediateExtendedRow.data<float>()[sourceWidth - 1u];
6837
6838 interpolateRowHorizontalFunction(memoryIntermediateExtendedRow.data<float>(), target, targetWidth, 1u, memoryHorizontalInterpolationLocations.data<unsigned int>(), memoryHorizontalInterpolationFactorsRight.data<float>());
6839 }
6840
6841 target += targetStrideElements;
6842 }
6843}
6844
6845#endif // OCEAN_HARDWARE_NEON_VERSION >= 10
6846
6847template <typename T, typename TScale, unsigned int tChannels>
6848void FrameInterpolatorBilinear::scaleSubset(const T* source, T* target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
6849{
6850 static_assert((std::is_same<float, TScale>::value || std::is_same<double, TScale>::value), "Invalid TScale type");
6851
6852 ocean_assert(source != nullptr && target != nullptr);
6853 ocean_assert(sourceWidth != 0u && sourceHeight != 0u);
6854 ocean_assert_and_suppress_unused(targetWidth >= 1u && targetHeight >= 1u, targetHeight);
6855 ocean_assert(sourceX_s_targetX > 0.0 && sourceY_s_targetY > 0.0);
6856
6857 const unsigned int sourceStrideElements = sourceWidth * tChannels + sourcePaddingElements;
6858 const unsigned int targetStrideElements = targetWidth * tChannels + targetPaddingElements;
6859
6860 const TScale sourceX_T_targetX = TScale(sourceX_s_targetX);
6861 const TScale sourceY_T_targetY = TScale(sourceY_s_targetY);
6862
6863 /*
6864 * We determine the sub-pixel accurate source location for each target pixel as follows:
6865 *
6866 * Example with a downsampling by factor 4:
6867 * sourceRow with 12 pixels: | 0 1 2 3 4 5 6 7 8 9 A B |
6868 * targetRow with 3 pixels: | 0 1 2 |
6869 *
6870 * Thus, the source row can be separated into three blocks;
6871 * and we want to extract the color information from the center of the blocks:
6872 * sourceRow with 12 pixels: | 0 1 2 3 | 4 5 6 7 | 8 9 A B |
6873 * targetRow with 3 pixels: | 0 | 1 | 2 | (targetTSourceX = 4)
6874 *
6875 * Thus, we add 0.5 to each target coordinate before converting it to a source location;
6876 * and subtract 0.5 again afterwards:
6877 * sourceX = (targetX + 0.5) * targetTSourceX - 0.5
6878 *
6879 * e.g., (0 + 0.5) * 4 - 0.5 = 1.5
6880 * (1 + 0.5) * 4 - 0.5 = 5.5
6881 *
6882 *
6883 * Example with a downsampling by factor 3:
6884 * sourceRow with 9 pixels: | 0 1 2 3 4 5 6 7 8 |
6885 * targetRow with 3 pixels: | 0 1 2 |
6886 *
6887 * sourceRow with 9 pixels: | 0 1 2 | 3 4 5 | 6 7 8 |
6888 * targetRow with 3 pixels: | 0 | 1 | 2 | (targetTSourceX = 3)
6889 *
6890 * e.g., (0 + 0.5) * 3 - 0.5 = 1
6891 * (1 + 0.5) * 3 - 0.5 = 4
6892 *
6893 *
6894 * Example with a downsampling by factor 2:
6895 * sourceRow with 6 pixels: | 0 1 2 3 4 5 |
6896 * targetRow with 3 pixels: | 0 1 2 |
6897 *
6898 * sourceRow with 6 pixels: | 0 1 | 2 3 | 4 5 |
6899 * targetRow with 3 pixels: | 0 | 1 | 2 | (targetTSourceX = 2)
6900 *
6901 * e.g., (0 + 0.5) * 2 - 0.5 = 0.5
6902 * (1 + 0.5) * 2 - 0.5 = 2.5
6903 *
6904 *
6905 * we can simplify the calculation (as we have a constant term):
6906 * sourceX = (targetX * targetTSourceX) + (0.5 * targetTSourceX - 0.5)
6907 */
6908
6909 const TScale sourceX_T_targetXOffset = sourceX_T_targetX * TScale(0.5) - TScale(0.5);
6910 const TScale sourceY_T_targetYOffset = sourceY_T_targetY * TScale(0.5) - TScale(0.5);
6911
6912 const TScale sourceWidth_1 = TScale(sourceWidth - 1u);
6913 const TScale sourceHeight_1 = TScale(sourceHeight - 1u);
6914
6915 target += targetStrideElements * firstTargetRow;
6916
6917 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6918 {
6919 const TScale sy = minmax(TScale(0), sourceY_T_targetYOffset + sourceY_T_targetY * TScale(y), sourceHeight_1);
6920 ocean_assert(sy >= TScale(0) && sy < TScale(sourceHeight));
6921
6922 const unsigned int sTop = (unsigned int)sy;
6923 ocean_assert(sy >= TScale(sTop));
6924
6925 const TScale factorBottom = sy - TScale(sTop);
6926 ocean_assert(factorBottom >= TScale(0) && factorBottom <= TScale(1));
6927
6928 const TScale factorTop = TScale(1) - factorBottom;
6929 ocean_assert(factorTop >= TScale(0) && factorTop <= TScale(1));
6930
6931 const T* const sourceTop = source + sTop * sourceStrideElements;
6932 const T* const sourceBottom = (sTop + 1u < sourceHeight) ? sourceTop + sourceStrideElements : sourceTop;
6933
6934 for (unsigned int x = 0; x < targetWidth; ++x)
6935 {
6936 const TScale sx = minmax(TScale(0), sourceX_T_targetXOffset + sourceX_T_targetX * TScale(x), sourceWidth_1);
6937 ocean_assert(sx >= TScale(0) && sx < TScale(sourceWidth));
6938
6939 const unsigned int sLeft = (unsigned int)sx;
6940 ocean_assert(sx >= TScale(sLeft));
6941
6942 const TScale factorRight = sx - TScale(sLeft);
6943 ocean_assert(factorRight >= TScale(0) && factorRight <= TScale(1));
6944
6945 const TScale factorLeft = TScale(1) - factorRight;
6946 ocean_assert(factorLeft >= TScale(0) && factorLeft <= TScale(1));
6947
6948 const unsigned int sourceRightOffset = sLeft + 1u < sourceWidth ? tChannels : 0u;
6949
6950 const T* const sourceTopLeft = sourceTop + sLeft * tChannels;
6951 const T* const sourceBottomLeft = sourceBottom + sLeft * tChannels;
6952
6953 const TScale factorTopLeft = factorTop * factorLeft;
6954 const TScale factorTopRight = factorTop * factorRight;
6955 const TScale factorBottomLeft = factorBottom * factorLeft;
6956 const TScale factorBottomRight = factorBottom * factorRight;
6957
6958 for (unsigned int n = 0u; n < tChannels; ++n)
6959 {
6960 target[n] = T(TScale(sourceTopLeft[n]) * factorTopLeft + TScale(sourceTopLeft[sourceRightOffset + n]) * factorTopRight
6961 + TScale(sourceBottomLeft[n]) * factorBottomLeft + TScale(sourceBottomLeft[sourceRightOffset + n]) * factorBottomRight);
6962 }
6963
6964 target += tChannels;
6965 }
6966
6967 target += targetPaddingElements;
6968 }
6969}
6970
6971template <unsigned int tChannels>
6972void FrameInterpolatorBilinear::rotate8BitPerChannelSubset(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const uint8_t* borderColor, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
6973{
6974 static_assert(tChannels != 0u, "Invalid channel number!");
6975
6976 ocean_assert(firstTargetRow + numberTargetRows <= height);
6977
6978 using PixelType = typename DataType<uint8_t, tChannels>::Type;
6979
6980 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
6981
6982 uint8_t zeroColor[tChannels] = {uint8_t(0)};
6983 const PixelType bColor = borderColor ? *(const PixelType*)borderColor : *(const PixelType*)zeroColor;
6984
6985 const SquareMatrix3 rotationMatrix3(Rotation(0, 0, 1, angle));
6986 const SquareMatrix2 rotationMatrix2(rotationMatrix3(0, 0), rotationMatrix3(1, 0), rotationMatrix3(0, 1), rotationMatrix3(1, 1));
6987
6988 const Scalar width_1 = Scalar(width - 1u);
6989 const Scalar height_1 = Scalar(height - 1u);
6990 const Vector2 anchorPosition(horizontalAnchorPosition, verticalAnchorPosition);
6991
6992 for (unsigned int y = firstTargetRow; y < firstTargetRow + numberTargetRows; ++y)
6993 {
6994 PixelType* targetPixel = (PixelType*)(target + y * targetStrideElements);
6995
6996 const Scalar floatY = Scalar(y);
6997
6998 for (unsigned int x = 0; x < width; ++x)
6999 {
7000 const Vector2 sourceLocation(anchorPosition + rotationMatrix2 * (Vector2(Scalar(x), floatY) - anchorPosition));
7001
7002 if (sourceLocation.x() >= 0 && sourceLocation.y() >= 0 && sourceLocation.x() <= width_1 && sourceLocation.y() <= height_1)
7003 {
7004 interpolatePixel8BitPerChannel<tChannels, PC_TOP_LEFT>(source, width, height, sourcePaddingElements, sourceLocation, (uint8_t*)(targetPixel));
7005 }
7006 else
7007 {
7008 *targetPixel = bColor;
7009 }
7010
7011 ++targetPixel;
7012 }
7013 }
7014}
7015
7016} // namespace CV
7017
7018} // namespace Ocean
7019
7020#endif // META_OCEAN_CV_FRAME_INTERPOLATOR_BILINEAR_H
This class implements the abstract base class for all AnyCamera objects.
Definition AnyCamera.h:131
virtual VectorT3< T > vector(const VectorT2< T > &distortedImagePoint, const bool makeUnitVector=true) const =0
Returns a vector starting at the camera's center and intersecting a given 2D point in the image.
virtual unsigned int width() const =0
Returns the width of the camera image.
virtual unsigned int height() const =0
Returns the height of the camera image.
virtual VectorT2< T > projectToImageIF(const VectorT3< T > &objectPoint) const =0
Projects a 3D object point into the camera frame.
virtual bool isValid() const =0
Returns whether this camera is valid.
Helper class allowing to determine the offset that is necessary to access the alpha channel.
Definition FrameBlender.h:60
static constexpr unsigned int data()
Returns the offset that is applied to access the first data channel.
Definition FrameBlender.h:1171
The following comfort class provides comfortable functions simplifying prototyping applications but a...
Definition FrameInterpolatorBilinear.h:61
static bool homographies(const Frame &input, Frame &output, const SquareMatrix3 homographies[4], const Vector2 &outputQuadrantCenter, const uint8_t *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
static bool zoom(const Frame &source, Frame &target, const Scalar zoomFactor, Worker *worker=nullptr)
Zooms into a given input frame (or zooms out) and stores the zoomed image content in an output frame.
static bool homographyMask(const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 &input_H_output, Worker *worker=nullptr, const uint8_t maskValue=0xFF, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
static bool lookupMask(const Frame &input, Frame &output, Frame &outputMask, const LookupTable &input_LT_output, const bool offset, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given input frame into an output frame by application of an interpolation lookup table a...
static bool homographyWithCameraMask(const AnyCamera &inputCamera, const AnyCamera &outputCamera, const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 &homography, Worker *worker=nullptr, const uint8_t maskValue=0xFFu)
Transforms a given input frame into an output frame by application of a homography.
static bool rotate(const Frame &source, Frame &target, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, Worker *worker=nullptr, const uint8_t *borderColor=nullptr)
Rotates a given frame by a bilinear interpolation.
static bool resize(const Frame &source, Frame &target, Worker *worker=nullptr)
Resizes/rescales a given frame by application of a bilinear interpolation.
static bool homographiesMask(const Frame &input, Frame &output, Frame &outputMask, const SquareMatrix3 *homographies, const Vector2 &outputQuadrantCenter, Worker *worker=nullptr, const uint8_t maskValue=0xFF, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
static bool interpolatePixel(const TSource *frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2< TScalar > &position, TTarget *result, const TIntermediate &resultBias=TIntermediate(0))
Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data ty...
Definition FrameInterpolatorBilinear.h:1531
static bool resampleCameraImage(const Frame &sourceFrame, const AnyCamera &sourceCamera, const SquareMatrix3 &source_R_target, const AnyCamera &targetCamera, Frame &targetFrame, LookupCorner2< Vector2 > *source_OLT_target=nullptr, Worker *worker=nullptr, const unsigned int binSizeInPixel=8u, const void *borderColor=nullptr)
Re-samples a camera image which has been captured with a camera profile as if the image would have be...
static bool homographyWithCamera(const PinholeCamera &inputCamera, const PinholeCamera &outputCamera, const Frame &input, Frame &output, const SquareMatrix3 &homography, const bool useDistortionParameters, const uint8_t *borderColor=nullptr, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of a homography.
static bool lookup(const Frame &input, Frame &output, const LookupTable &input_LT_output, const bool offset, const void *borderColor, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
static bool affine(const Frame &source, Frame &target, const SquareMatrix3 &source_A_target, const uint8_t *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &targetOrigin=PixelPositionI(0, 0))
Applies an affine transformation to an image.
static bool interpolatePixel8BitPerChannel(const uint8_t *frame, const unsigned int channels, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const PixelCenter pixelCenter, const VectorT2< TScalar > &position, uint8_t *result)
Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
Definition FrameInterpolatorBilinear.h:1438
static bool homography(const Frame &input, Frame &output, const SquareMatrix3 &input_H_output, const void *borderColor=nullptr, Worker *worker=nullptr, const PixelPositionI &outputOrigin=PixelPositionI(0, 0))
Transforms a given input frame into an output frame (with arbitrary frame dimension) by application o...
This class implements highly optimized interpolation functions with fixed properties.
Definition FrameInterpolatorBilinear.h:342
static void resize400x400To256x256_8BitPerChannel(const uint8_t *const source, uint8_t *const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 256x256 ...
static void resize400x400To224x224_8BitPerChannel(const uint8_t *const source, uint8_t *const target, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Resizes a given FORMAT_Y8 frame with resolution 400x400 to a FORMAT_Y8 frame with resolution 224x224 ...
This class implements bilinear frame interpolator functions.
Definition FrameInterpolatorBilinear.h:45
static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelNEON(const uint8_t *source, const unsigned int offsetsTopLeftElements[4], const unsigned int offsetsTopRightElements[4], const unsigned int offsetsBottomLeftElements[4], const unsigned int offsetsBottomRightElements[4], const unsigned int validPixels[4], const typename DataType< uint8_t, tChannels >::Type &borderColor, const uint32x4_t &m128_factorsRight, const uint32x4_t &m128_factorsBottom, typename DataType< uint8_t, tChannels >::Type *targetPositionPixels)
Interpolates 4 independent pixels concurrently based on already known locations (top-left,...
Definition FrameInterpolatorBilinear.h:4301
static void homography(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, const T *borderColor, T *output, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms an input frame with (almost) arbitrary pixel format into an output frame by application of ...
Definition FrameInterpolatorBilinear.h:1749
static void resampleCameraImage(const T *sourceFrame, const AnyCamera &sourceCamera, const SquareMatrix3 &source_R_target, const AnyCamera &targetCamera, T *targetFrame, const unsigned int sourceFramePaddingElements, const unsigned int targetFramePaddingElements, LookupCorner2< Vector2 > *source_OLT_target=nullptr, Worker *worker=nullptr, const unsigned int binSizeInPixel=8u, const T *borderColor=nullptr)
Re-samples a camera image which has been captured with a camera profile as if the image would have be...
Definition FrameInterpolatorBilinear.h:1909
static void interpolateRowVerticalNEON(const T *sourceRowTop, const T *sourceRowBottom, T *targetRow, const unsigned int elements, const float factorBottom)
Applies a (vertical) linear interpolation between two rows with arbitrary data types.
static void homographyWithCamera8BitPerChannel(const PinholeCamera &inputCamera, const PinholeCamera &outputCamera, const uint8_t *input, const SquareMatrix3 &homography, const bool useDistortionParameters, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition FrameInterpolatorBilinear.h:1815
static void lookup(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable &input_LT_output, const bool offset, const T *borderColor, T *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
Definition FrameInterpolatorBilinear.h:1849
static void interpolatePixel8BitPerChannel(const uint8_t *frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2< TScalar > &position, uint8_t *result)
Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame.
Definition FrameInterpolatorBilinear.h:1976
static void affine8BitPerChannelSSESubset(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 *source_A_target, const uint8_t *borderColor, uint8_t *target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using SSE).
Definition FrameInterpolatorBilinear.h:2480
static Scalar patchIntensitySum1Channel(const uint32_t *linedIntegralFrame, const unsigned int frameWidth, const unsigned int frameHeight, const unsigned int lineIntegralFramePaddingElements, const Vector2 &center, const CV::PixelCenter pixelCenter, const unsigned int patchWidth, const unsigned int patchHeight)
Interpolates the sum of intensity values of an image patch in a frame, while the frame is provided as ...
static void homographyWithCameraMask8BitPerChannel(const PinholeCamera &inputCamera, const PinholeCamera &outputCamera, const uint8_t *input, const unsigned int inputPaddingElements, const SquareMatrix3 &homography, uint8_t *output, uint8_t *outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition FrameInterpolatorBilinear.h:1832
static void homographiesMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *homographies, uint8_t *output, uint8_t *outputMask, const uint8_t maskValue, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:4523
static void homographiesMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], uint8_t *output, uint8_t *outputMask, const Vector2 &outputQuadrantCenter, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given 8 bit per channel input frame into an output frame by application of four homograp...
Definition FrameInterpolatorBilinear.h:1802
static void interpolateRowHorizontal8BitPerChannel7BitPrecision(const uint8_t *extendedSourceRow, uint8_t *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const uint8_t *interpolationFactors)
Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
static void homography8BitPerChannelNEONSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:3593
static void interpolateRowVertical8BitPerChannel7BitPrecisionNEON(const uint8_t *sourceRowTop, const uint8_t *sourceRowBottom, uint8_t *targetRow, const unsigned int elements, const unsigned int factorBottom)
Applies a (vertical) linear interpolation between two rows with 8 bit per channel.
static void homographySubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const T *borderColor, T *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms a frame with (almost) arbitrary pixel format using the given homography.
Definition FrameInterpolatorBilinear.h:2396
static void homographyMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, uint8_t *output, uint8_t *outputMask, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition FrameInterpolatorBilinear.h:1786
static OCEAN_FORCE_INLINE void interpolate8Pixels1Channel8BitNEON(const uint8x8_t &topLeft_u_8x8, const uint8x8_t &topRight_u_8x8, const uint8x8_t &bottomLeft_u_8x8, const uint8x8_t &bottomRight_u_8x8, const uint8x16_t &factorsRight_factorsBottom_128_u_8x16, uint8_t *targetPositionPixels)
Interpolates 8 independent pixels concurrently of a 1 channel frame, the source pixel locations must ...
Definition FrameInterpolatorBilinear.h:3973
static void homographies8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *homographies, const uint8_t *borderColor, uint8_t *output, const Scalar outputQuadrantCenterX, const Scalar outputQuadrantCenterY, const int outputOriginX, const int outputOriginY, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homographies.
Definition FrameInterpolatorBilinear.h:4367
static void homographyWithCamera8BitPerChannelSubset(const PinholeCamera *inputCamera, const PinholeCamera *outputCamera, const PinholeCamera::DistortionLookup *outputCameraDistortionLookup, const uint8_t *input, const SquareMatrix3 *normalizedHomography, const bool useDistortionParameters, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:4603
static void affine8BitPerChannel(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 &source_A_target, const uint8_t *borderColor, uint8_t *target, const PixelPositionI &targetOrigin, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Applies an affine transform to an N-channel, 8-bit frame. The target frame must have the same pixel form...
Definition FrameInterpolatorBilinear.h:1673
static void homographyWithCameraMask8BitPerChannelSubset(const PinholeCamera *inputCamera, const PinholeCamera *outputCamera, const PinholeCamera::DistortionLookup *outputCameraDistortionLookup, const uint8_t *input, const unsigned int inputPaddingElements, const SquareMatrix3 *normalizedHomography, uint8_t *output, uint8_t *outputMask, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const uint8_t maskValue, const unsigned int firstRow, const unsigned int numberRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:4649
static void affine8BitPerChannelNEONSubset(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 *source_A_target, const uint8_t *borderColor, uint8_t *target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Subset function to apply an affine transform to an N-channel, 8-bit unsigned image (using NEON).
Definition FrameInterpolatorBilinear.h:3343
static void lookup8BitPerChannelSubsetNEON(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame into an output frame by application of an interpolation lo...
Definition FrameInterpolatorBilinear.h:5008
static void interpolateRowHorizontalNEON(const T *extendedSourceRow, T *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const float *interpolationFactorsRight)
Applies a (horizontal) linear interpolation for one row with arbitrary data type.
static void scale8BitPerChannelSubset7BitPrecisionNEON(const uint8_t *source, uint8_t *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int channels, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Rescales a subset of a given frame with 8 bit per channel by a bilinear interpolation.
Definition FrameInterpolatorBilinear.h:5423
static OCEAN_FORCE_INLINE __m128i interpolate4Pixels8BitPerChannelSSE(const __m128i &m128_sourcesTopLeft, const __m128i &m128_sourcesTopRight, const __m128i &m128_sourcesBottomLeft, const __m128i &m128_sourcesBottomRight, const __m128i &m128_factorsTopLeft, const __m128i &m128_factorsTopRight, const __m128i &m128_factorsBottomLeft, const __m128i &m128_factorsBottomRight)
Interpolates 4 independent pixels concurrently based on already known locations (top-left,...
static void interpolateRowHorizontal(const T *extendedSourceRow, T *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const float *interpolationFactorsRight)
Applies a (horizontal) linear interpolation for one row with arbitrary data type.
Definition FrameInterpolatorBilinear.h:5388
static void rotate8BitPerChannelSubset(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const uint8_t *borderColor, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Rotates a subset of a given frame by a bilinear interpolation.
Definition FrameInterpolatorBilinear.h:6972
static void lookupMask8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable &input_LT_output, const bool offset, uint8_t *output, uint8_t *outputMask, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, Worker *worker=nullptr, const uint8_t maskValue=0xFF)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
Definition FrameInterpolatorBilinear.h:1896
static OCEAN_FORCE_INLINE void interpolate4Pixels8BitPerChannelSSE(const uint8_t *source, const unsigned int offsetsTopLeft[4], const unsigned int offsetsTopRight[4], const unsigned int offsetsBottomLeft[4], const unsigned int offsetsBottomRight[4], const unsigned int validPixels[4], const typename DataType< uint8_t, tChannels >::Type &borderColor, const __m128i &m128_factorsRight, const __m128i &m128_factorsBottom, typename DataType< uint8_t, tChannels >::Type *targetPositionPixels)
Interpolates 4 independent pixels concurrently based on already known locations (top-left,...
Definition FrameInterpolatorBilinear.h:3275
static void homographies8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 homographies[4], const uint8_t *borderColor, uint8_t *output, const Vector2 &outputQuadrantCenter, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of four homograp...
Definition FrameInterpolatorBilinear.h:1773
static void lookup8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, const uint8_t *borderColor, uint8_t *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame with uint8_t as element type into an output frame by appli...
Definition FrameInterpolatorBilinear.h:4697
static void resize(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Resizes a given frame with (almost) arbitrary data type (e.g., float, double, int) by using a bilinea...
Definition FrameInterpolatorBilinear.h:1624
static bool coversHomographyInputFrame(const unsigned int inputWidth, const unsigned int inputHeight, const unsigned int outputWidth, const unsigned int outputHeight, const SquareMatrix3 &input_H_output, const int outputOriginX=0, const int outputOriginY=0)
Checks whether the application of a given homography for a specified input frame and output frame cov...
static void scale(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rescales a given frame with arbitrary data type (e.g., float, double, int) by using a bilinear interp...
Definition FrameInterpolatorBilinear.h:1637
static void lookupSubset(const T *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, const T *borderColor, T *output, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a given input frame with arbitrary element type into an output frame by applic...
Definition FrameInterpolatorBilinear.h:4751
static void scale8BitPerChannelSubset(const uint8_t *source, uint8_t *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Resizes a subset of a given frame with 8 bit per channel by a bilinear interpolation.
Definition FrameInterpolatorBilinear.h:5247
static void rotate8BitPerChannel(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const Scalar horizontalAnchorPosition, const Scalar verticalAnchorPosition, const Scalar angle, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr, const uint8_t *borderColor=nullptr)
Rotates a given frame by a bilinear interpolation.
Definition FrameInterpolatorBilinear.h:1958
static void interpolateRowVertical(const T *sourceRowTop, const T *sourceRowBottom, T *targetRow, const unsigned int elements, const float factorBottom)
Applies a (vertical) linear interpolation between two rows with arbitrary data types.
Definition FrameInterpolatorBilinear.h:5368
static void homography8BitPerChannel(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 &input_H_output, const uint8_t *borderColor, uint8_t *output, const PixelPositionI &outputOrigin, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, Worker *worker=nullptr)
Transforms a given 8 bit per channel input frame into an output frame by application of a homography.
Definition FrameInterpolatorBilinear.h:1711
static void interpolate1PixelFullAlphaBorder8BitPerChannel(const uint8_t *frame, const unsigned int width, const unsigned int height, const Vector2 &position, uint8_t *result, const unsigned int framePaddingElements)
Determines the interpolated pixel values for a given pixel position in an 8 bit per channel frame wit...
Definition FrameInterpolatorBilinear.h:2155
static void lookupMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const LookupTable *input_LT_output, const bool offset, uint8_t *output, uint8_t *outputMask, const uint8_t maskValue, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Transforms a given input frame into an output frame by application of an interpolation lookup table.
Definition FrameInterpolatorBilinear.h:5150
static void affine8BitPerChannelSubset(const uint8_t *source, const unsigned int sourceWidth, const unsigned int sourceHeight, const SquareMatrix3 *source_A_target, const uint8_t *borderColor, uint8_t *target, const unsigned int targetWidth, const unsigned int targetHeight, const unsigned int firstTargetRow, const unsigned int numberTargetRows, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements)
Subset function to apply an affine transform to an N-channel, 8-bit unsigned image.
Definition FrameInterpolatorBilinear.h:2244
static void homography8BitPerChannelSSESubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:2665
static void interpolateRowHorizontal8BitPerChannel7BitPrecisionNEON(const uint8_t *extendedSourceRow, uint8_t *targetRow, const unsigned int targetWidth, const unsigned int channels, const unsigned int *interpolationLocations, const uint8_t *interpolationFactors)
Applies a (horizontal) linear interpolation for one row with 8 bit per channel.
static void scale8BitPerChannel(const uint8_t *source, uint8_t *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Rescales a given frame with 8 bit per data channel by using a bilinear interpolation with user-define...
Definition FrameInterpolatorBilinear.h:5203
static void homographyMask8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, uint8_t *output, uint8_t *outputMask, const uint8_t maskValue, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int outputMaskPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:4448
static void scaleSubset(const T *source, T *target, const unsigned int sourceWidth, const unsigned int sourceHeight, const unsigned int targetWidth, const unsigned int targetHeight, const double sourceX_s_targetX, const double sourceY_s_targetY, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstTargetRow, const unsigned int numberTargetRows)
Resizes a subset of a given frame with arbitrary data type by a bilinear interpolation.
Definition FrameInterpolatorBilinear.h:6848
static void homography8BitPerChannelSubset(const uint8_t *input, const unsigned int inputWidth, const unsigned int inputHeight, const SquareMatrix3 *input_H_output, const uint8_t *borderColor, uint8_t *output, const unsigned int outputWidth, const unsigned int outputHeight, const unsigned int inputPaddingElements, const unsigned int outputPaddingElements, const unsigned int firstOutputRow, const unsigned int numberOutputRows)
Transforms an 8 bit per channel frame using the given homography.
Definition FrameInterpolatorBilinear.h:2319
static void interpolatePixel(const TSource *frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, const VectorT2< TScalar > &position, TTarget *result, const TIntermediate &resultBias=TIntermediate(0))
Determines the interpolated pixel values for a given pixel position in a frame with arbitrary data ty...
Definition FrameInterpolatorBilinear.h:2069
static constexpr uint8x8_t create_uint8x8(const uint8_t v0, const uint8_t v1, const uint8_t v2, const uint8_t v3, const uint8_t v4, const uint8_t v5, const uint8_t v6, const uint8_t v7)
Creates a uint8x8_t vector from 8 individual uint8_t values.
Definition NEON.h:591
This class implements a 2D pixel position with pixel precision.
Definition PixelPosition.h:63
T y() const
Returns the vertical coordinate position of this object.
Definition PixelPosition.h:468
T x() const
Returns the horizontal coordinate position of this object.
Definition PixelPosition.h:456
static __m128i set128i(const unsigned long long high64, const unsigned long long low64)
Sets a 128i value by two 64 bit values.
Definition SSE.h:3875
static Caller< void > createStatic(typename StaticFunctionPointerMaker< void, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass >::Type function)
Creates a new caller container for a static function with no function parameter.
Definition Caller.h:2877
Template class that allows defining an array of data types.
Definition DataType.h:27
This class implements Ocean's image class.
Definition Frame.h:1879
void setRelativeTimestamp(const Timestamp &relative)
Sets the relative timestamp of this frame.
Definition Frame.h:4317
bool isValid() const
Returns whether this frame is valid.
Definition Frame.h:4612
void setTimestamp(const Timestamp &timestamp)
Sets the timestamp of this frame.
Definition Frame.h:4312
const Timestamp & timestamp() const
Returns the timestamp of this frame.
Definition Frame.h:4302
const Timestamp & relativeTimestamp() const
Returns the relative timestamp of this frame.
Definition Frame.h:4307
Definition of a frame type composed by the frame dimension, pixel format and pixel origin.
Definition Frame.h:30
size_t sizeY() const
Returns the vertical dimension of this lookup object.
Definition Lookup2.h:947
size_t sizeX() const
Returns the horizontal dimension of this lookup object.
Definition Lookup2.h:941
size_t binsY() const
Returns the number of vertical bins of this lookup object.
Definition Lookup2.h:959
size_t binsX() const
Returns the number of horizontal bins of this lookup object.
Definition Lookup2.h:953
This class implements a 2D lookup object with values at the bins' corners defining the individual lookup values.
Definition Lookup2.h:636
Vector2 binTopLeftCornerPosition(const size_t binX, const size_t binY) const
Returns the corner position (the top left corner) of a specific bin in relation to the dimension of this lookup object.
Definition Lookup2.h:1786
void setBinTopLeftCornerValue(const size_t binX, const size_t binY, const T &value)
Sets the value of one specific lookup bin's top left corner.
Definition Lookup2.h:2128
void bilinearValues(const size_t y, TTarget *values) const
Applies a lookup for an entire row in this lookup object.
Definition Lookup2.h:1864
This class implements an object able to allocate memory.
Definition base/Memory.h:22
bool isNull() const
Returns whether this object holds any memory.
Definition base/Memory.h:401
void * data()
Returns the pointer to the writable memory which is allocated by this object.
Definition base/Memory.h:303
This class provides basic numeric functionalities.
Definition Numeric.h:57
static constexpr T eps()
Returns a small epsilon.
static T floor(const T value)
Returns the largest integer value that is not greater than the given value.
Definition Numeric.h:2035
static constexpr bool isEqualEps(const T value)
Returns whether a value is smaller than or equal to a small epsilon.
Definition Numeric.h:2096
static constexpr bool isNotEqualEps(const T value)
Returns whether a value is not smaller than or equal to a small epsilon.
Definition Numeric.h:2246
unsigned int width() const
Returns the width of the camera image.
Definition PinholeCamera.h:1452
const SquareMatrixT3< T > & invertedIntrinsic() const
Returns the inverted intrinsic camera matrix.
Definition PinholeCamera.h:1333
const SquareMatrixT3< T > & intrinsic() const
Returns the intrinsic camera matrix.
Definition PinholeCamera.h:1327
unsigned int height() const
Returns the height of the camera image.
Definition PinholeCamera.h:1458
VectorT2< T > normalizedImagePoint2imagePoint(const VectorT2< T > &normalizedImagePoint, const bool distortImagePoint) const
Calculates the image point corresponding to a given normalized image point.
Definition PinholeCamera.h:1792
This class implements a 2x2 square matrix.
Definition SquareMatrix2.h:73
bool isNull() const
Returns whether this matrix is a zero matrix.
Definition SquareMatrix3.h:1334
const T * data() const
Returns a pointer to the internal values.
Definition SquareMatrix3.h:1047
bool isOrthonormal(const T epsilon=NumericT< T >::eps()) const
Returns whether this matrix is an orthonormal matrix.
Definition SquareMatrix3.h:1366
const T & x() const noexcept
Returns the x value.
Definition Vector2.h:710
const T & y() const noexcept
Returns the y value.
Definition Vector2.h:722
bool isEqual(const VectorT2< T > &vector, const T eps) const
Returns whether two vectors are equal up to a specified epsilon.
Definition Vector2.h:758
const T & y() const noexcept
Returns the y value.
Definition Vector3.h:824
const T & x() const noexcept
Returns the x value.
Definition Vector3.h:812
const T & z() const noexcept
Returns the z value.
Definition Vector3.h:836
This class implements a worker able to distribute function calls over different threads.
Definition Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
T minmax(const T &lowerBoundary, const T &value, const T &upperBoundary)
This function fits a given parameter into a specified value range.
Definition base/Utilities.h:973
PixelCenter
Definition of individual centers of pixels.
Definition CV.h:117
@ PC_TOP_LEFT
The center of a pixel is in the upper-left corner of each pixel's square.
Definition CV.h:133
@ PC_CENTER
The center of a pixel is located in the center of each pixel's square (with an offset of 0.5, 0.5).
Definition CV.h:150
float Scalar
Definition of a scalar type.
Definition Math.h:129
SquareMatrixT3< Scalar > SquareMatrix3
Definition of the SquareMatrix3 object, depending on the OCEAN_MATH_USE_SINGLE_PRECISION flag either with single or double precision float data type.
Definition SquareMatrix3.h:43
VectorT3< Scalar > Vector3
Definition of a 3D vector.
Definition Vector3.h:29
VectorT2< Scalar > Vector2
Definition of a 2D vector.
Definition Vector2.h:28
RotationT< Scalar > Rotation
Definition of the Rotation object, depending on the OCEAN_MATH_USE_SINGLE_PRECISION flag either with single or double precision float data type.
Definition Rotation.h:32
The namespace covering the entire Ocean framework.
Definition Accessor.h:15
Default definition of a type with tBytes bytes.
Definition DataType.h:32
float Type
The 32 bit floating point data type for any data type T but 'double'.
Definition DataType.h:373