Ocean
Loading...
Searching...
No Matches
FrameChannels.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_CV_FRAME_CHANNELS_H
9#define META_OCEAN_CV_FRAME_CHANNELS_H
10
11#include "ocean/cv/CV.h"
13#include "ocean/cv/NEON.h"
14#include "ocean/cv/SSE.h"
15
16#include "ocean/base/DataType.h"
17#include "ocean/base/Frame.h"
18#include "ocean/base/Worker.h"
19
20namespace Ocean
21{
22
23namespace CV
24{
25
26/**
27 * This class implements frame channel conversion, transformation and extraction functions.
28 * @ingroup cv
29 */
30class OCEAN_CV_EXPORT FrameChannels : public FrameConverter
31{
32 public:
33
34 /**
35 * Definition of a constant specifying that the number of channels is not known at compile time but at runtime only.
36 */
37 static constexpr unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME = 0u;
38
39 /**
40 * Definition of a function pointer to a function able to operate on an entire image row.
 * The function receives, in this order: the source row, the target row, a width, a height, a row index, and the stride of the source and of the target in elements.
 * NOTE(review): the precise meaning of the individual arguments (e.g., whether the row pointers already address row 'rowIndex') is defined by the functions using this type - please confirm there.
 * @tparam TSource The data type of each source element
 * @tparam TTarget The data type of each target element
 * @tparam tSourceChannels The number of source channels
 * @tparam tTargetChannels The number of target channels
41 */
42 template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels>
43 using RowOperatorFunction = void(*)(const TSource* sourceRow, TTarget* targetRow, const unsigned int width, const unsigned int height, unsigned int rowIndex, const unsigned int sourceStrideElements, const unsigned int targetStrideElements);
44
45 /**
46 * The following comfort class provides convenience functions which simplify prototyping applications but also increase the binary size of the resulting applications.
47 * Best practice is to avoid using these functions if binary size matters,<br>
48 * as for every comfort function a corresponding function exists with specialized functionality not increasing binary size significantly.<br>
49 */
50 class OCEAN_CV_EXPORT Comfort
51 {
52 public:
53
54 /**
55 * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
56 * Usage:
57 * @code
58 * Frame rgbSourceFrame = ...;
59 *
60 * Frames targetFrames;
61 *
62 * if (separateTo1Channel(rgbSourceFrame, targetFrames))
63 * {
64 * ocean_assert(targetFrames.size() == 3);
65 *
66 * // do something with targetFrames
67 * }
68 * @endcode
69 * @param sourceFrame The frame to be separated, must be valid
70 * @param targetFrames The resulting frames each holding one channel of the source frame, will be set automatically
71 * @param targetPixelFormat Optional explicit pixel format of the target frames, must be a pixel format with 1 channel and must fit with the data type of the source pixel format, otherwise FORMAT_UNDEFINED
72 * @return True, if succeeded
73 */
74 static bool separateTo1Channel(const Frame& sourceFrame, Frames& targetFrames, const FrameType::PixelFormat targetPixelFormat = FrameType::FORMAT_UNDEFINED);
75
76 /**
77 * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
78 * Usage:
79 * @code
80 * Frame rgbSourceFrame = ...;
81 *
82 * Frame targetFrameA;
83 * Frame targetFrameB;
84 * Frame targetFrameC;
85 *
86 * if (separateTo1Channel(rgbSourceFrame, {&targetFrameA, &targetFrameB, &targetFrameC}))
87 * {
88 * // do something with targetFrameA, targetFrameB, and targetFrameC
89 * }
90 * @endcode
91 * @param sourceFrame The frame to be separated, must be valid
92 * @param targetFrames The pointers to the resulting frames each holding one channel of the source frame, one for each source channel
93 * @param targetPixelFormat Optional explicit pixel format of the target frames, must be a pixel format with 1 channel and must fit with the data type of the source pixel format, otherwise FORMAT_UNDEFINED
94 * @return True, if succeeded
95 */
96 static bool separateTo1Channel(const Frame& sourceFrame, const std::initializer_list<Frame*>& targetFrames, const FrameType::PixelFormat targetPixelFormat = FrameType::FORMAT_UNDEFINED);
97
98 /**
99 * Zips/interleaves 1-channel images into one image with n-channels.
100 * Usage:
101 * @code
102 * Frame sourceFrameA = ...;
103 * Frame sourceFrameB = ...;
104 * Frame sourceFrameC = ...;
105 *
106 * Frame targetFrame;
107 * if (zipChannels({sourceFrameA, sourceFrameB, sourceFrameC}, targetFrame))
108 * {
109 * ocean_assert(targetFrame.channels() == 3u);
110 *
111 * // do something with targetFrame
112 * }
113 * @endcode
114 * @param sourceFrames The frames to be zipped/interleaved, must be valid
115 * @param targetFrame The resulting frame holding n channels, will be set automatically
116 * @param targetPixelFormat Optional explicit pixel format of the target frame, otherwise FORMAT_UNDEFINED; presumably must have as many channels as source frames are provided and must fit with the data type of the source frames (the previous '1 channel' wording looked copied from separateTo1Channel() - TODO confirm)
117 * @return True, if succeeded
118 */
119 static bool zipChannels(const std::initializer_list<Frame>& sourceFrames, Frame& targetFrame, const FrameType::PixelFormat targetPixelFormat = FrameType::FORMAT_UNDEFINED);
120
121 /**
122 * Zips/interleaves 1-channel images into one image with n-channels.
123 * Usage:
124 * @code
125 * Frames sourceFrames = ...;
126 *
127 * Frame targetFrame;
128 * if (zipChannels(sourceFrames, targetFrame))
129 * {
130 * ocean_assert(targetFrame.channels() == sourceFrames.size());
131 *
132 * // do something with targetFrame
133 * }
134 * @endcode
135 * @param sourceFrames The frames to be zipped/interleaved, must be valid
136 * @param targetFrame The resulting frame holding n channels, will be set automatically
137 * @param targetPixelFormat Optional explicit pixel format of the target frame, otherwise FORMAT_UNDEFINED; presumably must have as many channels as source frames are provided and must fit with the data type of the source frames (the previous '1 channel' wording looked copied from separateTo1Channel() - TODO confirm)
138 * @return True, if succeeded
139 */
140 static bool zipChannels(const Frames& sourceFrames, Frame& targetFrame, const FrameType::PixelFormat targetPixelFormat = FrameType::FORMAT_UNDEFINED);
141
142 /**
143 * Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
144 * This overload receives one non-const frame only, which serves as input and output.
145 * @param frame The image to convert, must be valid
 * @param worker Optional worker object to distribute the computation
146 * @return True, if succeeded
147 * @see straightAlphaToPremultipliedAlpha().
148 */
149 static bool premultipliedAlphaToStraightAlpha(Frame& frame, Worker* worker = nullptr);
150
151 /**
152 * Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
153 * @param source The source image to convert, must be valid
154 * @param target The resulting converted target image, the frame type will be changed if it does not match the source frame
155 * @param worker Optional worker object to distribute the computation
156 * @return True, if succeeded
157 * @see straightAlphaToPremultipliedAlpha().
158 */
159 static bool premultipliedAlphaToStraightAlpha(const Frame& source, Frame& target, Worker* worker = nullptr);
160
161 /**
162 * Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied alpha.
 * This overload receives one non-const frame only, which serves as input and output.
163 * @param frame The image to convert, must be valid
164 * @param worker Optional worker object to distribute the computation
 * @return True, if succeeded
165 * @see premultipliedAlphaToStraightAlpha().
166 */
167 static bool straightAlphaToPremultipliedAlpha(Frame& frame, Worker* worker = nullptr);
168
169 /**
170 * Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied alpha.
171 * @param source The source image to convert, must be valid
172 * @param target The resulting converted target image, must be valid
173 * @param worker Optional worker object to distribute the computation
 * @return True, if succeeded
174 * @see premultipliedAlphaToStraightAlpha().
175 */
176 static bool straightAlphaToPremultipliedAlpha(const Frame& source, Frame& target, Worker* worker = nullptr);
177 };
178
179 /**
180 * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
181 * Usage:
182 * @code
183 * const unsigned int width = ...;
184 * const unsigned int height = ...;
185 *
186 * const uint8_t* sourceFrame = ...;
187 * const unsigned int sourceFramePaddingElements = ...;
188 *
189 * constexpr unsigned int channels = 2u;
190 *
191 * uint8_t* targetFrames[channels] = {..., ...};
192 * const unsigned int targetFramesPaddingElements[channels] = {..., ...};
193 *
194 * separateTo1Channel<uint8_t, uint8_t, channels>(sourceFrame, targetFrames, width, height, channels, sourceFramePaddingElements, targetFramesPaddingElements);
195 * @endcode
196 * @param sourceFrame The frame to be separated, must be valid
197 * @param targetFrames The pointers to the resulting separated frames each holding one channel of the source frame, with already allocated memory
198 * @param width The width of the source frame in pixel, with range [1, infinity)
199 * @param height The height of the source frame in pixel, with range [1, infinity)
200 * @param channels The number of channels the source frame has, with range [1, infinity)
201 * @param sourceFramePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
202 * @param targetFramesPaddingElements The array of padding elements at the end of each target row, one for each target frame, in elements, with range [0, infinity), nullptr if all are zero
203 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
204 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
205 * @tparam tChannels The number of source channels (and target frames) if known at compile time; otherwise CHANNELS_NOT_KNOWN_AT_COMPILE_TIME == 0, if known at compile time must be identical with 'channels'
206 */
207 template <typename TSource, typename TTarget, unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
208 static void separateTo1Channel(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements);
209
210 /**
211 * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
 * In contrast to the pointer-based overload, the number of channels is not passed explicitly; presumably it is derived from the number of provided target frames (TODO confirm with the implementation).
212 * Usage:
213 * @code
214 * const unsigned int width = ...;
215 * const unsigned int height = ...;
216 *
217 * const uint8_t* sourceFrame = ...;
218 * const unsigned int sourceFramePaddingElements = ...;
219 *
220 * uint8_t* targetFrame0 = ...;
221 * uint8_t* targetFrame1 = ...;
222 * const unsigned int targetFramePaddingElements0 = ...;
223 * const unsigned int targetFramePaddingElements1 = ...;
224 *
225 * separateTo1Channel<uint8_t, uint8_t>(sourceFrame, {targetFrame0, targetFrame1}, width, height, sourceFramePaddingElements, {targetFramePaddingElements0, targetFramePaddingElements1});
226 * @endcode
227 * @param sourceFrame The frame to be separated, must be valid
228 * @param targetFrames The pointers to the resulting separated frames each holding one channel of the source frame, with already allocated memory
229 * @param width The width of the source frame in pixel, with range [1, infinity)
230 * @param height The height of the source frame in pixel, with range [1, infinity)
231 * @param sourceFramePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
232 * @param targetFramesPaddingElements The list of padding elements at the end of each target row, one for each target frame, in elements, with range [0, infinity)
233 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
234 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
235 */
236 template <typename TSource, typename TTarget>
237 static void separateTo1Channel(const TSource* const sourceFrame, const std::initializer_list<TTarget*>& targetFrames, const unsigned int width, const unsigned int height, const unsigned int sourceFramePaddingElements, const std::initializer_list<const unsigned int>& targetFramesPaddingElements);
238
239 /**
240 * Zips/interleaves 1-channel images into one image with n-channels.
241 * Usage:
242 * @code
243 * const unsigned int width = ...;
244 * const unsigned int height = ...;
245 *
 * constexpr unsigned int channels = 2u;
 *
246 * const uint8_t* sourceFrames[channels] = {..., ...};
247 * const unsigned int sourceFramesPaddingElements[channels] = {..., ...};
248 *
249 * uint8_t* targetFrame = ...;
250 * const unsigned int targetFramePaddingElements = ...;
251 *
252 * zipChannels<uint8_t, uint8_t>(sourceFrames, targetFrame, width, height, channels, sourceFramesPaddingElements, targetFramePaddingElements);
253 * @endcode
254 * @param sourceFrames The pointers to the individual 1-channel frames, one for each image, must be valid
255 * @param targetFrame The pointer to the resulting zipped frame holding n-channels, must be valid
256 * @param width The width of the source frames in pixel, with range [1, infinity)
257 * @param height The height of the source frames in pixel, with range [1, infinity)
258 * @param channels The number of provided source frames (and the number of channels the target frame will have), with range [1, infinity)
259 * @param sourceFramesPaddingElements The array of padding elements at the end of each source row, one for each source frame, in elements, with range [0, infinity), nullptr if all are zero
260 * @param targetFramePaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
261 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
262 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
263 * @tparam tChannels The number of source frames (and target channels) if known at compile time; otherwise CHANNELS_NOT_KNOWN_AT_COMPILE_TIME == 0, if known at compile time must be identical with 'channels'
264 */
265 template <typename TSource, typename TTarget, unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
266 static void zipChannels(const TSource* const* const sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements);
267
268 /**
269 * Zips/interleaves 1-channel images into one image with n-channels.
 * In contrast to the pointer-based overload, the number of channels is not passed explicitly; presumably it is derived from the number of provided source frames (TODO confirm with the implementation).
270 * Usage:
271 * @code
272 * const unsigned int width = ...;
273 * const unsigned int height = ...;
274 *
275 * const uint8_t* sourceFrame0 = ...;
276 * const uint8_t* sourceFrame1 = ...;
277 * const unsigned int sourceFramePaddingElements0 = ...;
278 * const unsigned int sourceFramePaddingElements1 = ...;
279 *
280 * uint8_t* targetFrame = ...;
281 * const unsigned int targetFramePaddingElements = ...;
282 *
283 * zipChannels<uint8_t, uint8_t>({sourceFrame0, sourceFrame1}, targetFrame, width, height, {sourceFramePaddingElements0, sourceFramePaddingElements1}, targetFramePaddingElements);
284 * @endcode
285 * @param sourceFrames The pointers to the individual 1-channel frames, one for each image, must be valid
286 * @param targetFrame The pointer to the resulting zipped frame holding n-channels, must be valid
287 * @param width The width of the source frames in pixel, with range [1, infinity)
288 * @param height The height of the source frames in pixel, with range [1, infinity)
289 * @param sourceFramesPaddingElements The list of padding elements at the end of each source row, one for each source frame, in elements, with range [0, infinity)
290 * @param targetFramePaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
291 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
292 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
293 */
294 template <typename TSource, typename TTarget>
295 static void zipChannels(const std::initializer_list<const TSource*>& sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const std::initializer_list<unsigned int>& sourceFramesPaddingElements, const unsigned int targetFramePaddingElements);
296
297 /**
298 * Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the front of all existing channels.
299 * @param source The source frame to which the new channel will be added, must be valid
300 * @param sourceNewChannel The 1-channel frame providing the new channel information, must be valid
301 * @param target The target frame receiving the joined channels (the new channel followed by the existing source channels), must be valid
302 * @param width The width of the frames in pixel, with range [1, infinity)
303 * @param height The height of the frames in pixel, with range [1, infinity)
304 * @param conversionFlag The conversion to be applied
305 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
306 * @param sourceNewChannelPaddingElements The number of padding elements at the end of each new-channel-source row, in elements, with range [0, infinity)
307 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
308 * @param worker Optional worker object to distribute the computational load
309 * @tparam T Data type of each channel pixel value
310 * @tparam tSourceChannels Number of channels of the source frame (without the new channel), with range [1, infinity)
 * @see addLastChannel(), addFirstChannelValue().
311 */
312 template <typename T, unsigned int tSourceChannels>
313 static inline void addFirstChannel(const T* source, const T* sourceNewChannel, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
314
315 /**
316 * Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be the same for each pixel.
 * The new channel will be the first channel of the target frame.
317 * @param source The source frame providing the existing channels
318 * @param newChannelValue Value that will be assigned to the new channel for each pixel
319 * @param target The target frame receiving the new channel (as new first channel) followed by the existing channels
320 * @param width The width of the frames in pixel, with range [1, infinity)
321 * @param height The height of the frames in pixel, with range [1, infinity)
322 * @param conversionFlag The conversion to be applied
323 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
324 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
325 * @param worker Optional worker object to distribute the computational load
326 * @tparam T Data type of each channel pixel value
327 * @tparam tSourceChannels Number of channels of the source frame (without the new channel)
 * @see addLastChannelValue(), addFirstChannel().
328 */
329 template <typename T, unsigned int tSourceChannels>
330 static inline void addFirstChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
331
332 /**
333 * Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the back of all existing channels.
334 * @param source The source frame to which the new channel will be added, must be valid
335 * @param sourceNewChannel The 1-channel frame providing the new channel information, must be valid
336 * @param target The target frame receiving the joined channels (the existing source channels followed by the new channel), must be valid
337 * @param width The width of the frames in pixel, with range [1, infinity)
338 * @param height The height of the frames in pixel, with range [1, infinity)
339 * @param conversionFlag The conversion to be applied
340 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
341 * @param sourceNewChannelPaddingElements The number of padding elements at the end of each new-channel-source row, in elements, with range [0, infinity)
342 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
343 * @param worker Optional worker object to distribute the computational load
344 * @tparam T Data type of each channel pixel value
345 * @tparam tSourceChannels Number of channels of the source frame (without the new channel), with range [1, infinity)
 * @see addFirstChannel(), addLastChannelValue().
346 */
347 template <typename T, unsigned int tSourceChannels>
348 static inline void addLastChannel(const T* source, const T* sourceNewChannel, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
349
350 /**
351 * Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be the same for each pixel.
 * The new channel will be the last channel of the target frame.
352 * @param source The source frame providing the existing channels
353 * @param newChannelValue Value that will be assigned to the new channel for each pixel
354 * @param target The target frame receiving the existing channels followed by the new channel (as new last channel)
355 * @param width The width of the frames in pixel, with range [1, infinity)
356 * @param height The height of the frames in pixel, with range [1, infinity)
357 * @param conversionFlag The conversion to be applied
358 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
359 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
360 * @param worker Optional worker object to distribute the computational load
361 * @tparam T Data type of each channel pixel value
362 * @tparam tSourceChannels Number of channels of the source frame (without the new channel)
 * @see addFirstChannelValue(), addLastChannel().
363 */
364 template <typename T, unsigned int tSourceChannels>
365 static inline void addLastChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
366
367 /**
368 * Removes the first channel from a given frame with zipped (generic) pixel format.
369 * This function is mainly a wrapper around FrameChannels::shuffleChannels().
370 * @param source The source frame from which the first channel will be removed, must be valid
371 * @param target The target frame receiving all source channels except the first one, must be valid
372 * @param width The width of the frames in pixel, with range [1, infinity)
373 * @param height The height of the frames in pixel, with range [1, infinity)
374 * @param conversionFlag The conversion to be applied
375 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
376 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
377 * @param worker Optional worker object to distribute the computational load
378 * @tparam T Data type of each channel pixel value
379 * @tparam tSourceChannels Number of channels of the source frame (including the channel that will be removed), with range [2, infinity)
380 * @see FrameChannels::shuffleChannels<T, tSourceChannels, tTargetChannels, tShufflePattern>(), removeLastChannel().
381 */
382 template <typename T, unsigned int tSourceChannels>
383 static inline void removeFirstChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
384
385 /**
386 * Removes the last channel from a given frame with zipped (generic) pixel format.
387 * This function is mainly a wrapper around FrameChannels::shuffleChannels().
388 * @param source The source frame from which the last channel will be removed, must be valid
389 * @param target The target frame receiving all source channels except the last one, must be valid
390 * @param width The width of the frames in pixel, with range [1, infinity)
391 * @param height The height of the frames in pixel, with range [1, infinity)
392 * @param conversionFlag The conversion to be applied
393 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
394 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
395 * @param worker Optional worker object to distribute the computational load
396 * @tparam T Data type of each channel pixel value
397 * @tparam tSourceChannels Number of channels of the source frame (including the channel that will be removed), with range [2, infinity)
398 * @see FrameChannels::shuffleChannels<T, tSourceChannels, tTargetChannels, tShufflePattern>(), removeFirstChannel().
399 */
400 template <typename T, unsigned int tSourceChannels>
401 static inline void removeLastChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
402
403 /**
404 * Copies one channel from a given frame with zipped pixel format to another frame with zipped pixel format.
405 * @param source The source frame from which the channel will be copied, must be valid
406 * @param target The target frame to which the channel will be copied, must be valid
407 * @param width The width of both frames in pixel, with range [1, infinity)
408 * @param height The height of both frames in pixel, with range [1, infinity)
409 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
410 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
411 * @param worker Optional worker object to distribute the computational load
412 * @tparam T Data type of each channel pixel value
413 * @tparam tSourceChannels Number of channels in the source frame, with range [1, infinity)
414 * @tparam tTargetChannels Number of channels in the target frame, with range [1, infinity)
415 * @tparam tSourceChannelIndex The index of the source channel that will be copied, with range [0, tSourceChannels - 1]
416 * @tparam tTargetChannelIndex The index of the target channel that will receive the copied channel, with range [0, tTargetChannels - 1]
417 */
418 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tSourceChannelIndex, unsigned int tTargetChannelIndex>
419 static inline void copyChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
420
421 /**
422 * Sets one channel of a frame with a specific unique value.
423 * @param frame The frame in which one channel of each pixel will be set
424 * @param width The width of the frame in pixel, with range [1, infinity)
425 * @param height The height of the frame in pixel, with range [1, infinity)
426 * @param value The value to be set
427 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
428 * @param worker Optional worker object to distribute the computation
429 * @tparam T Data type of each channel pixel value
430 * @tparam tChannel Index of the channel that will be set, with range [0, tChannels)
431 * @tparam tChannels Number of data channels of the frame, with range [1, infinity)
432 */
433 template <typename T, unsigned int tChannel, unsigned int tChannels>
434 static inline void setChannel(T* frame, const unsigned int width, const unsigned int height, const T value, const unsigned int framePaddingElements, Worker* worker = nullptr);
435
436 /**
437 * Reverses the order of the channels of a frame with zipped pixel format.
438 * The first channel will be exchanged with the last channel, the second channel will be exchanged with the second last channel and so on.
439 * @param source The source frame whose channels will be reversed, must be valid
440 * @param target The target frame that receives the channels in reversed order, must be valid
441 * @param width The width of the source frame in pixel, with range [1, infinity)
442 * @param height The height of the source frame in pixel, with range [1, infinity)
443 * @param conversionFlag The conversion to be applied
444 * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
445 * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
446 * @param worker Optional worker object to distribute the computation
447 * @tparam T Data type of each channel pixel value
448 * @tparam tChannels Number of data channels, with range [1, infinity)
449 */
450 template <typename T, unsigned int tChannels>
451 static inline void reverseChannelOrder(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
452
453 /**
454 * Shuffles the channels of a frame by an arbitrary pattern.
455 * The shuffle pattern is defined in groups of four bits defining the source channels.<br>
 * As shown in the example below, the lowest four bits define the source channel for the first target channel, the next four bits define the source channel for the second target channel, and so on.<br>
456 * For the shuffling from e.g., an RGBA32 row to a BGRA32 row the pattern 0x3012u must be defined:
457 * <pre>
458 * source pixel R G B A
459 * 0 1 2 3
460 * target pixel B G R A
461 * 2 1 0 3
462 * pattern (with reversed order): 0x3012
463 * </pre>
464 * @param source The source frame for which the channels will be shuffled, must be valid
465 * @param target The target frame that receives the shuffled channels, must be valid
466 * @param width The width of the source frame in pixel, with range [1, infinity)
467 * @param height The height of the source frame in pixel, with range [1, infinity)
468 * @param conversionFlag The conversion to be applied
469 * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
470 * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
471 * @param worker Optional worker object to distribute the computation
472 * @tparam T Data type of each channel pixel value
473 * @tparam tSourceChannels Number of source data channels, with range [1, 8u]
474 * @tparam tTargetChannels Number of target data channels, with range [1, 8u]
475 * @tparam tShufflePattern Groups of four bits define the source channel, e.g., 0x76543210 defines the identity transformation, 0x01234567 defines the reverse transformation
476 */
477 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
478 static inline void shuffleChannels(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
479
480 /**
481 * Shuffles the channels of a source frame and sets the last channel of the target frame to a constant value.
482 * The shuffle pattern is composed of groups of four bits, each group holding the index of the source channel for one target channel, with the lowest four bits belonging to the first target channel.<br>
483 * For the shuffling from e.g., an RGB24 frame to a BGRA32 frame the pattern 0x012u must be defined:
484 * <pre>
485 * source pixel  R G B
486 *               0 1 2
487 * target pixel  B G R A
488 *               2 1 0
489 * pattern (with reversed order): 0x012
490 * </pre>
491 * @param source The source frame for which the channels will be shuffled, must be valid
492 * @param newChannelValue The constant channel value which will be set as last channel in the target frame, with range [0, infinity)
493 * @param target The target frame that receives the shuffled channels, must be valid
494 * @param width The width of the source frame in pixels, with range [1, infinity)
495 * @param height The height of the source frame in pixels, with range [1, infinity)
496 * @param conversionFlag The conversion to be applied
497 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
498 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
499 * @param worker Optional worker object to distribute the computation
500 * @tparam T Data type of each channel pixel value
501 * @tparam tSourceChannels Number of source data channels, with range [1, 8u]
502 * @tparam tTargetChannels Number of target data channels, including the additional extra target channel, with range [2, 8u]
503 * @tparam tShufflePattern Groups of four bits defining the source channel for each target channel, e.g., (for eight channels) 0x76543210 defines the identity transformation, 0x01234567 defines the reverse transformation
504 */
505 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
506 static inline void shuffleChannelsAndSetLastChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
507
508 /**
509 * Narrows the 16 bit channels of a frame to 8 bit channels.
510 * @param source The source frame (16 bit per channel) for which the channels will be narrowed, must be valid
511 * @param target The target frame (8 bit per channel) that receives the narrowed channels, must be valid
512 * @param width The width of the source frame in pixels, with range [1, infinity)
513 * @param height The height of the source frame in pixels, with range [1, infinity)
514 * @param conversionFlag The conversion to be applied
515 * @param sourcePaddingElements Optional padding at the end of each source row, in elements, with range [0, infinity)
516 * @param targetPaddingElements Optional padding at the end of each target row, in elements, with range [0, infinity)
517 * @param worker Optional worker object to distribute the computation
518 * @tparam tChannels Number of source (and target) data channels, with range [1, infinity)
519 */
520 template <unsigned int tChannels>
521 static inline void narrow16BitPerChannelTo8BitPerChannel(const uint16_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
522
523 /**
524 * Applies a specific modifier function on each pixel, source and target share the same data type and channel number.<br> NOTE(review): this function takes no padding parameters, presumably both frames are expected to be continuous in memory - confirm against the implementation.
525 * @param source The source frame providing the pixel information, must be valid
526 * @param target The target frame receiving the pixel information, must be valid
527 * @param width The width of the source frame in pixels, with range [1, infinity)
528 * @param height The height of the source frame in pixels, with range [1, infinity)
529 * @param conversionFlag The conversion to be applied
530 * @param worker Optional worker object to distribute the computation
531 * @tparam T Data type of each channel pixel value
532 * @tparam tChannels Number of data channels, with range [1, infinity)
533 * @tparam tPixelFunction Pixel modification function, called with a pointer to the (constant) source pixel and a pointer to the target pixel
534 */
535 template <typename T, unsigned int tChannels, void (*tPixelFunction)(const T*, T*)>
536 static void applyPixelModifier(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, Worker* worker = nullptr);
537
538 /**
539 * Applies a specific modifier function on each pixel, while source and target may have individual data types and individual channel numbers.
540 * @param source The source frame providing the pixel information, must be valid
541 * @param target The target frame receiving the pixel information, must be valid
542 * @param width The width of the source frame in pixels, with range [1, infinity)
543 * @param height The height of the source frame in pixels, with range [1, infinity)
544 * @param sourcePaddingElements The number of padding elements at the end of each row of the source frame, in elements, with range [0, infinity)
545 * @param targetPaddingElements The number of padding elements at the end of each row of the target frame, in elements, with range [0, infinity)
546 * @param conversionFlag The conversion to be applied
547 * @param worker Optional worker object to distribute the computation
548 * @tparam TSource Data type of each source channel pixel value
549 * @tparam TTarget Data type of each target channel pixel value
550 * @tparam tSourceChannels Number of source data channels, with range [1, infinity)
551 * @tparam tTargetChannels Number of target data channels, with range [1, infinity)
552 * @tparam tPixelFunction Pixel modification function, called with a pointer to the (constant) source pixel and a pointer to the target pixel
553 */
554 template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tPixelFunction)(const TSource*, TTarget*)>
555 static void applyAdvancedPixelModifier(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker* worker = nullptr);
556
557 /**
558 * Applies a generic bivariate per-pixel operator to two source frames.
559 * The operator is applied as follows: `C(y, x) = op(A(y, x), B(y, x))`. Input and output must have the same frame type and have a single plane.
560 * @param source0 The first source frame
561 * @param source1 The second source frame
562 * @param target The target frame
563 * @param width The width of the source frames in pixels, with range [1, infinity)
564 * @param height The height of the source frames in pixels, with range [1, infinity)
565 * @param source0PaddingElements The number of padding elements at the end of each row of the first source, in elements, with range [0, infinity)
566 * @param source1PaddingElements The number of padding elements at the end of each row of the second source, in elements, with range [0, infinity)
567 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
568 * @param conversionFlag The conversion to be applied
569 * @param worker Optional worker object to distribute the computation
570 * @tparam TSource0 Type of the first data source
571 * @tparam TSource1 Type of the second data source
572 * @tparam TTarget Type of the target
573 * @tparam TIntermediate Data type that is used for the computation of intermediate results, e.g., if TSource0 and TSource1 are different
574 * @tparam tSourceChannels Number of channels of the two sources, with range [1, infinity)
575 * @tparam tTargetChannels Number of channels of the target, with range [1, infinity)
576 * @tparam tOperator The operation (function) that is applied on both sources to yield the value for the target (called per pixel), receiving pointers to the pixel of the first source, the pixel of the second source, and the target pixel
577 */
578 template <typename TSource0, typename TSource1, typename TTarget, typename TIntermediate, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
579 static void applyBivariateOperator(const TSource0* source0, const TSource1* source1, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker* worker = nullptr);
580
581 /**
582 * Applies a row operator to all rows of a source image.
583 * The row operator is given as function pointer and is intended to transform a source row to a target row.<br>
584 * This function allows implementing e.g., frame filters with a few lines of code; source and target frame must have the same size.
585 * @param source The source frame to which the row operator is applied, must be valid
586 * @param target The target frame receiving the result of the row operator, must be valid
587 * @param width The width of the source frame and target frame in pixels, with range [1, infinity)
588 * @param height The height of the source frame and target frame in pixels, with range [1, infinity)
589 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
590 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
591 * @param rowOperatorFunction The pointer to the row operator function (receiving the source row, the target row, the frame width and height, the row index, and the stride elements of source and target), must be valid
592 * @param worker Optional worker object to distribute the computation
593 * @tparam TSource The data type of the source elements
594 * @tparam TTarget The data type of the target elements
595 * @tparam tSourceChannels The number of channels the source frame has, with range [1, infinity)
596 * @tparam tTargetChannels The number of channels the target frame has, with range [1, infinity)
597 */
598 template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels>
599 static void applyRowOperator(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels>& rowOperatorFunction, Worker* worker = nullptr);
600
601 /**
602 * Transforms a frame with generic pixel format (with zipped pixel information) like RGB24 or YUV24, to a frame with same pixel format and channel number.
603 * This function mainly mirrors or flips an image, depending on the specified conversion flag.
604 * @param source The source frame buffer, must be valid
605 * @param target The target frame buffer, must be valid
606 * @param width The width of the frame in pixels, with range [1, infinity)
607 * @param height The height of the frame in pixels, with range [1, infinity)
608 * @param conversionFlag The conversion to be applied
609 * @param sourcePaddingElements Optional padding at the end of each source row, in elements, with range [0, infinity)
610 * @param targetPaddingElements Optional padding at the end of each target row, in elements, with range [0, infinity)
611 * @param worker Optional worker object to distribute the computation
612 * @tparam T Data type of each channel pixel value, e.g., 'uint8_t', 'float', ...
613 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
614 */
615 template <typename T, unsigned int tChannels>
616 static inline void transformGeneric(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
617
618 /**
619 * Converts an image with premultiplied alpha to a straight image (without premultiplied alpha), the conversion is applied in place.
620 * @param frame The image to convert, must be valid
621 * @param width The width of the image in pixels, with range [1, infinity)
622 * @param height The height of the image in pixels, with range [1, infinity)
623 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
624 * @param worker Optional worker object to distribute the computation
625 * @tparam tChannels The number of frame channels (including the alpha channel), with range [2, infinity)
626 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
627 * @see straightAlphaToPremultipliedAlpha8BitPerChannel().
628 */
629 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
630 static inline void premultipliedAlphaToStraightAlpha8BitPerChannel(uint8_t* const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker* worker = nullptr);
631
632 /**
633 * Converts a source image with premultiplied alpha to a straight target image (without premultiplied alpha).
634 * @param source The source image to convert, must be valid
635 * @param target The resulting converted target image, must be valid
636 * @param width The width of the image in pixels, with range [1, infinity)
637 * @param height The height of the image in pixels, with range [1, infinity)
638 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
639 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
640 * @param worker Optional worker object to distribute the computation
641 * @tparam tChannels The number of frame channels (including the alpha channel), with range [2, infinity)
642 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
643 * @see straightAlphaToPremultipliedAlpha8BitPerChannel().
644 */
645 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
646 static inline void premultipliedAlphaToStraightAlpha8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
647
648 /**
649 * Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied alpha, the conversion is applied in place.
650 * @param frame The image to convert, must be valid
651 * @param width The width of the image in pixels, with range [1, infinity)
652 * @param height The height of the image in pixels, with range [1, infinity)
653 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
654 * @param worker Optional worker object to distribute the computation
655 * @tparam tChannels The number of frame channels (including the alpha channel), with range [2, infinity)
656 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
657 * @see premultipliedAlphaToStraightAlpha8BitPerChannel().
658 */
659 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
660 static inline void straightAlphaToPremultipliedAlpha8BitPerChannel(uint8_t* const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker* worker = nullptr);
661
662 /**
663 * Converts a source image with straight alpha (without premultiplied alpha) to a target image with premultiplied alpha.
664 * @param source The source image to convert, must be valid
665 * @param target The resulting converted target image, must be valid
666 * @param width The width of the image in pixels, with range [1, infinity)
667 * @param height The height of the image in pixels, with range [1, infinity)
668 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
669 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
670 * @param worker Optional worker object to distribute the computation
671 * @tparam tChannels The number of frame channels (including the alpha channel), with range [2, infinity)
672 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
673 * @see premultipliedAlphaToStraightAlpha8BitPerChannel().
674 */
675 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
676 static inline void straightAlphaToPremultipliedAlpha8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
677
678 /**
679 * Reverses/mirrors the order of pixels in a given row (or a memory block in general).
680 * @param source The pointer to the source pixels, must be valid
681 * @param target The pointer to the target pixels receiving the reversed/mirrored pixel data, must be valid
682 * @param size The number of source (and target) pixels to reverse, with range [1, infinity)
683 * @tparam T The data type of the pixel elements, e.g., 'uint8_t', 'int'
684 * @tparam tChannels The number of channels (the number of elements) each pixel has, with range [1, infinity)
685 */
686 template <typename T, unsigned int tChannels>
687 static void reverseRowPixelOrder(const T* source, T* target, const size_t size);
688
689 /**
690 * Reverses/mirrors the order of pixels in a given row (or a memory block in general) in place.
691 * @param data The pointer to the pixels which will be reversed/mirrored, must be valid
692 * @param size The number of pixels to reverse, with range [1, infinity)
693 * @tparam T The data type of the pixel elements, e.g., 'uint8_t', 'int'
694 * @tparam tChannels The number of channels (the number of elements) each pixel has, with range [1, infinity)
695 */
696 template <typename T, unsigned int tChannels>
697 static void reverseRowPixelOrderInPlace(T* data, const size_t size);
698
699 /**
700 * Reverses/mirrors the order of channels in a given row (or a memory block in general).
701 * @param source The pointer to the source pixels, must be valid
702 * @param target The pointer to the target pixels receiving the reversed/mirrored channels, must be valid
703 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
704 * @param unusedOptions An unused options parameter, must be nullptr
705 * @tparam T The data type of the pixel elements, e.g., 'uint8_t', 'int'
706 * @tparam tChannels The number of channels (the number of elements) each pixel has, with range [1, infinity)
707 */
708 template <typename T, unsigned int tChannels>
709 static void reverseRowChannelOrder(const T* source, T* target, const size_t size, const void* unusedOptions = nullptr);
710
711 /**
712 * Shuffles the channels of row pixels by application of a specified shuffle pattern.
713 * The shuffle pattern is composed of groups of four bits, each group holding the index of the source channel for one target channel, with the lowest four bits belonging to the first target channel.<br>
714 * For the shuffling from e.g., an RGBA32 row to a BGRA32 row the pattern 0x3012u must be defined:
715 * <pre>
716 * source pixel  R G B A
717 *               0 1 2 3
718 * target pixel  B G R A
719 *               2 1 0 3
720 * pattern (with reversed order): 0x3012
721 * </pre>
722 * @param source The pointer to the source pixels, must be valid
723 * @param target The pointer to the target pixels, receiving the shuffled channels, must be valid
724 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
725 * @param unusedOptions An unused options parameter, must be nullptr
726 * @tparam T Data type of each channel pixel value, e.g., 'uint8_t' or 'float'
727 * @tparam tSourceChannels Number of source data channels, with range [1, 8u]
728 * @tparam tTargetChannels Number of target data channels, with range [1, 8u]
729 * @tparam tShufflePattern Groups of four bits defining the source channel for each target channel, e.g., (for eight channels) 0x76543210 defines the identity transformation, 0x01234567 defines the reverse transformation
730 */
731 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
732 static inline void shuffleRowChannels(const T* source, T* target, const size_t size, const void* unusedOptions = nullptr);
733
734 /**
735 * Shuffles the channels of row pixels by application of a specified shuffle pattern and sets the last channel in the target row to a constant value.
736 * The shuffle pattern is composed of groups of four bits, each group holding the index of the source channel for one target channel, with the lowest four bits belonging to the first target channel.<br>
737 * For the shuffling from e.g., an RGB24 row to a BGRA32 row the pattern 0x012u must be defined:
738 * <pre>
739 * source pixel  R G B
740 *               0 1 2
741 * target pixel  B G R A
742 *               2 1 0
743 * pattern (with reversed order): 0x012
744 * </pre>
745 * @param source The pointer to the source pixels, must be valid
746 * @param target The pointer to the target pixels, receiving the shuffled channels, must be valid
747 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
748 * @param options Pointer to the constant channel value which will be added to the end of the target channels, must be valid (the parameter defaults to nullptr in the signature, however a valid pointer must be provided)
749 * @tparam T Data type of each channel pixel value, e.g., 'uint8_t' or 'float'
750 * @tparam tSourceChannels Number of source data channels, with range [1, 8u]
751 * @tparam tTargetChannels Number of target data channels, including the additional extra target channel, with range [2, 8u]
752 * @tparam tShufflePattern Groups of four bits defining the source channel for each target channel, e.g., (for eight channels) 0x76543210 defines the identity transformation, 0x01234567 defines the reverse transformation
753 */
754 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
755 static inline void shuffleRowChannelsAndSetLastChannelValue(const T* source, T* target, const size_t size, const void* options = nullptr);
756
757 /**
758 * Converts a row of pixels with 3 channels to pixels with one channel by a linear combination of the three channels.
759 * This function can be used to e.g., convert RGB24 to Y8, or BGR24 to Y8.
760 * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
761 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
762 * @param source The pointer to the source pixels, must be valid
763 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
764 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
765 * @param channelMultiplicationFactors_128 The three uint32_t multiplication factors, one for each channel, with range [0, 128], while the sum of all three factors must be 128, must be valid
766 * @tparam tUseFactorChannel0 True, if the value(s) of factorChannel0 is not zero; False, if the value(s) of factorChannel0 is zero
767 * @tparam tUseFactorChannel1 True, if the value(s) of factorChannel1 is not zero; False, if the value(s) of factorChannel1 is zero
768 * @tparam tUseFactorChannel2 True, if the value(s) of factorChannel2 is not zero; False, if the value(s) of factorChannel2 is zero
769 */
770 template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2>
771 static void convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* channelMultiplicationFactors_128);
772
773 /**
774 * Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the three channels plus a translational part applied to the source data before applying the linear transformation.
775 * This function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
776 * The linear combination is defined by three integer multiplication factors for each source channel with 64 as denominator, plus one translation parameter for each source channel (with 1 as denominator).<br>
777 * Beware: As this function applies integer multiplication factors (with 6 bits precision) the conversion result has an accuracy of +/- 4 color intensities.<br>
778 * The transformation is based on the following pattern:
779 * <pre>
780 * t0 = clamp(0, f00 * (s0 - b0) + f01 * (s1 - b1) + f02 * (s2 - b2), 255)
781 * t1 = clamp(0, f10 * (s0 - b0) + f11 * (s1 - b1) + f12 * (s2 - b2), 255)
782 * t2 = clamp(0, f20 * (s0 - b0) + f21 * (s1 - b1) + f22 * (s2 - b2), 255)
783 * </pre>
784 * With t target, s source, f factor, and b bias/translation.<br>
785 * Factors must be specified in relation to a denominator of 64, bias values must be specified with a denominator of 1.
786 * @param source The pointer to the source pixels, must be valid
787 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
788 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
789 * @param parameters The 12 int32_t parameters: the 9 factors of the column-aligned 3x3 transformation matrix followed by the 3 translation parameters: f00_64, f10_64, f20_64, f01_64, f11_64, ..., f22_64, with ranges [-128, 128], b0, b1, b2, with ranges [0, 128], must be valid
790 */
791 static void convertRow3ChannelsTo3Channels8BitPerChannel6BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
792
793 /**
794 * Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the three channels plus a bias (translation) part.
795 * This function can be used to e.g., convert RGB24 to YUV24, or BGR24 to YVU24.
796 * The linear combination is defined by three integer multiplication factors for each source channel with 128 as denominator, plus one bias (translation) parameter for each target channel (with 1 as denominator).<br>
797 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
798 * The transformation is based on the following pattern:
799 * <pre>
800 * t0 = clamp(0, f00 * s0 + f01 * s1 + f02 * s2 + b0, 255)
801 * t1 = clamp(0, f10 * s0 + f11 * s1 + f12 * s2 + b1, 255)
802 * t2 = clamp(0, f20 * s0 + f21 * s1 + f22 * s2 + b2, 255)
803 * </pre>
804 * With t target, s source, f factor, and b bias.<br>
805 * Factors must be specified in relation to a denominator of 128, bias values must be specified with a denominator of 1.
806 * @param source The pointer to the source pixels, must be valid
807 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
808 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
809 * @param parameters The 12 int32_t parameters of the column-aligned 3x4 transformation matrix: f00_128, f10_128, f20_128, f01_128, f11_128, ..., f22_128, b0, b1, b2, with ranges [-127, 127], must be valid
810 */
811 static void convertRow3ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
812
813 /**
814 * Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the three channels plus a bias (translation) part.
815 * This function can be used to e.g., convert YUV24 to RGB24, or YVU24 to BGR24.
816 * The linear combination is defined by three integer multiplication factors for each source channel with 1024 as denominator, plus one bias (translation) parameter for each target channel (with 1 as denominator).<br>
817 * Beware: As this function applies integer multiplication factors (with 10 bits precision) the conversion result has an accuracy of +/- 1 color intensity.<br>
818 * The transformation is based on the following pattern:
819 * <pre>
820 * t0 = clamp(0, f00 * s0 + f01 * s1 + f02 * s2 + b0, 255)
821 * t1 = clamp(0, f10 * s0 + f11 * s1 + f12 * s2 + b1, 255)
822 * t2 = clamp(0, f20 * s0 + f21 * s1 + f22 * s2 + b2, 255)
823 * </pre>
824 * With t target, s source, f factor, and b bias.<br>
825 * Factors must be specified in relation to a denominator of 1024, bias values must be specified with a denominator of 1.
826 * @param source The pointer to the source pixels, must be valid
827 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
828 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
829 * @param parameters The 12 int32_t parameters of the column-aligned 3x4 transformation matrix: f00_1024, f10_1024, f20_1024, f01_1024, f11_1024, ..., f22_1024, b0, b1, b2, with ranges [-1024 * 16, 1024 * 16], must be valid
830 */
831 static void convertRow3ChannelsTo3Channels8BitPerChannel10BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
832
833 /**
834 * Converts a row of pixels with 3 channels to pixels with 4 channels by a linear combination of the three channels plus a translational part applied to the source data before applying the linear transformation (for the first three channels).
835 * The fourth channel is set to a constant value, e.g., for an alpha channel.<br>
836 * This function can be used to e.g., convert YUV24 to RGBA32, or YVU24 to BGRA32.<br>
837 * The linear combination is defined by three integer multiplication factors for each source channel with 64 as denominator, plus one translation parameter for each source channel (with 1 as denominator).<br>
838 * Beware: As this function applies integer multiplication factors (with 6 bits precision) the conversion result has an accuracy of +/- 4 color intensities.<br>
839 * The transformation is based on the following pattern:
840 * <pre>
841 * t0 = clamp(0, f00 * (s0 - b0) + f01 * (s1 - b1) + f02 * (s2 - b2), 255)
842 * t1 = clamp(0, f10 * (s0 - b0) + f11 * (s1 - b1) + f12 * (s2 - b2), 255)
843 * t2 = clamp(0, f20 * (s0 - b0) + f21 * (s1 - b1) + f22 * (s2 - b2), 255)
844 * t3 = valueChannel3
845 * </pre>
846 * With t target, s source, f factor, and b bias/translation.<br>
847 * Factors must be specified in relation to a denominator of 64, bias values must be specified with a denominator of 1.
848 * @param source The pointer to the source pixels, must be valid
849 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
850 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
851 * @param parameters The 13 int32_t parameters: the 9 factors of the column-aligned 3x3 transformation matrix, followed by the 3 translation parameters and the constant value of the fourth channel: f00_64, f10_64, f20_64, f01_64, f11_64, ..., f22_64, with ranges [-128, 128], b0, b1, b2, with ranges [0, 128], valueChannel3, with range [0, 255], must be valid
852 */
853 static void convertRow3ChannelsTo4Channels8BitPerChannel6BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
854
855 /**
856 * Converts a row of pixels with 4 channels to pixels with one channel by a linear combination of the four channels.
857 * This function can be used to e.g., convert RGBA32 to Y8, or ARGB32 to Y8, or RGB32 to Y8.
858 * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
859 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
860 * <pre>
861 * t0 = f0 * s0 + f1 * s1 + f2 * s2 + f3 * s3
862 * </pre>
863 * @param source The pointer to the source pixels, must be valid
864 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
865 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
866 * @param channelMultiplicationFactors_128 The four uint32_t multiplication factors, one for each channel, with range [0, 127], while the sum of all four factors must be 128, must be valid
867 * @tparam tUseFactorChannel0 True, if the value(s) of factorChannel0 is not zero; False, if the value(s) of factorChannel0 is zero
868 * @tparam tUseFactorChannel1 True, if the value(s) of factorChannel1 is not zero; False, if the value(s) of factorChannel1 is zero
869 * @tparam tUseFactorChannel2 True, if the value(s) of factorChannel2 is not zero; False, if the value(s) of factorChannel2 is zero
870 * @tparam tUseFactorChannel3 True, if the value(s) of factorChannel3 is not zero; False, if the value(s) of factorChannel3 is zero
871 */
872 template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2, bool tUseFactorChannel3>
873 static void convertRow4ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* channelMultiplicationFactors_128);
874
875 /**
876 * Converts a row of pixels with 4 channels to pixels with two channels by a linear combination of the four channels.
877 * This function can be used to e.g., convert RGBA32 to YA16, or ARGB32 to AY16.
878 * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
879 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
880 * The transformation is based on the following pattern:
881 * <pre>
882 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + f03 * s3
883 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + f13 * s3
884 * </pre>
885 * @param source The pointer to the source pixels, must be valid
886 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
887 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
888 * @param multiplicationFactors_128 The 8 int32_t parameters of the column-aligned 2x4 transformation matrix: f00_128, f10_128, f01_128, ..., f13_128, with range [0, 127], while the sum of the four factors of each row must be 128, must be valid
889 */
890 static void convertRow4ChannelsTo2Channels8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* multiplicationFactors_128);
891
892 /**
893 * Converts a row of pixels with 4 channels to pixels with 3 channels by a linear combination of the four channels plus a bias (translation) part.
894 * This function can be used to e.g., convert RGBA32 to YUV24, or BGRA32 to YVU24.
895 * The linear combination is defined by three integer multiplication factors for each source channel with 128 as denominator, plus one bias (translation) parameter for each target channel (with 1 as denominator).<br>
896 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
897 * The transformation is based on the following pattern:
898 * <pre>
899 * t0 = clamp(0, f00 * s0 + f01 * s1 + f02 * s2 + f03 * s3 + b0, 255)
900 * t1 = clamp(0, f10 * s0 + f11 * s1 + f12 * s2 + f13 * s3 + b1, 255)
901 * t2 = clamp(0, f20 * s0 + f21 * s1 + f22 * s2 + f23 * s3 + b2, 255)
902 * </pre>
903 * With t target, s source, f factor, and b bias.<br>
904 * Factors must be specified in relation to a denominator of 128, bias values must be specified with a denominator of 1.
905 * @param source The pointer to the source pixels, must be valid
906 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
907 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
908 * @param parameters The 12 int32_t parameters of the column-aligned 3x4 transformation matrix, plus 3 bias (translation) parameters: f00_128, f10_128, f20_128, f01_128, f11_128, ..., f23_128, b0, b1, b2, with ranges [-127, 127]
909 */
910 static void convertRow4ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
911
912 /**
913 * Narrows a row of pixels with 16 bit channels to pixels with 8 bit channels.
914 * @param source The pointer to the source pixels, must be valid
915 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
916 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
917 * @param unusedParameters Unused parameter, must be nullptr
918 * @tparam tChannels The number of channels the source (and target) frame have, with range [1, infinity)
919 */
920 template <unsigned int tChannels>
921 static void narrowRow16BitPerChannelTo8BitPerChannel(const uint16_t* source, uint8_t* target, const size_t size, const void* unusedParameters = nullptr);
922
923 /**
924 * Adds a channel to a given row with generic (zipped) pixel format and copies the information of the new channel from a one-channel image.
925 * The channel can be added as new first channel or as new last channel.
926 * @param sources The pointers to the multi-channel source frame and to the single-channel source frame, must be valid
927 * @param targets The one pointer to the target image, must be valid
928 * @param multipleRowIndex The index of the multiple-row to be handled, with range [0, height - 1]
929 * @param width The width of the frame in pixel, with range [1, infinity), must be even
930 * @param height The height of the frame in pixel, with range [1, infinity), must be even
931 * @param conversionFlag The conversion to be applied
932 * @param options The 1 options parameters: padding parameters of 1-channel source frame, must be valid
933 * @tparam T Data type of each channel pixel value, e.g, 'uint8_t' or 'float'
934 * @tparam tSourceChannels Number of channels of the source frame (without the new channel), with range [1, infinity)
935 * @tparam tAddToFront True, to add the channel to the front (as new first channel); False, to add the channel to the back (as new last channel).
936 */
937 template <typename T, unsigned int tSourceChannels, bool tAddToFront>
938 static void addChannelRow(const void** sources, void** targets, const unsigned int multipleRowIndex, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const void* options);
939
940 /**
941 * Adds a channel to a given row with generic (zipped) pixel format and sets all values to a specified value.
942 * The channel can be added as new first channel or as new last channel.
943 * @param source The pointer to the source pixels, must be valid
944 * @param target The pointer to the target pixels, receiving the additional channel, must be valid
945 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
946 * @param channelValueParameter The pointer to the value of the channel to be set (with data type 'T'), must be valid
947 * @tparam T Data type of each channel pixel value, e.g, 'uint8_t' or 'float'
948 * @tparam tSourceChannels Number of channels of the source frame (without the new channel), with range [1, infinity)
949 * @tparam tAddToFront True, to add the channel to the front (as new first channel); False, to add the channel to the back (as new last channel).
950 */
951 template <typename T, unsigned int tSourceChannels, bool tAddToFront>
952 static void addChannelValueRow(const T* source, T* target, const size_t size, const void* channelValueParameter);
953
954 /**
955 * Copies one channel from a source row to a target row with generic (zipped) pixel format.
956 * @param source The pointer to the source pixels, must be valid
957 * @param target The pointer to the target pixels, receiving the copied channel, must be valid
958 * @param size The number of source (and target) pixels to handle, with range [1, infinity)
959 * @param unusedParameters Unused parameters, must be nullptr
960 * @tparam T Data type of each channel pixel value, e.g, 'uint8_t' or 'float'
961 * @tparam tSourceChannels Number of channels of the source frame, with range [1, infinity)
962 * @tparam tTargetChannels Number of channels of the target frame, with range [1, infinity)
963 * @tparam tSourceChannelIndex The index of the source channel to be copied, with range [0, tSourceChannels - 1]
964 * @tparam tTargetChannelIndex The index of the target channel receiving the copied channel, with range [0, tTargetChannels - 1]
965 */
966 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tSourceChannelIndex, unsigned int tTargetChannelIndex>
967 static void copyChannelRow(const T* source, T* target, const size_t size, const void* unusedParameters = nullptr);
968
969 protected:
970
971 /**
972 * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
973 * @param sourceFrame The frame to be separated, must be valid
974 * @param targetFrames The pointers to the resulting separated frames, each holding one channel of the source frame, the memory must already be allocated
975 * @param width The width of the source frame in pixel, with range [1, infinity)
976 * @param height The height of the source frame in pixel, with range [1, infinity)
977 * @param channels The number of channels the source frame has, with range [1, infinity)
978 * @param sourceFramePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
979 * @param targetFramesPaddingElements The array of padding elements at the end of each target row, one for each target frame, in elements, with range [0, infinity)
980 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
981 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
982 */
983 template <typename TSource, typename TTarget>
984 static void separateTo1ChannelRuntime(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements);
985
986 /**
987 * Zips/interleaves 1-channel images into one image with n-channels.
988 * @param sourceFrames The pointers to the individual 1-channel frames, one for each channel of the target frame, must be valid
989 * @param targetFrame The pointer to the resulting zipped frame holding n-channels, must be valid
990 * @param width The width of the source frames in pixel, with range [1, infinity)
991 * @param height The height of the source frames in pixel, with range [1, infinity)
992 * @param channels The number of provided source frames (and the number of channels the target frame will have), with range [1, infinity)
993 * @param sourceFramesPaddingElements The array of padding elements at the end of each source row, one for each source frame, in elements, with range [0, infinity)
994 * @param targetFramePaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
995 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
996 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
997 */
998 template <typename TSource, typename TTarget>
999 static void zipChannelsRuntime(const TSource* const* const sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements);
1000
1001 /**
1002 * Sets one channel of a frame to one unique value.
1003 * @param frame The frame in which one channel of each pixel will be set, must be valid
1004 * @param width The width of the frame in pixel, with range [1, infinity)
1005 * @param value The value to be set
1006 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
1007 * @param firstRow First row to be handled
1008 * @param numberRows Number of rows to be handled
1009 * @tparam T Data type of each channel pixel value
1010 * @tparam tChannel Index of the channel that will be set, with range [0, tChannels)
1011 * @tparam tChannels Number of data channels of the frame, with range [1, infinity)
1012 */
1013 template <typename T, unsigned int tChannel, unsigned int tChannels>
1014 static void setChannelSubset(T* frame, const unsigned int width, const T value, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1015
1016 /**
1017 * Applies a specific modifier function on each pixel within a subset of the frame's rows.
1018 * @param source The source frame providing the pixel information, must be valid
1019 * @param target The target frame receiving the pixel information, must be valid
1020 * @param width The width of the source frame in pixel
1021 * @param height The height of the source frame in pixel
1022 * @param conversionFlag The conversion to be applied
1023 * @param firstRow First row to be handled
1024 * @param numberRows Number of rows to be handled
1025 * @tparam T Data type of each channel pixel value
1026 * @tparam tChannels Number of data channels, with range [1, infinity)
1027 * @tparam tPixelFunction Pixel modification function, with signature void(const T*, T*)
1028 */
1029 template <typename T, unsigned int tChannels, void (*tPixelFunction)(const T*, T*)>
1030 static void applyPixelModifierSubset(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows);
1031
1032 /**
1033 * Applies a specific modifier function on each pixel within a subset of the frame's rows, with individual data types and channel numbers for source and target.
1034 * @param source The source frame providing the pixel information, must be valid
1035 * @param target The target frame receiving the pixel information, must be valid
1036 * @param width The width of the source frame in pixel, with range [1, infinity)
1037 * @param height The height of the source frame in pixel, with range [1, infinity)
1038 * @param sourcePaddingElements The number of padding elements at the end of each row of the source frame, in elements, with range [0, infinity)
1039 * @param targetPaddingElements The number of padding elements at the end of each row of the target frame, in elements, with range [0, infinity)
1040 * @param conversionFlag The conversion to be applied
1041 * @param firstRow First row to be handled
1042 * @param numberRows Number of rows to be handled
1043 * @tparam TSource Data type of each source channel pixel value
1044 * @tparam TTarget Data type of each target channel pixel value
1045 * @tparam tSourceChannels Number of source data channels, with range [1, infinity)
1046 * @tparam tTargetChannels Number of target data channels, with range [1, infinity)
1047 * @tparam tPixelFunction Pixel modification function, with signature void(const TSource*, TTarget*)
1048 */
1049 template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tPixelFunction)(const TSource*, TTarget*)>
1050 static void applyAdvancedPixelModifierSubset(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows);
1051
1052 /**
1053 * Applies a generic bivariate operator on each pixel of two source frames within a subset of the frames' rows.
1054 * @param source0 First source frame
1055 * @param source1 Second source frame
1056 * @param target The target frame
1057 * @param width The width of the source frames in pixel, with range [1, infinity)
1058 * @param height The height of the source frames in pixel, with range [1, infinity)
1059 * @param source0PaddingElements The number of padding elements at the end of each row of the first source, in elements, with range [0, infinity)
1060 * @param source1PaddingElements The number of padding elements at the end of each row of the second source, in elements, with range [0, infinity)
1061 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1062 * @param conversionFlag The conversion to be applied
1063 * @param firstRow First row to be handled
1064 * @param numberRows Number of rows to be handled
1065 * @tparam TSource0 Type of the first data source
1066 * @tparam TSource1 Type of the second data source
1067 * @tparam TTarget Type of the target
1068 * @tparam TIntermediate Type for the computation of intermediate result, e.g. if TSource0 and TSource1 are different
1069 * @tparam tSourceChannels Number of channels of the two sources, range: [1, infinity)
1070 * @tparam tTargetChannels Number of channels of the target, range: [1, infinity)
1071 * @tparam tOperator The operation (function) that is applied on both sources to yield the value for the target (called per pixel), with signature void(const TSource0*, const TSource1*, TTarget*)
1072 */
1073 template <typename TSource0, typename TSource1, typename TTarget, typename TIntermediate, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
1074 static void applyBivariateOperatorSubset(const TSource0* source0, const TSource1* source1, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows);
1075
1076 /**
1077 * Applies a row operator to a subset of all rows of a source image.
1078 * The row operator is given as function pointer and is intended to transform a source row to a target row.<br>
1079 * The function allows to implement e.g., frame filters with few lines of code, source and target frame must have the same size.
1080 * @param source The source frame to which the row operator is applied, must be valid
1081 * @param target The target frame receiving the result of the row operator, must be valid
1082 * @param width The width of the source frame and target frame in pixel, with range [1, infinity)
1083 * @param height The height of the source frame and target frame in pixel, with range [1, infinity)
1084 * @param sourceStrideElements The number of elements between the start points of two successive source rows, in elements, with range [width * tSourceChannels, infinity)
1085 * @param targetStrideElements The number of elements between the start points of two successive target rows, in elements, with range [width * tTargetChannels, infinity)
1086 * @param rowOperatorFunction The pointer to the row operator function, must be valid
1087 * @param firstRow The first row to be handled, with range [0, height - 1]
1088 * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1089 * @tparam TSource The data type of the source elements
1090 * @tparam TTarget The data type of the target elements
1091 * @tparam tSourceChannels The number of channels the source frame has, with range [1, infinity)
1092 * @tparam tTargetChannels The number of channels the target frame has, with range [1, infinity)
1093 */
1094 template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels>
1095 static void applyRowOperatorSubset(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels> rowOperatorFunction, const unsigned int firstRow, const unsigned int numberRows);
1096
1097 /**
1098 * Transforms a subset of a frame with generic pixel format (with zipped pixel information) like RGB24 or YUV24, to a frame with same pixel format and channel number.
1099 * @param source The source frame buffer, must be valid
1100 * @param target The target frame buffer, must be valid
1101 * @param width The width of the frame in pixel, with range [1, infinity)
1102 * @param height The height of the frame in pixel, with range [1, infinity)
1103 * @param conversionFlag The conversion to be applied
1104 * @param rowReversePixelOrderFunction The function able to reverse the pixel order, must be valid
1105 * @param bytesPerRow The actual number of bytes each row covers, not including optional padding bytes at the end of each row, with range [width, infinity)
1106 * @param sourceStrideBytes The number of bytes between the start points of two successive rows in the source frame, with range [0, infinity)
1107 * @param targetStrideBytes The number of bytes between the start points of two successive rows in the target frame, with range [0, infinity)
1108 * @param firstRow The first row to be handled, with range [0, height - 1]
1109 * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1110 */
1111 static void transformGenericSubset(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const RowReversePixelOrderFunction<void> rowReversePixelOrderFunction, const unsigned int bytesPerRow, const unsigned int sourceStrideBytes, const unsigned int targetStrideBytes, const unsigned int firstRow, const unsigned int numberRows);
1112
1113 /**
1114 * Converts a subset of the rows of an image with premultiplied alpha to a straight image (without premultiplied alpha), the image is converted in place.
1115 * @param frame The image to convert, must be valid
1116 * @param width The width of the image in pixel, with range [1, infinity)
1117 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
1118 * @param firstRow The first row to be handled, with range [0, height - 1]
1119 * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1120 * @tparam tChannels The number of frame channels, with range [2, infinity)
1121 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
1122 */
1123 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
1124 static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(uint8_t* const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1125
1126 /**
1127 * Converts a subset of the rows of a source image with premultiplied alpha to a straight target image (without premultiplied alpha).
1128 * @param source The source image to convert, must be valid
1129 * @param target The resulting converted target image, must be valid
1130 * @param width The width of the image in pixel, with range [1, infinity)
1131 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
1132 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1133 * @param firstRow The first row to be handled, with range [0, height - 1]
1134 * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1135 * @tparam tChannels The number of frame channels, with range [2, infinity)
1136 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
1137 */
1138 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
1139 static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1140
1141 /**
1142 * Converts a subset of the rows of an image with straight alpha (without premultiplied alpha) to an image with premultiplied alpha, the image is converted in place.
1143 * @param frame The image to convert, must be valid
1144 * @param width The width of the image in pixel, with range [1, infinity)
1145 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
1146 * @param firstRow The first row to be handled, with range [0, height - 1]
1147 * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1148 * @tparam tChannels The number of frame channels, with range [2, infinity)
1149 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
1150 */
1151 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
1152 static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(uint8_t* const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1153
1154 /**
1155 * Converts a subset of the rows of a source image with straight alpha (without premultiplied alpha) to a target image with premultiplied alpha.
1156 * @param source The source image to convert, must be valid
1157 * @param target The resulting converted target image, must be valid
1158 * @param width The width of the image in pixel, with range [1, infinity)
1159 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
1160 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1161 * @param firstRow The first row to be handled, with range [0, height - 1]
1162 * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1163 * @tparam tChannels The number of frame channels, with range [2, infinity)
1164 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
1165 */
1166 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
1167 static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1168
1169#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1170
1171 /**
1172 * Converts 16 pixels with 3 channels per pixel to 16 pixels with one channel per pixel by a linear combination of the three channels.
1173 * This function can be used to e.g., convert RGB24 to Y8, or BGR24 to Y8.
1174 * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
1175 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
1176 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1177 * @param target The pointer to the 16 target pixels (with 1 channel = 16 bytes) receiving the converted pixel data, must be valid
1178 * @param multiplicationFactors0_128_u_16x8 The multiplication factor for the first channel (8 identical 16 bit values), with ranges [0, 128], while the sum of all three factors must be 128
1179 * @param multiplicationFactors1_128_u_16x8 The multiplication factor for the second channel (8 identical 16 bit values), with ranges [0, 128], while the sum of all three factors must be 128
1180 * @param multiplicationFactors2_128_u_16x8 The multiplication factor for the third channel (8 identical 16 bit values), with ranges [0, 128], while the sum of all three factors must be 128
1181 */
1182 static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactors0_128_u_16x8, const __m128i& multiplicationFactors1_128_u_16x8, const __m128i& multiplicationFactors2_128_u_16x8);
1183
1184 /**
1185 * Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1186 * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1187 * The linear combination is defined by three integer multiplication factors for each source channel with 128 as denominator, plus one bias (translation) parameter for each target channel (with 1 as denominator).<br>
1188 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
1189 * The transformation is based on the following pattern:
1190 * <pre>
1191 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1192 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1193 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1194 * </pre>
1195 * With t target, s source, f factor, and b bias.
1196 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1197 * @param target The pointer to the 16 target pixels (with 3 channels = 48 bytes) receiving the converted pixel data, must be valid
1198 * @param factorChannel00_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1199 * @param factorChannel10_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1200 * @param factorChannel20_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1201 * @param factorChannel01_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1202 * @param factorChannel11_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1203 * @param factorChannel21_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1204 * @param factorChannel02_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1205 * @param factorChannel12_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1206 * @param factorChannel22_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1207 * @param biasChannel0_s_16x8 The bias (translation) value for the first target channel, with range [-127, 127]
1208 * @param biasChannel1_s_16x8 The bias (translation) value for the second target channel, with range [-127, 127]
1209 * @param biasChannel2_s_16x8 The bias (translation) value for the third target channel, with range [-127, 127]
1210 */
1211 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& factorChannel00_128_s_16x8, const __m128i& factorChannel10_128_s_16x8, const __m128i& factorChannel20_128_s_16x8, const __m128i& factorChannel01_128_s_16x8, const __m128i& factorChannel11_128_s_16x8, const __m128i& factorChannel21_128_s_16x8, const __m128i& factorChannel02_128_s_16x8, const __m128i& factorChannel12_128_s_16x8, const __m128i& factorChannel22_128_s_16x8, const __m128i& biasChannel0_s_16x8, const __m128i& biasChannel1_s_16x8, const __m128i& biasChannel2_s_16x8);
1212
1213 /**
1214 * Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1215 * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1216 * The linear combination is defined by three integer multiplication factors for each source channel with 1024 as denominator, plus one bias (translation) parameter for each target channel (presumably also with 1024 as denominator, as the '_1024_' parameter names and the documented ranges suggest; to be confirmed against the implementation).<br>
1217 * Beware: As this function applies integer multiplication factors (with 10 bits precision) the conversion result has an accuracy of +/- 1 color intensity.<br>
1218 * The transformation is based on the following pattern:
1219 * <pre>
1220 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1221 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1222 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1223 * </pre>
1224 * With t target, s source, f factor, and b bias.
1225 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1226 * @param target The pointer to the 16 target pixels (with 3 channels = 48 bytes) receiving the converted pixel data, must be valid
1227 * @param factorChannel00_1024_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-1024 * 16, 1024 * 16]
1228 * @param factorChannel10_1024_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-1024 * 16, 1024 * 16]
1229 * @param factorChannel20_1024_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-1024 * 16, 1024 * 16]
1230 * @param factorChannel01_1024_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-1024 * 16, 1024 * 16]
1231 * @param factorChannel11_1024_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-1024 * 16, 1024 * 16]
1232 * @param factorChannel21_1024_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-1024 * 16, 1024 * 16]
1233 * @param factorChannel02_1024_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-1024 * 16, 1024 * 16]
1234 * @param factorChannel12_1024_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-1024 * 16, 1024 * 16]
1235 * @param factorChannel22_1024_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-1024 * 16, 1024 * 16]
1236 * @param biasChannel0_1024_s_32x4 The bias (translation) value for the first target channel, with range [-1024 * 16, 1024 * 16]
1237 * @param biasChannel1_1024_s_32x4 The bias (translation) value for the second target channel, with range [-1024 * 16, 1024 * 16]
1238 * @param biasChannel2_1024_s_32x4 The bias (translation) value for the third target channel, with range [-1024 * 16, 1024 * 16]
1239 */
1240 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& factorChannel00_1024_s_16x8, const __m128i& factorChannel10_1024_s_16x8, const __m128i& factorChannel20_1024_s_16x8, const __m128i& factorChannel01_1024_s_16x8, const __m128i& factorChannel11_1024_s_16x8, const __m128i& factorChannel21_1024_s_16x8, const __m128i& factorChannel02_1024_s_16x8, const __m128i& factorChannel12_1024_s_16x8, const __m128i& factorChannel22_1024_s_16x8, const __m128i& biasChannel0_1024_s_32x4, const __m128i& biasChannel1_1024_s_32x4, const __m128i& biasChannel2_1024_s_32x4);
1241
1242 /**
1243 * Converts 16 pixels with 4 channels per pixel to 16 pixels with one channel per pixel by a linear combination of the four channels.
1244 * This function can be used to e.g., convert RGBA32 to Y8, or ARGB32 to Y8, or RGB32 to Y8.
1245 * The linear combination is defined by one integer multiplication factor for each source channel with 128 as denominator.<br>
1246 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
1247 * @param source The pointer to the 16 source pixels (with 4 channels = 64 bytes) to convert, must be valid
1248 * @param target The pointer to the 16 target pixels (with 1 channel = 16 bytes) receiving the converted pixel data, must be valid
1249 * @param multiplicationFactors0123_128_s_32x The four individual multiplication factors, one for each source channel, with ranges [0, 127], while the sum of all four factors must be 128
1250 */
1251 static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactors0123_128_s_32x);
1252
1253 /**
1254 * Converts 16 pixels with 4 channels per pixel to 16 pixels with two channels per pixel by a linear combination of the four channels.
1255 * This function can be used to e.g., convert RGBA32 to YA16, or ARGB32 to AY16.
1256 * The linear combination is defined by one integer multiplication factor for each source channel (and for each target channel) with 128 as denominator.<br>
1257 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
1258 * @param source The pointer to the 16 source pixels (with 4 channels = 64 bytes) to convert, must be valid
1259 * @param target The pointer to the 16 target pixels (with 2 channels = 32 bytes) receiving the converted pixel data, must be valid
1260 * @param multiplicationFactorsChannel0_0123_128_s_16x8 The four individual multiplication factors for the first target channel (two sets), one for each source channel, with ranges [0, 128], while the sum of all four factors must be 128
1261 * @param multiplicationFactorsChannel1_0123_128_s_16x8 The four individual multiplication factors for the second target channel (two sets), one for each source channel, with ranges [0, 128], while the sum of all four factors must be 128
1262 */
1263 static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactorsChannel0_0123_128_s_16x8, const __m128i& multiplicationFactorsChannel1_0123_128_s_16x8);
1264
1265#endif // OCEAN_HARDWARE_SSE_VERSION >= 41
1266
1267#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1268
1269 /**
1270 * Converts 8 pixels with 3 channels per pixel to 8 pixels with one channel per pixel by a linear combination of the three channels.
1271 * Thus, this function can be used to e.g., convert RGB24 to Y8, or BGR24 to Y8.
1272 * The linear combination is defined by one integer multiplication factor for each source channel with 128 as denominator.<br>
1273 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
1274 * @param source The pointer to the 8 source pixels (with 3 channels = 24 bytes) to convert, must be valid
1275 * @param target The pointer to the 8 target pixels (with 1 channel = 8 bytes) receiving the converted pixel data, must be valid
1276 * @param factorChannel0_128_u_8x8 The multiplication factor (8 identical factors) for the first source channel, with range [0, 128]
1277 * @param factorChannel1_128_u_8x8 The multiplication factor (8 identical factors) for the second source channel, with range [0, 128 - factorChannel0 - factorChannel2]
1278 * @param factorChannel2_128_u_8x8 The multiplication factor (8 identical factors) for the third source channel, with range [0, 128 - factorChannel0 - factorChannel1]
1279 * @tparam tUseFactorChannel0 True, if the value(s) of factorChannel0 is not zero; False, if the value(s) of factorChannel0 is zero
1280 * @tparam tUseFactorChannel1 True, if the value(s) of factorChannel1 is not zero; False, if the value(s) of factorChannel1 is zero
1281 * @tparam tUseFactorChannel2 True, if the value(s) of factorChannel2 is not zero; False, if the value(s) of factorChannel2 is zero
1282 */
1283 template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2>
1284 static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel0_128_u_8x8, const uint8x8_t& factorChannel1_128_u_8x8, const uint8x8_t& factorChannel2_128_u_8x8);
1285
1286 /**
1287 * Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear combination of the three channels plus an in advance bias (translation) parameter.
1288 * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1289 * The linear combination is defined by three integer multiplication factors for each source channel with 64 as denominator, plus one in advance bias (translation) parameter for each source channel (with 1 as denominator).<br>
1290 * Beware: As this function applies integer multiplication factors (with 6 bits precision) the conversion result has an accuracy of +/- 4 color intensities.<br>
1291 * The transformation is based on the following pattern:
1292 * <pre>
1293 * t0 = clamp(0, f00 * (s0 - b0) + f01 * (s1 - b1) + f02 * (s2 - b2), 255)
1294 * t1 = clamp(0, f10 * (s0 - b0) + f11 * (s1 - b1) + f12 * (s2 - b2), 255)
1295 * t2 = clamp(0, f20 * (s0 - b0) + f21 * (s1 - b1) + f22 * (s2 - b2), 255)
1296 * </pre>
1297 * With t target, s source, f factor, and b bias/translation.
1298 * @param source The pointer to the 8 source pixels (with 3 channels = 24 bytes) to convert, must be valid
1299 * @param target The pointer to the 8 target pixels (with 3 channels = 24 bytes) receiving the converted pixel data, must be valid
1300 * @param factorChannel00_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1301 * @param factorChannel10_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1302 * @param factorChannel20_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1303 * @param factorChannel01_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1304 * @param factorChannel11_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1305 * @param factorChannel21_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1306 * @param factorChannel02_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1307 * @param factorChannel12_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1308 * @param factorChannel22_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1309 * @param biasChannel0_u_8x8 The bias (translation) value for the first source channel (b0 in the pattern above, subtracted before the multiplication), with range [0, 128]
1310 * @param biasChannel1_u_8x8 The bias (translation) value for the second source channel (b1 in the pattern above, subtracted before the multiplication), with range [0, 128]
1311 * @param biasChannel2_u_8x8 The bias (translation) value for the third source channel (b2 in the pattern above, subtracted before the multiplication), with range [0, 128]
1312 */
1313 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8);
1314
1315 /**
1316 * Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear combination of the three channels plus an in advance bias (translation) parameter.
1317 * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1318 * The linear combination is defined by three integer multiplication factors for each source channel with 64 as denominator, plus one in advance bias (translation) parameter for each source channel (with 1 as denominator).<br>
1319 * Beware: As this function applies integer multiplication factors (with 6 bits precision) the conversion result has an accuracy of +/- 4 color intensities.<br>
1320 * The transformation is based on the following pattern:
1321 * <pre>
1322 * t0 = clamp(0, f00 * (s0 - b0) + f01 * (s1 - b1) + f02 * (s2 - b2), 255)
1323 * t1 = clamp(0, f10 * (s0 - b0) + f11 * (s1 - b1) + f12 * (s2 - b2), 255)
1324 * t2 = clamp(0, f20 * (s0 - b0) + f21 * (s1 - b1) + f22 * (s2 - b2), 255)
1325 * </pre>
1326 * With t target, s source, f factor, and b bias/translation.
1327 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1328 * @param target The pointer to the 16 target pixels (with 3 channels = 48 bytes) receiving the converted pixel data, must be valid
1329 * @param factorChannel00_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1330 * @param factorChannel10_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1331 * @param factorChannel20_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1332 * @param factorChannel01_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1333 * @param factorChannel11_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1334 * @param factorChannel21_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1335 * @param factorChannel02_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1336 * @param factorChannel12_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1337 * @param factorChannel22_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1338 * @param biasChannel0_u_8x8 The bias (translation) value for the first source channel (b0 in the pattern above, subtracted before the multiplication), with range [0, 128]
1339 * @param biasChannel1_u_8x8 The bias (translation) value for the second source channel (b1 in the pattern above, subtracted before the multiplication), with range [0, 128]
1340 * @param biasChannel2_u_8x8 The bias (translation) value for the third source channel (b2 in the pattern above, subtracted before the multiplication), with range [0, 128]
1341 */
1342 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8);
1343
1344 /**
1345 * Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1346 * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1347 * The linear combination is defined by three integer multiplication factors for each source channel with 128 as denominator, plus one bias (translation) parameter for each target channel (also with 128 as denominator).<br>
1348 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
1349 * The transformation is based on the following pattern:
1350 * <pre>
1351 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1352 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1353 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1354 * </pre>
1355 * With t target, s source, f factor, and b bias.
1356 * @param source The pointer to the 8 source pixels (with 3 channels = 24 bytes) to convert, must be valid
1357 * @param target The pointer to the 8 target pixels (with 3 channels = 24 bytes) receiving the converted pixel data, must be valid
1358 * @param factorChannel00_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1359 * @param factorChannel10_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1360 * @param factorChannel20_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1361 * @param factorChannel01_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1362 * @param factorChannel11_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1363 * @param factorChannel21_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1364 * @param factorChannel02_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1365 * @param factorChannel12_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1366 * @param factorChannel22_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1367 * @param biasChannel0_128_s_16x8 The bias (translation) value for the first target channel, with range [-128 * 128, 128 * 128]
1368 * @param biasChannel1_128_s_16x8 The bias (translation) value for the second target channel, with range [-128 * 128, 128 * 128]
1369 * @param biasChannel2_128_s_16x8 The bias (translation) value for the third target channel, with range [-128 * 128, 128 * 128]
1370 */
1371 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_128_s_16x8, const int16x8_t& factorChannel10_128_s_16x8, const int16x8_t& factorChannel20_128_s_16x8, const int16x8_t& factorChannel01_128_s_16x8, const int16x8_t& factorChannel11_128_s_16x8, const int16x8_t& factorChannel21_128_s_16x8, const int16x8_t& factorChannel02_128_s_16x8, const int16x8_t& factorChannel12_128_s_16x8, const int16x8_t& factorChannel22_128_s_16x8, const int16x8_t& biasChannel0_128_s_16x8, const int16x8_t& biasChannel1_128_s_16x8, const int16x8_t& biasChannel2_128_s_16x8);
1372
1373 /**
1374 * Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1375 * Thus, this function can be used to e.g., convert YUV24 to RGB24, or YVU24 to BGR24.
1376 * The linear combination is defined by three integer multiplication factors for each source channel with 1024 as denominator, plus one bias (translation) parameter for each target channel (also with 1024 as denominator).<br>
1377 * Beware: As this function applies integer multiplication factors (with 10 bits precision) the conversion result has an accuracy of +/- 1 color intensity.<br>
1378 * The transformation is based on the following pattern:
1379 * <pre>
1380 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1381 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1382 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1383 * </pre>
1384 * With t target, s source, f factor, and b bias.
1385 * @param source The pointer to the 8 source pixels (with 3 channels = 24 bytes) to convert, must be valid
1386 * @param target The pointer to the 8 target pixels (with 3 channels = 24 bytes) receiving the converted pixel data, must be valid
1387 * @param factorChannel00_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the first target channel, with range [-32767, 32767]
1388 * @param factorChannel10_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the second target channel, with range [-32767, 32767]
1389 * @param factorChannel20_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the third target channel, with range [-32767, 32767]
1390 * @param factorChannel01_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the first target channel, with range [-32767, 32767]
1391 * @param factorChannel11_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the second target channel, with range [-32767, 32767]
1392 * @param factorChannel21_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the third target channel, with range [-32767, 32767]
1393 * @param factorChannel02_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the first target channel, with range [-32767, 32767]
1394 * @param factorChannel12_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the second target channel, with range [-32767, 32767]
1395 * @param factorChannel22_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the third target channel, with range [-32767, 32767]
1396 * @param biasChannel0_1024_s_32x4 The bias (translation) value for the first target channel, with range [-32767, 32767]
1397 * @param biasChannel1_1024_s_32x4 The bias (translation) value for the second target channel, with range [-32767, 32767]
1398 * @param biasChannel2_1024_s_32x4 The bias (translation) value for the third target channel, with range [-32767, 32767]
1399 */
1400 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x4_t& factorChannel00_1024_s_16x4, const int16x4_t& factorChannel10_1024_s_16x4, const int16x4_t& factorChannel20_1024_s_16x4, const int16x4_t& factorChannel01_1024_s_16x4, const int16x4_t& factorChannel11_1024_s_16x4, const int16x4_t& factorChannel21_1024_s_16x4, const int16x4_t& factorChannel02_1024_s_16x4, const int16x4_t& factorChannel12_1024_s_16x4, const int16x4_t& factorChannel22_1024_s_16x4, const int32x4_t& biasChannel0_1024_s_32x4, const int32x4_t& biasChannel1_1024_s_32x4, const int32x4_t& biasChannel2_1024_s_32x4);
1401
1402 /**
1403 * Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1404 * Thus, this function can be used to e.g., convert YUV24 to RGB24, or YVU24 to BGR24.
1405 * The linear combination is defined by three integer multiplication factors for each source channel with 1024 as denominator, plus one bias (translation) parameter for each target channel (also with 1024 as denominator).<br>
1406 * Beware: As this function applies integer multiplication factors (with 10 bits precision) the conversion result has an accuracy of +/- 1 color intensity.<br>
1407 * The transformation is based on the following pattern:
1408 * <pre>
1409 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1410 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1411 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1412 * </pre>
1413 * With t target, s source, f factor, and b bias.
1414 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1415 * @param target The pointer to the 16 target pixels (with 3 channels = 48 bytes) receiving the converted pixel data, must be valid
1416 * @param factorChannel00_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the first target channel, with range [-32767, 32767]
1417 * @param factorChannel10_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the second target channel, with range [-32767, 32767]
1418 * @param factorChannel20_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the third target channel, with range [-32767, 32767]
1419 * @param factorChannel01_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the first target channel, with range [-32767, 32767]
1420 * @param factorChannel11_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the second target channel, with range [-32767, 32767]
1421 * @param factorChannel21_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the third target channel, with range [-32767, 32767]
1422 * @param factorChannel02_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the first target channel, with range [-32767, 32767]
1423 * @param factorChannel12_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the second target channel, with range [-32767, 32767]
1424 * @param factorChannel22_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the third target channel, with range [-32767, 32767]
1425 * @param biasChannel0_1024_s_32x4 The bias (translation) value for the first target channel, with range [-32767, 32767]
1426 * @param biasChannel1_1024_s_32x4 The bias (translation) value for the second target channel, with range [-32767, 32767]
1427 * @param biasChannel2_1024_s_32x4 The bias (translation) value for the third target channel, with range [-32767, 32767]
1428 */
1429 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x4_t& factorChannel00_1024_s_16x4, const int16x4_t& factorChannel10_1024_s_16x4, const int16x4_t& factorChannel20_1024_s_16x4, const int16x4_t& factorChannel01_1024_s_16x4, const int16x4_t& factorChannel11_1024_s_16x4, const int16x4_t& factorChannel21_1024_s_16x4, const int16x4_t& factorChannel02_1024_s_16x4, const int16x4_t& factorChannel12_1024_s_16x4, const int16x4_t& factorChannel22_1024_s_16x4, const int32x4_t& biasChannel0_1024_s_32x4, const int32x4_t& biasChannel1_1024_s_32x4, const int32x4_t& biasChannel2_1024_s_32x4);
1430
1431 /**
1432 * Converts 16 pixels with 3 channels per pixel to 16 pixels with 3 channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1433 * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1434 * The linear combination is defined by three integer multiplication factors for each source channel with 128 as denominator, plus one bias (translation) parameter for each target channel (also with 128 as denominator).<br>
1435 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
1436 * The transformation is based on the following pattern:
1437 * <pre>
1438 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1439 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1440 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1441 * </pre>
1442 * With t target, s source, f factor, and b bias.
1443 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1444 * @param target The pointer to the 16 target pixels (with 3 channels = 48 bytes) receiving the converted pixel data, must be valid
1445 * @param factorChannel00_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1446 * @param factorChannel10_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1447 * @param factorChannel20_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1448 * @param factorChannel01_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1449 * @param factorChannel11_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1450 * @param factorChannel21_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1451 * @param factorChannel02_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1452 * @param factorChannel12_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1453 * @param factorChannel22_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1454 * @param biasChannel0_128_s_16x8 The bias (translation) value for the first target channel, with range [-128 * 128, 128 * 128]
1455 * @param biasChannel1_128_s_16x8 The bias (translation) value for the second target channel, with range [-128 * 128, 128 * 128]
1456 * @param biasChannel2_128_s_16x8 The bias (translation) value for the third target channel, with range [-128 * 128, 128 * 128]
1457 */
1458 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_128_s_16x8, const int16x8_t& factorChannel10_128_s_16x8, const int16x8_t& factorChannel20_128_s_16x8, const int16x8_t& factorChannel01_128_s_16x8, const int16x8_t& factorChannel11_128_s_16x8, const int16x8_t& factorChannel21_128_s_16x8, const int16x8_t& factorChannel02_128_s_16x8, const int16x8_t& factorChannel12_128_s_16x8, const int16x8_t& factorChannel22_128_s_16x8, const int16x8_t& biasChannel0_128_s_16x8, const int16x8_t& biasChannel1_128_s_16x8, const int16x8_t& biasChannel2_128_s_16x8);
1459
1460 /**
1461 * Converts 16 pixels with 3 channels per pixel to 16 pixels with 4 channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1462 * The fourth channel is set to a constant value, e.g., for an alpha channel.<br>
1463 * Thus, this function can be used to e.g., convert YUV24 to RGBA32, or YVU24 to BGRA32.<br>
1464 * The linear combination is defined by three integer multiplication factors for each source channel with 64 as denominator, plus one bias (translation) parameter for each source channel (with 128 as denominator).<br>
1465 * Beware: As this function applies integer multiplication factors (with 6 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
1466 * The transformation is based on the following pattern:
1467 * <pre>
1468 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1469 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1470 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1471 * t3 = valueChannel3
1472 * </pre>
1473 * With t target, s source, f factor, and b bias.
1474 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1475 * @param target The pointer to the 16 target pixels (with 4 channels = 64 bytes) receiving the converted pixel data, must be valid
1476 * @param factorChannel00_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1477 * @param factorChannel10_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1478 * @param factorChannel20_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1479 * @param factorChannel01_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1480 * @param factorChannel11_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1481 * @param factorChannel21_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1482 * @param factorChannel02_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1483 * @param factorChannel12_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1484 * @param factorChannel22_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1485 * @param biasChannel0_u_8x8 The bias (translation) value for the first target channel, with range [0, 128]
1486 * @param biasChannel1_u_8x8 The bias (translation) value for the second target channel, with range [0, 128]
1487 * @param biasChannel2_u_8x8 The bias (translation) value for the third target channel, with range [0, 128]
1488 * @param channelValue3_u_8x16 The constant value for the fourth target channel, with range [0, 255]
1489 */
1490 static OCEAN_FORCE_INLINE void convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8, const uint8x16_t& channelValue3_u_8x16);
1491
1492 /**
1493 * Converts 8 pixels with 4 channels per pixel to 8 pixels with one channel per pixel by a linear combination of the four channels.<br>
1494 * Thus, this function can be used e.g., to convert RGBA32 to Y8, or ARGB32 to Y8, or RGB32 to Y8.<br>
1495 * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
1496 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
1497 * @param source The pointer to the 8 source pixels (with 4 channels = 32 bytes) to convert, must be valid
1498 * @param target The pointer to the 8 target pixels (with 1 channel = 8 bytes) receiving the converted pixel data, must be valid
1499 * @param factorChannel0_128_u_8x8 The multiplication factor (8 identical factors) for the first channel, with range [0, 127]
1500 * @param factorChannel1_128_u_8x8 The multiplication factor (8 identical factors) for the second channel, with range [0, 127 - factorChannel0 - factorChannel2 - factorChannel3]
1501 * @param factorChannel2_128_u_8x8 The multiplication factor (8 identical factors) for the third channel, with range [0, 127 - factorChannel0 - factorChannel1 - factorChannel3]
1502 * @param factorChannel3_128_u_8x8 The multiplication factor (8 identical factors) for the fourth channel, with range [0, 127 - factorChannel0 - factorChannel1 - factorChannel2]
1503 * @tparam tUseFactorChannel0 True, if the value(s) of factorChannel0 is not zero; False, if the value(s) of factorChannel0 is zero
1504 * @tparam tUseFactorChannel1 True, if the value(s) of factorChannel1 is not zero; False, if the value(s) of factorChannel1 is zero
1505 * @tparam tUseFactorChannel2 True, if the value(s) of factorChannel2 is not zero; False, if the value(s) of factorChannel2 is zero
1506 * @tparam tUseFactorChannel3 True, if the value(s) of factorChannel3 is not zero; False, if the value(s) of factorChannel3 is zero
1507 */
1508 template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2, bool tUseFactorChannel3>
1509 static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel0_128_u_8x8, const uint8x8_t& factorChannel1_128_u_8x8, const uint8x8_t& factorChannel2_128_u_8x8, const uint8x8_t& factorChannel3_128_u_8x8);
1510
1511 /**
1512 * Converts 8 pixels with 4 channels per pixel to 8 pixels with two channels per pixel by a linear combination of the four channels.<br>
1513 * Thus, this function can be used e.g., to convert RGBA32 to YA16, or ARGB32 to AY16.<br>
1514 * The linear combination is defined by one integer multiplication factor for each pair of source and target channel with 128 as denominator.<br>
1515 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
1516 * @param source The pointer to the 8 source pixels (with 4 channels = 32 bytes) to convert, must be valid
1517 * @param target The pointer to the 8 target pixels (with 2 channels = 16 bytes) receiving the converted pixel data, must be valid
1518 * @param factorChannel00_128_u_8x8 The multiplication factor (8 identical factors) for the first target and first source channel, with range [0, 127]
1519 * @param factorChannel10_128_u_8x8 The multiplication factor (8 identical factors) for the second target and first source channel, with range [0, 127]
1520 * @param factorChannel01_128_u_8x8 The multiplication factor (8 identical factors) for the first target and second source channel, with range [0, 127 - factorChannel00 - factorChannel02 - factorChannel03]
1521 * @param factorChannel11_128_u_8x8 The multiplication factor (8 identical factors) for the second target and second source channel, with range [0, 127 - factorChannel10 - factorChannel12 - factorChannel13]
1522 * @param factorChannel02_128_u_8x8 The multiplication factor (8 identical factors) for the first target and third source channel, with range [0, 127 - factorChannel00 - factorChannel01 - factorChannel03]
1523 * @param factorChannel12_128_u_8x8 The multiplication factor (8 identical factors) for the second target and third source channel, with range [0, 127 - factorChannel10 - factorChannel11 - factorChannel13]
1524 * @param factorChannel03_128_u_8x8 The multiplication factor (8 identical factors) for the first target and fourth source channel, with range [0, 127 - factorChannel00 - factorChannel01 - factorChannel02]
1525 * @param factorChannel13_128_u_8x8 The multiplication factor (8 identical factors) for the second target and fourth source channel, with range [0, 127 - factorChannel10 - factorChannel11 - factorChannel12]
1526 */
1527 static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel00_128_u_8x8, const uint8x8_t& factorChannel10_128_u_8x8, const uint8x8_t& factorChannel01_128_u_8x8, const uint8x8_t& factorChannel11_128_u_8x8, const uint8x8_t& factorChannel02_128_u_8x8, const uint8x8_t& factorChannel12_128_u_8x8, const uint8x8_t& factorChannel03_128_u_8x8, const uint8x8_t& factorChannel13_128_u_8x8);
1528
1529#endif // OCEAN_HARDWARE_NEON_VERSION >= 10
1530
1531};
1532
1533#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1534
1535template <>
1536inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 2u>(const uint8_t* const sourceFrame, uint8_t* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
1537{
1538 ocean_assert(sourceFrame != nullptr);
1539 ocean_assert(targetFrames != nullptr);
1540
1541 ocean_assert(width != 0u && height != 0u);
1542 ocean_assert(channels == 2u);
1543
1544 constexpr unsigned int tChannels = 2u;
1545
1546 bool allTargetFramesContinuous = true;
1547
1548 if (targetFramesPaddingElements != nullptr)
1549 {
1550 for (unsigned int n = 0u; n < tChannels; ++n)
1551 {
1552 if (targetFramesPaddingElements[n] != 0u)
1553 {
1554 allTargetFramesContinuous = false;
1555 break;
1556 }
1557 }
1558 }
1559
1560 const uint8_t* source = sourceFrame;
1561 uint8_t* target0 = targetFrames[0];
1562 uint8_t* target1 = targetFrames[1];
1563
1564 constexpr unsigned int tBlockSize = 16u;
1565
1566 uint8x16x2_t source_8x16x2;
1567
1568 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1569 {
1570 const unsigned int pixels = width * height;
1571 const unsigned int blocks = pixels / tBlockSize;
1572 const unsigned int remaining = pixels % tBlockSize;
1573
1574 for (unsigned int n = 0u; n < blocks; ++n)
1575 {
1576 source_8x16x2 = vld2q_u8(source);
1577
1578 vst1q_u8(target0, source_8x16x2.val[0]);
1579 vst1q_u8(target1, source_8x16x2.val[1]);
1580
1581 source += tBlockSize * tChannels;
1582
1583 target0 += tBlockSize;
1584 target1 += tBlockSize;
1585 }
1586
1587 for (unsigned int n = 0u; n < remaining; ++n)
1588 {
1589 target0[n] = source[n * tChannels + 0u];
1590 target1[n] = source[n * tChannels + 1u];
1591 }
1592 }
1593 else
1594 {
1595 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[0];
1596 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[1];
1597
1598 const unsigned int blocks = width / tBlockSize;
1599 const unsigned int remaining = width % tBlockSize;
1600
1601 for (unsigned int y = 0u; y < height; ++y)
1602 {
1603 for (unsigned int n = 0u; n < blocks; ++n)
1604 {
1605 source_8x16x2 = vld2q_u8(source);
1606
1607 vst1q_u8(target0, source_8x16x2.val[0]);
1608 vst1q_u8(target1, source_8x16x2.val[1]);
1609
1610 source += tBlockSize * tChannels;
1611
1612 target0 += tBlockSize;
1613 target1 += tBlockSize;
1614 }
1615
1616 for (unsigned int n = 0u; n < remaining; ++n)
1617 {
1618 target0[n] = source[n * tChannels + 0u];
1619 target1[n] = source[n * tChannels + 1u];
1620 }
1621
1622 source += remaining * tChannels + sourceFramePaddingElements;
1623 target0 += remaining + targetFrame0PaddingElements;
1624 target1 += remaining + targetFrame1PaddingElements;
1625 }
1626 }
1627}
1628
1629template <>
1630inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 3u>(const uint8_t* const sourceFrame, uint8_t* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
1631{
1632 ocean_assert(sourceFrame != nullptr);
1633 ocean_assert(targetFrames != nullptr);
1634
1635 ocean_assert(width != 0u && height != 0u);
1636 ocean_assert(channels == 3u);
1637
1638 constexpr unsigned int tChannels = 3u;
1639
1640 bool allTargetFramesContinuous = true;
1641
1642 if (targetFramesPaddingElements != nullptr)
1643 {
1644 for (unsigned int n = 0u; n < tChannels; ++n)
1645 {
1646 if (targetFramesPaddingElements[n] != 0u)
1647 {
1648 allTargetFramesContinuous = false;
1649 break;
1650 }
1651 }
1652 }
1653
1654 const uint8_t* source = sourceFrame;
1655 uint8_t* target0 = targetFrames[0];
1656 uint8_t* target1 = targetFrames[1];
1657 uint8_t* target2 = targetFrames[2];
1658
1659 constexpr unsigned int tBlockSize = 16u;
1660
1661 uint8x16x3_t source_8x16x3;
1662
1663 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1664 {
1665 const unsigned int pixels = width * height;
1666 const unsigned int blocks = pixels / tBlockSize;
1667 const unsigned int remaining = pixels % tBlockSize;
1668
1669 for (unsigned int n = 0u; n < blocks; ++n)
1670 {
1671 source_8x16x3 = vld3q_u8(source);
1672
1673 vst1q_u8(target0, source_8x16x3.val[0]);
1674 vst1q_u8(target1, source_8x16x3.val[1]);
1675 vst1q_u8(target2, source_8x16x3.val[2]);
1676
1677 source += tBlockSize * tChannels;
1678
1679 target0 += tBlockSize;
1680 target1 += tBlockSize;
1681 target2 += tBlockSize;
1682 }
1683
1684 for (unsigned int n = 0u; n < remaining; ++n)
1685 {
1686 target0[n] = source[n * tChannels + 0u];
1687 target1[n] = source[n * tChannels + 1u];
1688 target2[n] = source[n * tChannels + 2u];
1689 }
1690 }
1691 else
1692 {
1693 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[0];
1694 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[1];
1695 const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[2];
1696
1697 const unsigned int blocks = width / tBlockSize;
1698 const unsigned int remaining = width % tBlockSize;
1699
1700 for (unsigned int y = 0u; y < height; ++y)
1701 {
1702 for (unsigned int n = 0u; n < blocks; ++n)
1703 {
1704 source_8x16x3 = vld3q_u8(source);
1705
1706 vst1q_u8(target0, source_8x16x3.val[0]);
1707 vst1q_u8(target1, source_8x16x3.val[1]);
1708 vst1q_u8(target2, source_8x16x3.val[2]);
1709
1710 source += tBlockSize * tChannels;
1711
1712 target0 += tBlockSize;
1713 target1 += tBlockSize;
1714 target2 += tBlockSize;
1715 }
1716
1717 for (unsigned int n = 0u; n < remaining; ++n)
1718 {
1719 target0[n] = source[n * tChannels + 0u];
1720 target1[n] = source[n * tChannels + 1u];
1721 target2[n] = source[n * tChannels + 2u];
1722 }
1723
1724 source += remaining * tChannels + sourceFramePaddingElements;
1725 target0 += remaining + targetFrame0PaddingElements;
1726 target1 += remaining + targetFrame1PaddingElements;
1727 target2 += remaining + targetFrame2PaddingElements;
1728 }
1729 }
1730}
1731
1732template <>
1733inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 4u>(const uint8_t* const sourceFrame, uint8_t* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
1734{
1735 ocean_assert(sourceFrame != nullptr);
1736 ocean_assert(targetFrames != nullptr);
1737
1738 ocean_assert(width != 0u && height != 0u);
1739 ocean_assert(channels == 4u);
1740
1741 constexpr unsigned int tChannels = 4u;
1742
1743 bool allTargetFramesContinuous = true;
1744
1745 if (targetFramesPaddingElements != nullptr)
1746 {
1747 for (unsigned int n = 0u; n < tChannels; ++n)
1748 {
1749 if (targetFramesPaddingElements[n] != 0u)
1750 {
1751 allTargetFramesContinuous = false;
1752 break;
1753 }
1754 }
1755 }
1756
1757 const uint8_t* source = sourceFrame;
1758 uint8_t* target0 = targetFrames[0];
1759 uint8_t* target1 = targetFrames[1];
1760 uint8_t* target2 = targetFrames[2];
1761 uint8_t* target3 = targetFrames[3];
1762
1763 constexpr unsigned int tBlockSize = 16u;
1764
1765 uint8x16x4_t source_8x16x4;
1766
1767 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1768 {
1769 const unsigned int pixels = width * height;
1770 const unsigned int blocks = pixels / tBlockSize;
1771 const unsigned int remaining = pixels % tBlockSize;
1772
1773 for (unsigned int n = 0u; n < blocks; ++n)
1774 {
1775 source_8x16x4 = vld4q_u8(source);
1776
1777 vst1q_u8(target0, source_8x16x4.val[0]);
1778 vst1q_u8(target1, source_8x16x4.val[1]);
1779 vst1q_u8(target2, source_8x16x4.val[2]);
1780 vst1q_u8(target3, source_8x16x4.val[3]);
1781
1782 source += tBlockSize * tChannels;
1783
1784 target0 += tBlockSize;
1785 target1 += tBlockSize;
1786 target2 += tBlockSize;
1787 target3 += tBlockSize;
1788 }
1789
1790 for (unsigned int n = 0u; n < remaining; ++n)
1791 {
1792 target0[n] = source[n * tChannels + 0u];
1793 target1[n] = source[n * tChannels + 1u];
1794 target2[n] = source[n * tChannels + 2u];
1795 target3[n] = source[n * tChannels + 3u];
1796 }
1797 }
1798 else
1799 {
1800 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[0];
1801 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[1];
1802 const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[2];
1803 const unsigned int targetFrame3PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[3];
1804
1805 const unsigned int blocks = width / tBlockSize;
1806 const unsigned int remaining = width % tBlockSize;
1807
1808 for (unsigned int y = 0u; y < height; ++y)
1809 {
1810 for (unsigned int n = 0u; n < blocks; ++n)
1811 {
1812 source_8x16x4 = vld4q_u8(source);
1813
1814 vst1q_u8(target0, source_8x16x4.val[0]);
1815 vst1q_u8(target1, source_8x16x4.val[1]);
1816 vst1q_u8(target2, source_8x16x4.val[2]);
1817 vst1q_u8(target3, source_8x16x4.val[3]);
1818
1819 source += tBlockSize * tChannels;
1820
1821 target0 += tBlockSize;
1822 target1 += tBlockSize;
1823 target2 += tBlockSize;
1824 target3 += tBlockSize;
1825 }
1826
1827 for (unsigned int n = 0u; n < remaining; ++n)
1828 {
1829 target0[n] = source[n * tChannels + 0u];
1830 target1[n] = source[n * tChannels + 1u];
1831 target2[n] = source[n * tChannels + 2u];
1832 target3[n] = source[n * tChannels + 3u];
1833 }
1834
1835 source += remaining * tChannels + sourceFramePaddingElements;
1836 target0 += remaining + targetFrame0PaddingElements;
1837 target1 += remaining + targetFrame1PaddingElements;
1838 target2 += remaining + targetFrame2PaddingElements;
1839 target3 += remaining + targetFrame3PaddingElements;
1840 }
1841 }
1842}
1843
1844#endif // OCEAN_HARDWARE_NEON_VERSION
1845
1846template <typename TSource, typename TTarget, unsigned int tChannels>
1847void FrameChannels::separateTo1Channel(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
1848{
1849 ocean_assert(sourceFrame != nullptr);
1850 ocean_assert(targetFrames != nullptr);
1851
1852 ocean_assert(width != 0u && height != 0u);
1853
1854 ocean_assert(tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME || tChannels == channels);
1855
1856 if constexpr (tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME)
1857 {
1858 separateTo1ChannelRuntime<TSource, TTarget>(sourceFrame, targetFrames, width, height, channels, sourceFramePaddingElements, targetFramesPaddingElements);
1859 return;
1860 }
1861
1862#ifdef OCEAN_DEBUG
1863 for (unsigned int c = 0u; c < tChannels; ++c)
1864 {
1865 ocean_assert(targetFrames[c] != nullptr);
1866 }
1867#endif
1868
1869 if (sourceFramePaddingElements == 0u && targetFramesPaddingElements == nullptr)
1870 {
1871 for (unsigned int n = 0u; n < width * height; ++n)
1872 {
1873 for (unsigned int c = 0u; c < tChannels; ++c)
1874 {
1875 targetFrames[c][n] = TTarget(sourceFrame[n * tChannels + c]);
1876 }
1877 }
1878 }
1879 else if (targetFramesPaddingElements == nullptr)
1880 {
1881 ocean_assert(sourceFramePaddingElements != 0u);
1882
1883 const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
1884
1885 for (unsigned int y = 0u; y < height; ++y)
1886 {
1887 const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
1888
1889 const unsigned int targetRowOffset = y * width;
1890
1891 for (unsigned int x = 0u; x < width; ++x)
1892 {
1893 for (unsigned int c = 0u; c < tChannels; ++c)
1894 {
1895 *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * tChannels + c));
1896 }
1897 }
1898 }
1899 }
1900 else
1901 {
1902 const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
1903
1904 Indices32 targetFrameStrideElements(tChannels);
1905
1906 for (unsigned int c = 0u; c < tChannels; ++c)
1907 {
1908 targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
1909 }
1910
1911 for (unsigned int y = 0u; y < height; ++y)
1912 {
1913 const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
1914
1915 for (unsigned int x = 0u; x < width; ++x)
1916 {
1917 for (unsigned int c = 0u; c < tChannels; ++c)
1918 {
1919 *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * tChannels + c));
1920 }
1921 }
1922 }
1923 }
1924}
1925
1926template <typename TSource, typename TTarget>
1927void FrameChannels::separateTo1Channel(const TSource* const sourceFrame, const std::initializer_list<TTarget*>& targetFrames, const unsigned int width, const unsigned int height, const unsigned int sourceFramePaddingElements, const std::initializer_list<const unsigned int>& targetFramesPaddingElements)
1928{
1929 ocean_assert(targetFrames.size() >= 1);
1930 ocean_assert(targetFramesPaddingElements.size() == 0 || targetFrames.size() == targetFramesPaddingElements.size());
1931
1932 if (targetFrames.size() == 2)
1933 {
1934 separateTo1Channel<TSource, TTarget, 2u>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
1935 }
1936 else if (targetFrames.size() == 3)
1937 {
1938 separateTo1Channel<TSource, TTarget, 3u>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
1939 }
1940 else if (targetFrames.size() == 4)
1941 {
1942 separateTo1Channel<TSource, TTarget, 4u>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
1943 }
1944 else
1945 {
1946 separateTo1Channel<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
1947 }
1948}
1949
1950#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1951
1952template <>
1953inline void FrameChannels::zipChannels<uint8_t, uint8_t, 2u>(const uint8_t* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
1954{
1955 ocean_assert(sourceFrames != nullptr);
1956 ocean_assert(targetFrame != nullptr);
1957
1958 ocean_assert(width != 0u && height != 0u);
1959 ocean_assert(channels == 2u);
1960
1961 constexpr unsigned int tChannels = 2u;
1962
1963 bool allSourceFramesContinuous = true;
1964
1965 if (sourceFramesPaddingElements != nullptr)
1966 {
1967 for (unsigned int n = 0u; n < tChannels; ++n)
1968 {
1969 if (sourceFramesPaddingElements[n] != 0u)
1970 {
1971 allSourceFramesContinuous = false;
1972 break;
1973 }
1974 }
1975 }
1976
1977 const uint8_t* source0 = sourceFrames[0];
1978 const uint8_t* source1 = sourceFrames[1];
1979 uint8_t* target = targetFrame;
1980
1981 constexpr unsigned int tBlockSize = 16u;
1982
1983 uint8x16x2_t source_8x16x2;
1984
1985 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
1986 {
1987 const unsigned int pixels = width * height;
1988 const unsigned int blocks = pixels / tBlockSize;
1989 const unsigned int remaining = pixels % tBlockSize;
1990
1991 for (unsigned int n = 0u; n < blocks; ++n)
1992 {
1993 source_8x16x2.val[0] = vld1q_u8(source0);
1994 source_8x16x2.val[1] = vld1q_u8(source1);
1995
1996 vst2q_u8(target, source_8x16x2);
1997
1998 source0 += tBlockSize;
1999 source1 += tBlockSize;
2000
2001 target += tBlockSize * tChannels;
2002 }
2003
2004 for (unsigned int n = 0u; n < remaining; ++n)
2005 {
2006 target[n * tChannels + 0u] = source0[n];
2007 target[n * tChannels + 1u] = source1[n];
2008 }
2009 }
2010 else
2011 {
2012 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2013 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2014
2015 const unsigned int blocks = width / tBlockSize;
2016 const unsigned int remaining = width % tBlockSize;
2017
2018 for (unsigned int y = 0u; y < height; ++y)
2019 {
2020 for (unsigned int n = 0u; n < blocks; ++n)
2021 {
2022 source_8x16x2.val[0] = vld1q_u8(source0);
2023 source_8x16x2.val[1] = vld1q_u8(source1);
2024
2025 vst2q_u8(target, source_8x16x2);
2026
2027 source0 += tBlockSize;
2028 source1 += tBlockSize;
2029
2030 target += tBlockSize * tChannels;
2031 }
2032
2033 for (unsigned int n = 0u; n < remaining; ++n)
2034 {
2035 target[n * tChannels + 0u] = source0[n];
2036 target[n * tChannels + 1u] = source1[n];
2037 }
2038
2039 source0 += remaining + sourceFrame0PaddingElements;
2040 source1 += remaining + sourceFrame1PaddingElements;
2041 target += remaining * tChannels + targetFramePaddingElements;
2042 }
2043 }
2044}
2045
2046template <>
2047inline void FrameChannels::zipChannels<uint8_t, uint8_t, 3u>(const uint8_t* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2048{
2049 ocean_assert(sourceFrames != nullptr);
2050 ocean_assert(targetFrame != nullptr);
2051
2052 ocean_assert(width != 0u && height != 0u);
2053 ocean_assert(channels == 3u);
2054
2055 constexpr unsigned int tChannels = 3u;
2056
2057 bool allSourceFramesContinuous = true;
2058
2059 if (sourceFramesPaddingElements != nullptr)
2060 {
2061 for (unsigned int n = 0u; n < tChannels; ++n)
2062 {
2063 if (sourceFramesPaddingElements[n] != 0u)
2064 {
2065 allSourceFramesContinuous = false;
2066 break;
2067 }
2068 }
2069 }
2070
2071 const uint8_t* source0 = sourceFrames[0];
2072 const uint8_t* source1 = sourceFrames[1];
2073 const uint8_t* source2 = sourceFrames[2];
2074 uint8_t* target = targetFrame;
2075
2076 constexpr unsigned int tBlockSize = 16u;
2077
2078 uint8x16x3_t source_8x16x3;
2079
2080 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2081 {
2082 const unsigned int pixels = width * height;
2083 const unsigned int blocks = pixels / tBlockSize;
2084 const unsigned int remaining = pixels % tBlockSize;
2085
2086 for (unsigned int n = 0u; n < blocks; ++n)
2087 {
2088 source_8x16x3.val[0] = vld1q_u8(source0);
2089 source_8x16x3.val[1] = vld1q_u8(source1);
2090 source_8x16x3.val[2] = vld1q_u8(source2);
2091
2092 vst3q_u8(target, source_8x16x3);
2093
2094 source0 += tBlockSize;
2095 source1 += tBlockSize;
2096 source2 += tBlockSize;
2097
2098 target += tBlockSize * tChannels;
2099 }
2100
2101 for (unsigned int n = 0u; n < remaining; ++n)
2102 {
2103 target[n * tChannels + 0u] = source0[n];
2104 target[n * tChannels + 1u] = source1[n];
2105 target[n * tChannels + 2u] = source2[n];
2106 }
2107 }
2108 else
2109 {
2110 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2111 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2112 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[2];
2113
2114 const unsigned int blocks = width / tBlockSize;
2115 const unsigned int remaining = width % tBlockSize;
2116
2117 for (unsigned int y = 0u; y < height; ++y)
2118 {
2119 for (unsigned int n = 0u; n < blocks; ++n)
2120 {
2121 source_8x16x3.val[0] = vld1q_u8(source0);
2122 source_8x16x3.val[1] = vld1q_u8(source1);
2123 source_8x16x3.val[2] = vld1q_u8(source2);
2124
2125 vst3q_u8(target, source_8x16x3);
2126
2127 source0 += tBlockSize;
2128 source1 += tBlockSize;
2129 source2 += tBlockSize;
2130
2131 target += tBlockSize * tChannels;
2132 }
2133
2134 for (unsigned int n = 0u; n < remaining; ++n)
2135 {
2136 target[n * tChannels + 0u] = source0[n];
2137 target[n * tChannels + 1u] = source1[n];
2138 target[n * tChannels + 2u] = source2[n];
2139 }
2140
2141 source0 += remaining + sourceFrame0PaddingElements;
2142 source1 += remaining + sourceFrame1PaddingElements;
2143 source2 += remaining + sourceFrame2PaddingElements;
2144 target += remaining * tChannels + targetFramePaddingElements;
2145 }
2146 }
2147}
2148
2149template <>
2150inline void FrameChannels::zipChannels<uint8_t, uint8_t, 4u>(const uint8_t* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2151{
2152 ocean_assert(sourceFrames != nullptr);
2153 ocean_assert(targetFrame != nullptr);
2154
2155 ocean_assert(width != 0u && height != 0u);
2156 ocean_assert(channels == 4u);
2157
2158 constexpr unsigned int tChannels = 4u;
2159
2160 bool allSourceFramesContinuous = true;
2161
2162 if (sourceFramesPaddingElements != nullptr)
2163 {
2164 for (unsigned int n = 0u; n < tChannels; ++n)
2165 {
2166 if (sourceFramesPaddingElements[n] != 0u)
2167 {
2168 allSourceFramesContinuous = false;
2169 break;
2170 }
2171 }
2172 }
2173
2174 const uint8_t* source0 = sourceFrames[0];
2175 const uint8_t* source1 = sourceFrames[1];
2176 const uint8_t* source2 = sourceFrames[2];
2177 const uint8_t* source3 = sourceFrames[3];
2178 uint8_t* target = targetFrame;
2179
2180 constexpr unsigned int tBlockSize = 16u;
2181
2182 uint8x16x4_t source_8x16x4;
2183
2184 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2185 {
2186 const unsigned int pixels = width * height;
2187 const unsigned int blocks = pixels / tBlockSize;
2188 const unsigned int remaining = pixels % tBlockSize;
2189
2190 for (unsigned int n = 0u; n < blocks; ++n)
2191 {
2192 source_8x16x4.val[0] = vld1q_u8(source0);
2193 source_8x16x4.val[1] = vld1q_u8(source1);
2194 source_8x16x4.val[2] = vld1q_u8(source2);
2195 source_8x16x4.val[3] = vld1q_u8(source3);
2196
2197 vst4q_u8(target, source_8x16x4);
2198
2199 source0 += tBlockSize;
2200 source1 += tBlockSize;
2201 source2 += tBlockSize;
2202 source3 += tBlockSize;
2203
2204 target += tBlockSize * tChannels;
2205 }
2206
2207 for (unsigned int n = 0u; n < remaining; ++n)
2208 {
2209 target[n * tChannels + 0u] = source0[n];
2210 target[n * tChannels + 1u] = source1[n];
2211 target[n * tChannels + 2u] = source2[n];
2212 target[n * tChannels + 3u] = source3[n];
2213 }
2214 }
2215 else
2216 {
2217 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2218 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2219 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[2];
2220 const unsigned int sourceFrame3PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[3];
2221
2222 const unsigned int blocks = width / tBlockSize;
2223 const unsigned int remaining = width % tBlockSize;
2224
2225 for (unsigned int y = 0u; y < height; ++y)
2226 {
2227 for (unsigned int n = 0u; n < blocks; ++n)
2228 {
2229 source_8x16x4.val[0] = vld1q_u8(source0);
2230 source_8x16x4.val[1] = vld1q_u8(source1);
2231 source_8x16x4.val[2] = vld1q_u8(source2);
2232 source_8x16x4.val[3] = vld1q_u8(source3);
2233
2234 vst4q_u8(target, source_8x16x4);
2235
2236 source0 += tBlockSize;
2237 source1 += tBlockSize;
2238 source2 += tBlockSize;
2239 source3 += tBlockSize;
2240
2241 target += tBlockSize * tChannels;
2242 }
2243
2244 for (unsigned int n = 0u; n < remaining; ++n)
2245 {
2246 target[n * tChannels + 0u] = source0[n];
2247 target[n * tChannels + 1u] = source1[n];
2248 target[n * tChannels + 2u] = source2[n];
2249 target[n * tChannels + 3u] = source3[n];
2250 }
2251
2252 source0 += remaining + sourceFrame0PaddingElements;
2253 source1 += remaining + sourceFrame1PaddingElements;
2254 source2 += remaining + sourceFrame2PaddingElements;
2255 source3 += remaining + sourceFrame3PaddingElements;
2256 target += remaining * tChannels + targetFramePaddingElements;
2257 }
2258 }
2259}
2260
2261template <>
2262inline void FrameChannels::zipChannels<float, uint8_t, 2u>(const float* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2263{
2264 ocean_assert(sourceFrames != nullptr);
2265 ocean_assert(targetFrame != nullptr);
2266
2267 ocean_assert(width != 0u && height != 0u);
2268 ocean_assert(channels == 2u);
2269
2270 constexpr unsigned int tChannels = 2u;
2271
2272 bool allSourceFramesContinuous = true;
2273
2274 if (sourceFramesPaddingElements != nullptr)
2275 {
2276 for (unsigned int n = 0u; n < tChannels; ++n)
2277 {
2278 if (sourceFramesPaddingElements[n] != 0u)
2279 {
2280 allSourceFramesContinuous = false;
2281 break;
2282 }
2283 }
2284 }
2285
2286 const float* source0 = sourceFrames[0];
2287 const float* source1 = sourceFrames[1];
2288 uint8_t* target = targetFrame;
2289
2290 constexpr unsigned int tBlockSize = 16u;
2291
2292 uint8x16x2_t target_8x16x2;
2293
2294 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2295 {
2296 const unsigned int pixels = width * height;
2297 const unsigned int blocks = pixels / tBlockSize;
2298 const unsigned int remaining = pixels % tBlockSize;
2299
2300 for (unsigned int n = 0u; n < blocks; ++n)
2301 {
2302 target_8x16x2.val[0] = NEON::cast16ElementsNEON(source0);
2303 target_8x16x2.val[1] = NEON::cast16ElementsNEON(source1);
2304
2305 vst2q_u8(target, target_8x16x2);
2306
2307 source0 += tBlockSize;
2308 source1 += tBlockSize;
2309
2310 target += tBlockSize * tChannels;
2311 }
2312
2313 for (unsigned int n = 0u; n < remaining; ++n)
2314 {
2315 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2316 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2317
2318 target[n * tChannels + 0u] = uint8_t(source0[n]);
2319 target[n * tChannels + 1u] = uint8_t(source1[n]);
2320 }
2321 }
2322 else
2323 {
2324 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2325 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2326
2327 const unsigned int blocks = width / tBlockSize;
2328 const unsigned int remaining = width % tBlockSize;
2329
2330 for (unsigned int y = 0u; y < height; ++y)
2331 {
2332 for (unsigned int n = 0u; n < blocks; ++n)
2333 {
2334 target_8x16x2.val[0] = NEON::cast16ElementsNEON(source0);
2335 target_8x16x2.val[1] = NEON::cast16ElementsNEON(source1);
2336
2337 vst2q_u8(target, target_8x16x2);
2338
2339 source0 += tBlockSize;
2340 source1 += tBlockSize;
2341
2342 target += tBlockSize * tChannels;
2343 }
2344
2345 for (unsigned int n = 0u; n < remaining; ++n)
2346 {
2347 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2348 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2349
2350 target[n * tChannels + 0u] = uint8_t(source0[n]);
2351 target[n * tChannels + 1u] = uint8_t(source1[n]);
2352 }
2353
2354 source0 += remaining + sourceFrame0PaddingElements;
2355 source1 += remaining + sourceFrame1PaddingElements;
2356 target += remaining * tChannels + targetFramePaddingElements;
2357 }
2358 }
2359}
2360
2361template <>
2362inline void FrameChannels::zipChannels<float, uint8_t, 3u>(const float* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2363{
2364 ocean_assert(sourceFrames != nullptr);
2365 ocean_assert(targetFrame != nullptr);
2366
2367 ocean_assert(width != 0u && height != 0u);
2368 ocean_assert(channels == 3u);
2369
2370 constexpr unsigned int tChannels = 3u;
2371
2372 bool allSourceFramesContinuous = true;
2373
2374 if (sourceFramesPaddingElements != nullptr)
2375 {
2376 for (unsigned int n = 0u; n < tChannels; ++n)
2377 {
2378 if (sourceFramesPaddingElements[n] != 0u)
2379 {
2380 allSourceFramesContinuous = false;
2381 break;
2382 }
2383 }
2384 }
2385
2386 const float* source0 = sourceFrames[0];
2387 const float* source1 = sourceFrames[1];
2388 const float* source2 = sourceFrames[2];
2389 uint8_t* target = targetFrame;
2390
2391 constexpr unsigned int tBlockSize = 16u;
2392
2393 uint8x16x3_t target_8x16x3;
2394
2395 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2396 {
2397 const unsigned int pixels = width * height;
2398 const unsigned int blocks = pixels / tBlockSize;
2399 const unsigned int remaining = pixels % tBlockSize;
2400
2401 for (unsigned int n = 0u; n < blocks; ++n)
2402 {
2403 target_8x16x3.val[0] = NEON::cast16ElementsNEON(source0);
2404 target_8x16x3.val[1] = NEON::cast16ElementsNEON(source1);
2405 target_8x16x3.val[2] = NEON::cast16ElementsNEON(source2);
2406
2407 vst3q_u8(target, target_8x16x3);
2408
2409 source0 += tBlockSize;
2410 source1 += tBlockSize;
2411 source2 += tBlockSize;
2412
2413 target += tBlockSize * tChannels;
2414 }
2415
2416 for (unsigned int n = 0u; n < remaining; ++n)
2417 {
2418 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2419 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2420 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2421
2422 target[n * tChannels + 0u] = uint8_t(source0[n]);
2423 target[n * tChannels + 1u] = uint8_t(source1[n]);
2424 target[n * tChannels + 2u] = uint8_t(source2[n]);
2425 }
2426 }
2427 else
2428 {
2429 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2430 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2431 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[2];
2432
2433 const unsigned int blocks = width / tBlockSize;
2434 const unsigned int remaining = width % tBlockSize;
2435
2436 for (unsigned int y = 0u; y < height; ++y)
2437 {
2438 for (unsigned int n = 0u; n < blocks; ++n)
2439 {
2440 target_8x16x3.val[0] = NEON::cast16ElementsNEON(source0);
2441 target_8x16x3.val[1] = NEON::cast16ElementsNEON(source1);
2442 target_8x16x3.val[2] = NEON::cast16ElementsNEON(source2);
2443
2444
2445 vst3q_u8(target, target_8x16x3);
2446
2447 source0 += tBlockSize;
2448 source1 += tBlockSize;
2449 source2 += tBlockSize;
2450
2451 target += tBlockSize * tChannels;
2452 }
2453
2454 for (unsigned int n = 0u; n < remaining; ++n)
2455 {
2456 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2457 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2458 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2459
2460 target[n * tChannels + 0u] = uint8_t(source0[n]);
2461 target[n * tChannels + 1u] = uint8_t(source1[n]);
2462 target[n * tChannels + 2u] = uint8_t(source2[n]);
2463 }
2464
2465 source0 += remaining + sourceFrame0PaddingElements;
2466 source1 += remaining + sourceFrame1PaddingElements;
2467 source2 += remaining + sourceFrame2PaddingElements;
2468 target += remaining * tChannels + targetFramePaddingElements;
2469 }
2470 }
2471}
2472
2473template <>
2474inline void FrameChannels::zipChannels<float, uint8_t, 4u>(const float* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2475{
2476 ocean_assert(sourceFrames != nullptr);
2477 ocean_assert(targetFrame != nullptr);
2478
2479 ocean_assert(width != 0u && height != 0u);
2480 ocean_assert(channels == 4u);
2481
2482 constexpr unsigned int tChannels = 4u;
2483
2484 bool allSourceFramesContinuous = true;
2485
2486 if (sourceFramesPaddingElements != nullptr)
2487 {
2488 for (unsigned int n = 0u; n < tChannels; ++n)
2489 {
2490 if (sourceFramesPaddingElements[n] != 0u)
2491 {
2492 allSourceFramesContinuous = false;
2493 break;
2494 }
2495 }
2496 }
2497
2498 const float* source0 = sourceFrames[0];
2499 const float* source1 = sourceFrames[1];
2500 const float* source2 = sourceFrames[2];
2501 const float* source3 = sourceFrames[3];
2502 uint8_t* target = targetFrame;
2503
2504 constexpr unsigned int tBlockSize = 16u;
2505
2506 uint8x16x4_t target_8x16x4;
2507
2508 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2509 {
2510 const unsigned int pixels = width * height;
2511 const unsigned int blocks = pixels / tBlockSize;
2512 const unsigned int remaining = pixels % tBlockSize;
2513
2514 for (unsigned int n = 0u; n < blocks; ++n)
2515 {
2516 target_8x16x4.val[0] = NEON::cast16ElementsNEON(source0);
2517 target_8x16x4.val[1] = NEON::cast16ElementsNEON(source1);
2518 target_8x16x4.val[2] = NEON::cast16ElementsNEON(source2);
2519 target_8x16x4.val[3] = NEON::cast16ElementsNEON(source3);
2520
2521 vst4q_u8(target, target_8x16x4);
2522
2523 source0 += tBlockSize;
2524 source1 += tBlockSize;
2525 source2 += tBlockSize;
2526 source3 += tBlockSize;
2527
2528 target += tBlockSize * tChannels;
2529 }
2530
2531 for (unsigned int n = 0u; n < remaining; ++n)
2532 {
2533 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2534 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2535 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2536 ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
2537
2538 target[n * tChannels + 0u] = uint8_t(source0[n]);
2539 target[n * tChannels + 1u] = uint8_t(source1[n]);
2540 target[n * tChannels + 2u] = uint8_t(source2[n]);
2541 target[n * tChannels + 3u] = uint8_t(source3[n]);
2542 }
2543 }
2544 else
2545 {
2546 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2547 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2548 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[2];
2549 const unsigned int sourceFrame3PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[3];
2550
2551 const unsigned int blocks = width / tBlockSize;
2552 const unsigned int remaining = width % tBlockSize;
2553
2554 for (unsigned int y = 0u; y < height; ++y)
2555 {
2556 for (unsigned int n = 0u; n < blocks; ++n)
2557 {
2558 target_8x16x4.val[0] = NEON::cast16ElementsNEON(source0);
2559 target_8x16x4.val[1] = NEON::cast16ElementsNEON(source1);
2560 target_8x16x4.val[2] = NEON::cast16ElementsNEON(source2);
2561 target_8x16x4.val[3] = NEON::cast16ElementsNEON(source3);
2562
2563 vst4q_u8(target, target_8x16x4);
2564
2565 source0 += tBlockSize;
2566 source1 += tBlockSize;
2567 source2 += tBlockSize;
2568 source3 += tBlockSize;
2569
2570 target += tBlockSize * tChannels;
2571 }
2572
2573 for (unsigned int n = 0u; n < remaining; ++n)
2574 {
2575 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2576 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2577 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2578 ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
2579
2580 target[n * tChannels + 0u] = uint8_t(source0[n]);
2581 target[n * tChannels + 1u] = uint8_t(source1[n]);
2582 target[n * tChannels + 2u] = uint8_t(source2[n]);
2583 target[n * tChannels + 3u] = uint8_t(source3[n]);
2584 }
2585
2586 source0 += remaining + sourceFrame0PaddingElements;
2587 source1 += remaining + sourceFrame1PaddingElements;
2588 source2 += remaining + sourceFrame2PaddingElements;
2589 source3 += remaining + sourceFrame3PaddingElements;
2590 target += remaining * tChannels + targetFramePaddingElements;
2591 }
2592 }
2593}
2594
2595#endif // defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
2596
2597template <typename TSource, typename TTarget, unsigned int tChannels>
2598void FrameChannels::zipChannels(const TSource* const* sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2599{
2600 ocean_assert(sourceFrames != nullptr);
2601 ocean_assert(targetFrame != nullptr);
2602
2603 ocean_assert(width != 0u && height != 0u);
2604
2605 ocean_assert(tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME || tChannels == channels);
2606
2607 if constexpr (tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME)
2608 {
2609 zipChannelsRuntime<TSource, TTarget>(sourceFrames, targetFrame, width, height, channels, sourceFramesPaddingElements, targetFramePaddingElements);
2610 return;
2611 }
2612
2613 bool allSourceFramesContinuous = true;
2614
2615 if (sourceFramesPaddingElements != nullptr)
2616 {
2617 for (unsigned int n = 0u; n < tChannels; ++n)
2618 {
2619 if (sourceFramesPaddingElements[n] != 0u)
2620 {
2621 allSourceFramesContinuous = false;
2622 break;
2623 }
2624 }
2625 }
2626
2627 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2628 {
2629 for (unsigned int n = 0u; n < width * height; ++n)
2630 {
2631 for (unsigned int c = 0u; c < tChannels; ++c)
2632 {
2633 targetFrame[n * tChannels + c] = TTarget(sourceFrames[c][n]);
2634 }
2635 }
2636 }
2637 else
2638 {
2639 const unsigned int targetFrameStrideElements = width * tChannels + targetFramePaddingElements;
2640
2641 Indices32 sourceFrameStrideElements(tChannels);
2642
2643 for (unsigned int c = 0u; c < tChannels; ++c)
2644 {
2645 if (sourceFramesPaddingElements == nullptr)
2646 {
2647 sourceFrameStrideElements[c] = width;
2648 }
2649 else
2650 {
2651 sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
2652 }
2653 }
2654
2655 for (unsigned int y = 0u; y < height; ++y)
2656 {
2657 TTarget* const targetRow = targetFrame + y * targetFrameStrideElements;
2658
2659 for (unsigned int x = 0u; x < width; ++x)
2660 {
2661 for (unsigned int c = 0u; c < tChannels; ++c)
2662 {
2663 *(targetRow + x * tChannels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x));
2664 }
2665 }
2666 }
2667 }
2668}
2669
2670template <typename TSource, typename TTarget>
2671void FrameChannels::zipChannels(const std::initializer_list<const TSource*>& sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const std::initializer_list<unsigned int>& sourceFramePaddingElements, const unsigned int targetFramePaddingElements)
2672{
2673 ocean_assert(sourceFrames.size() >= 1);
2674 ocean_assert(sourceFramePaddingElements.size() == 0 || sourceFrames.size() == sourceFramePaddingElements.size());
2675
2676 if (sourceFrames.size() == 2)
2677 {
2678 zipChannels<TSource, TTarget, 2u>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2679 }
2680 else if (sourceFrames.size() == 3)
2681 {
2682 zipChannels<TSource, TTarget, 3u>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2683 }
2684 else if (sourceFrames.size() == 4)
2685 {
2686 zipChannels<TSource, TTarget, 4u>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2687 }
2688 else
2689 {
2690 zipChannels<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2691 }
2692}
2693
2694template <typename T, unsigned int tSourceChannels>
2695inline void FrameChannels::addFirstChannel(const T* source, const T* sourceNewChannel, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2696{
2697 static_assert(tSourceChannels != 0u, "Invalid channel number!");
2698
2699 ocean_assert(source != nullptr && sourceNewChannel != nullptr && target != nullptr);
2700 ocean_assert(source != target);
2701 ocean_assert(width >= 1u && height >= 1u);
2702
2703 const unsigned int options[3] = {sourcePaddingElements, sourceNewChannelPaddingElements, targetPaddingElements};
2704
2705 const void* sources[2] = {source, sourceNewChannel};
2706
2707 FrameConverter::convertArbitraryPixelFormat(sources, (void**)&target, width, height, conversionFlag, 1u, FrameChannels::addChannelRow<T, tSourceChannels, true>, options, worker);
2708}
2709
2710template <typename T, unsigned int tSourceChannels>
2711inline void FrameChannels::addFirstChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2712{
2713 static_assert(tSourceChannels >= 1u, "Invalid channel number!");
2714
2715 ocean_assert(source != nullptr && target != nullptr);
2716 ocean_assert(width >= 1u && height >= 1u);
2717
2718 const unsigned int targetChannels = tSourceChannels + 1u;
2719
2720 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2721 const unsigned int targetStrideElements = width * targetChannels + targetPaddingElements;
2722
2723 const void* channelValueParameter = (const void*)(&newChannelValue);
2724
2725 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
2726
2727 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::addChannelValueRow<T, tSourceChannels, true>, FrameChannels::reverseRowPixelOrderInPlace<T, targetChannels>, areContinuous, channelValueParameter, worker);
2728}
2729
2730template <typename T, unsigned int tSourceChannels>
2731inline void FrameChannels::addLastChannel(const T* source, const T* sourceNewChannel, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2732{
2733 static_assert(tSourceChannels != 0u, "Invalid channel number!");
2734
2735 ocean_assert(source != nullptr && sourceNewChannel != nullptr && target != nullptr);
2736 ocean_assert(source != target);
2737 ocean_assert(width >= 1u && height >= 1u);
2738
2739 const unsigned int options[3] = {sourcePaddingElements, sourceNewChannelPaddingElements, targetPaddingElements};
2740
2741 const void* sources[2] = {source, sourceNewChannel};
2742
2743 FrameConverter::convertArbitraryPixelFormat(sources, (void**)&target, width, height, conversionFlag, 1u, FrameChannels::addChannelRow<T, tSourceChannels, false>, options, worker);
2744}
2745
2746template <typename T, unsigned int tSourceChannels>
2747inline void FrameChannels::addLastChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2748{
2749 static_assert(tSourceChannels >= 1u, "Invalid channel number!");
2750
2751 ocean_assert(source != nullptr && target != nullptr);
2752 ocean_assert(width >= 1u && height >= 1u);
2753
2754 const unsigned int targetChannels = tSourceChannels + 1u;
2755
2756 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2757 const unsigned int targetStrideElements = width * targetChannels + targetPaddingElements;
2758
2759 const void* channelValueParameter = (const void*)(&newChannelValue);
2760
2761 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
2762
2763 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::addChannelValueRow<T, tSourceChannels, false>, FrameChannels::reverseRowPixelOrderInPlace<T, targetChannels>, areContinuous, channelValueParameter, worker);
2764}
2765
2766template <typename T, unsigned int tSourceChannels>
2767inline void FrameChannels::removeFirstChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2768{
2769 static_assert(tSourceChannels >= 2u && tSourceChannels <= 8u, "Invalid channel number!");
2770
2771 ocean_assert(source != nullptr && target != nullptr);
2772 ocean_assert(width >= 1u && height >= 1u);
2773
2774 const unsigned int shufflePatternMax = 0x07654321u;
2775 const unsigned int mask = 0xFFFFFFFFu >> ((8u - tSourceChannels + 1u) * 4u); // e.g., 0xFF for tChannels == 3u, 0xFFF for tChannels == 4u
2776
2777 const unsigned int shufflePattern = shufflePatternMax & mask;
2778
2779 FrameChannels::shuffleChannels<T, tSourceChannels, tSourceChannels - 1u, shufflePattern>(source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements, worker);
2780}
2781
2782template <typename T, unsigned int tSourceChannels>
2783inline void FrameChannels::removeLastChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2784{
2785 static_assert(tSourceChannels >= 2u && tSourceChannels <= 8u, "Invalid channel number!");
2786
2787 ocean_assert(source != nullptr && target != nullptr);
2788 ocean_assert(width >= 1u && height >= 1u);
2789
2790 const unsigned int shufflePatternMax = 0x76543210u;
2791 const unsigned int mask = 0xFFFFFFFFu >> ((8u - tSourceChannels + 1u) * 4u); // e.g., 0xFF for tChannels == 3u, 0xFFF for tChannels == 4u
2792
2793 const unsigned int shufflePattern = shufflePatternMax & mask;
2794
2795 FrameChannels::shuffleChannels<T, tSourceChannels, tSourceChannels - 1u, shufflePattern>(source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements, worker);
2796}
2797
2798template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tSourceChannelIndex, unsigned int tTargetChannelIndex>
2799inline void FrameChannels::copyChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2800{
2801 static_assert(tSourceChannels >= 1u, "Invalid number of channels!");
2802 static_assert(tTargetChannels >= 1u, "Invalid number of channels!");
2803
2804 static_assert(tSourceChannelIndex < tSourceChannels, "Invalid channel index!");
2805 static_assert(tTargetChannelIndex < tTargetChannels, "Invalid channel index!");
2806
2807 ocean_assert(source != nullptr && target != nullptr);
2808 ocean_assert(width >= 1u && height >= 1u);
2809
2810 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2811 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
2812
2813 constexpr RowReversePixelOrderInPlaceFunction<T> reversePixelOrderRowInPlaceFunction = nullptr;
2814
2815 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
2816
2817 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, CONVERT_NORMAL, FrameChannels::copyChannelRow<T, tSourceChannels, tTargetChannels, tSourceChannelIndex, tTargetChannelIndex>, reversePixelOrderRowInPlaceFunction, areContinuous, nullptr, worker);
2818}
2819
2820template <typename T, unsigned int tChannel, unsigned int tChannels>
2821inline void FrameChannels::setChannel(T* frame, const unsigned int width, const unsigned int height, const T value, const unsigned int framePaddingElements, Worker* worker)
2822{
2823 static_assert(tChannels >= 1u, "Invalid channel number!");
2824 static_assert(tChannel < tChannels, "Invalid channel index!");
2825
2826 ocean_assert(frame != nullptr);
2827 ocean_assert(width >= 1u && height >= 1u);
2828
2829 if (worker)
2830 {
2831 worker->executeFunction(Worker::Function::createStatic(&setChannelSubset<T, tChannel, tChannels>, frame, width, value, framePaddingElements, 0u, 0u), 0u, height);
2832 }
2833 else
2834 {
2835 setChannelSubset<T, tChannel, tChannels>(frame, width, value, framePaddingElements, 0u, height);
2836 }
2837}
2838
2839template <typename T, unsigned int tChannels>
2840inline void FrameChannels::reverseChannelOrder(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2841{
2842 static_assert(tChannels >= 1u, "Invalid channel number!");
2843
2844 ocean_assert(source != nullptr && target != nullptr);
2845 ocean_assert(width >= 1u && height >= 1u);
2846
2847 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
2848 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
2849
2850 constexpr bool areContinuous = false; // even if both images are continuous, we must reverse each line by another
2851
2852 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::reverseRowChannelOrder<T, tChannels>, FrameChannels::reverseRowPixelOrderInPlace<T, tChannels>, areContinuous, nullptr, worker);
2853}
2854
2855template <typename T, unsigned int tChannels>
2856void FrameChannels::reverseRowPixelOrder(const T* source, T* target, const size_t size)
2857{
2858 static_assert(tChannels >= 1u, "Invalid channel number!");
2859
2860 ocean_assert(source != nullptr && target != nullptr);
2861 ocean_assert(size >= 1);
2862
2863#ifdef OCEAN_DEBUG
2864 const T* const debugSourceStart = source;
2865 const T* const debugSourceEnd = debugSourceStart + size * tChannels;
2866
2867 const T* const debugTargetStart = target;
2868 const T* const debugTargetEnd = debugTargetStart + size * tChannels;
2869#endif
2870
2871 // moving target to the end of the memory block
2872 target += size * tChannels;
2873
2874 const T* const sourceEnd = source + size * tChannels;
2875
2876#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
2877
2878 if constexpr (std::is_same<typename TypeMapper<T>::Type, uint8_t>::value)
2879 {
2880 const size_t blocks16 = size / size_t(16);
2881
2882 switch (tChannels)
2883 {
2884 case 1u:
2885 {
2886 for (size_t n = 0; n < blocks16; ++n)
2887 {
2888 target -= 16u * tChannels;
2889
2890 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2891 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2892
2893 const uint8x16_t source_u_8x16 = vld1q_u8((const uint8_t*)(source));
2894 uint8x16_t revSource_u_8x16 = vrev64q_u8(source_u_8x16);
2895 revSource_u_8x16 = vcombine_u8(vget_high_u8(revSource_u_8x16), vget_low_u8(revSource_u_8x16));
2896
2897 vst1q_u8((uint8_t*)(target), revSource_u_8x16);
2898
2899 source += 16u * tChannels;
2900 }
2901
2902 break;
2903 }
2904
2905 case 2u:
2906 {
2907 for (size_t n = 0; n < blocks16; ++n)
2908 {
2909 target -= 16u * tChannels;
2910
2911 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2912 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2913
2914 const uint8x16_t sourceA_u_8x16 = vld1q_u8((const uint8_t*)(source) + 0);
2915 const uint8x16_t sourceB_u_8x16 = vld1q_u8((const uint8_t*)(source) + 16);
2916
2917 const uint8x16_t revSourceA_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u8(sourceA_u_8x16)));
2918 const uint8x16_t revSourceB_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u8(sourceB_u_8x16)));
2919
2920 const uint8x16_t targetA_u_8x16 = vcombine_u8(vget_high_u8(revSourceA_u_8x16), vget_low_u8(revSourceA_u_8x16));
2921 const uint8x16_t targetB_u_8x16 = vcombine_u8(vget_high_u8(revSourceB_u_8x16), vget_low_u8(revSourceB_u_8x16));
2922
2923 vst1q_u8((uint8_t*)(target) + 0, targetB_u_8x16);
2924 vst1q_u8((uint8_t*)(target) + 16, targetA_u_8x16);
2925
2926 source += 16u * tChannels;
2927 }
2928
2929 break;
2930 }
2931
2932 case 3u:
2933 {
2934 for (size_t n = 0; n < blocks16; ++n)
2935 {
2936 target -= 16u * tChannels;
2937
2938 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2939 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2940
2941 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)(source));
2942
2943 uint8x16x3_t revSource_u_8x16x3;
2944 revSource_u_8x16x3.val[0] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[0])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[0])));
2945 revSource_u_8x16x3.val[1] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[1])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[1])));
2946 revSource_u_8x16x3.val[2] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[2])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[2])));
2947
2948 vst3q_u8((uint8_t*)(target), revSource_u_8x16x3);
2949
2950 source += 16u * tChannels;
2951 }
2952
2953 break;
2954 }
2955
2956 case 4u:
2957 {
2958 for (size_t n = 0; n < blocks16; ++n)
2959 {
2960 target -= 16u * tChannels;
2961
2962 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2963 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2964
2965 const uint8x16_t sourceA_u_8x16 = vld1q_u8((const uint8_t*)(source) + 0);
2966 const uint8x16_t sourceB_u_8x16 = vld1q_u8((const uint8_t*)(source) + 16);
2967 const uint8x16_t sourceC_u_8x16 = vld1q_u8((const uint8_t*)(source) + 32);
2968 const uint8x16_t sourceD_u_8x16 = vld1q_u8((const uint8_t*)(source) + 48);
2969
2970 const uint8x16_t revSourceA_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceA_u_8x16)));
2971 const uint8x16_t revSourceB_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceB_u_8x16)));
2972 const uint8x16_t revSourceC_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceC_u_8x16)));
2973 const uint8x16_t revSourceD_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceD_u_8x16)));
2974
2975 const uint8x16_t targetA_u_8x16 = vcombine_u8(vget_high_u8(revSourceA_u_8x16), vget_low_u8(revSourceA_u_8x16));
2976 const uint8x16_t targetB_u_8x16 = vcombine_u8(vget_high_u8(revSourceB_u_8x16), vget_low_u8(revSourceB_u_8x16));
2977 const uint8x16_t targetC_u_8x16 = vcombine_u8(vget_high_u8(revSourceC_u_8x16), vget_low_u8(revSourceC_u_8x16));
2978 const uint8x16_t targetD_u_8x16 = vcombine_u8(vget_high_u8(revSourceD_u_8x16), vget_low_u8(revSourceD_u_8x16));
2979
2980 vst1q_u8((uint8_t*)(target) + 0, targetD_u_8x16);
2981 vst1q_u8((uint8_t*)(target) + 16, targetC_u_8x16);
2982 vst1q_u8((uint8_t*)(target) + 32, targetB_u_8x16);
2983 vst1q_u8((uint8_t*)(target) + 48, targetA_u_8x16);
2984
2985 source += 16u * tChannels;
2986 }
2987
2988 break;
2989 }
2990
2991 default:
2992 break;
2993 }
2994 }
2995
2996#endif // OCEAN_HARDWARE_NEON_VERSION
2997
2998 while (source != sourceEnd)
2999 {
3000 ocean_assert(source < sourceEnd);
3001
3002 for (unsigned int n = 0u; n < tChannels; ++n)
3003 {
3004 ocean_assert(source + tChannels - n - 1u >= debugSourceStart);
3005 ocean_assert(source + tChannels - n - 1u < debugSourceEnd);
3006
3007 ocean_assert(target > debugTargetStart && target <= debugTargetEnd);
3008
3009 *--target = source[tChannels - n - 1u];
3010 }
3011
3012 source += tChannels;
3013 }
3014}
3015
3016template <typename T, unsigned int tChannels>
3017void FrameChannels::reverseRowPixelOrderInPlace(T* data, const size_t size)
3018{
3019 static_assert(tChannels >= 1u, "Invalid channel number!");
3020
3021 ocean_assert(data != nullptr);
3022 ocean_assert(size >= 1);
3023
3024 typedef typename DataType<T, tChannels>::Type PixelType;
3025
3026 size_t n = 0;
3027
3028#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3029
3030 if constexpr (std::is_same<typename TypeMapper<T>::Type, uint8_t>::value)
3031 {
3032 if (size >= 32)
3033 {
3034 const size_t blocks32 = size / size_t(32);
3035
3036 uint8_t* left = (uint8_t*)(data);
3037 uint8_t* right = (uint8_t*)(data) + (size - 16u) * tChannels;
3038
3039 switch (tChannels)
3040 {
3041 case 1u:
3042 {
3043 for (size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3044 {
3045 const uint8x16_t left_u_8x16 = vld1q_u8(left);
3046 const uint8x16_t right_u_8x16 = vld1q_u8(right);
3047
3048 uint8x16_t revLeft_u_8x16 = vrev64q_u8(left_u_8x16);
3049 revLeft_u_8x16 = vcombine_u8(vget_high_u8(revLeft_u_8x16), vget_low_u8(revLeft_u_8x16));
3050
3051 uint8x16_t revRight_u_8x16 = vrev64q_u8(right_u_8x16);
3052 revRight_u_8x16 = vcombine_u8(vget_high_u8(revRight_u_8x16), vget_low_u8(revRight_u_8x16));
3053
3054 vst1q_u8(left, revRight_u_8x16);
3055 vst1q_u8(right, revLeft_u_8x16);
3056
3057 left += 16u * tChannels;
3058 right -= 16u * tChannels;
3059 }
3060
3061 n += blocks32 * 16u;
3062
3063 break;
3064 }
3065
3066 case 2u:
3067 {
3068 for (size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3069 {
3070 const uint8x16x2_t left_u_8x16x2 = vld2q_u8(left);
3071 const uint8x16x2_t right_u_8x16x2 = vld2q_u8(right);
3072
3073 uint8x16x2_t revLeft_u_8x16x2;
3074 revLeft_u_8x16x2.val[0] = vrev64q_u8(left_u_8x16x2.val[0]);
3075 revLeft_u_8x16x2.val[1] = vrev64q_u8(left_u_8x16x2.val[1]);
3076 revLeft_u_8x16x2.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x2.val[0]), vget_low_u8(revLeft_u_8x16x2.val[0]));
3077 revLeft_u_8x16x2.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x2.val[1]), vget_low_u8(revLeft_u_8x16x2.val[1]));
3078
3079 uint8x16x2_t revRight_u_8x16x2;
3080 revRight_u_8x16x2.val[0] = vrev64q_u8(right_u_8x16x2.val[0]);
3081 revRight_u_8x16x2.val[1] = vrev64q_u8(right_u_8x16x2.val[1]);
3082 revRight_u_8x16x2.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x2.val[0]), vget_low_u8(revRight_u_8x16x2.val[0]));
3083 revRight_u_8x16x2.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x2.val[1]), vget_low_u8(revRight_u_8x16x2.val[1]));
3084
3085 vst2q_u8(left, revRight_u_8x16x2);
3086 vst2q_u8(right, revLeft_u_8x16x2);
3087
3088 left += 16u * tChannels;
3089 right -= 16u * tChannels;
3090 }
3091
3092 n += blocks32 * 16u;
3093
3094 break;
3095 }
3096
3097 case 3u:
3098 {
3099 for (size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3100 {
3101 const uint8x16x3_t left_u_8x16x3 = vld3q_u8(left);
3102 const uint8x16x3_t right_u_8x16x3 = vld3q_u8(right);
3103
3104 uint8x16x3_t revLeft_u_8x16x3;
3105 revLeft_u_8x16x3.val[0] = vrev64q_u8(left_u_8x16x3.val[0]);
3106 revLeft_u_8x16x3.val[1] = vrev64q_u8(left_u_8x16x3.val[1]);
3107 revLeft_u_8x16x3.val[2] = vrev64q_u8(left_u_8x16x3.val[2]);
3108 revLeft_u_8x16x3.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[0]), vget_low_u8(revLeft_u_8x16x3.val[0]));
3109 revLeft_u_8x16x3.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[1]), vget_low_u8(revLeft_u_8x16x3.val[1]));
3110 revLeft_u_8x16x3.val[2] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[2]), vget_low_u8(revLeft_u_8x16x3.val[2]));
3111
3112 uint8x16x3_t revRight_u_8x16x3;
3113 revRight_u_8x16x3.val[0] = vrev64q_u8(right_u_8x16x3.val[0]);
3114 revRight_u_8x16x3.val[1] = vrev64q_u8(right_u_8x16x3.val[1]);
3115 revRight_u_8x16x3.val[2] = vrev64q_u8(right_u_8x16x3.val[2]);
3116 revRight_u_8x16x3.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[0]), vget_low_u8(revRight_u_8x16x3.val[0]));
3117 revRight_u_8x16x3.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[1]), vget_low_u8(revRight_u_8x16x3.val[1]));
3118 revRight_u_8x16x3.val[2] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[2]), vget_low_u8(revRight_u_8x16x3.val[2]));
3119
3120 vst3q_u8(left, revRight_u_8x16x3);
3121 vst3q_u8(right, revLeft_u_8x16x3);
3122
3123 left += 16u * tChannels;
3124 right -= 16u * tChannels;
3125 }
3126
3127 n += blocks32 * 16u;
3128
3129 break;
3130 }
3131
3132 case 4u:
3133 {
3134 for (size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3135 {
3136 const uint8x16x4_t left_u_8x16x4 = vld4q_u8(left);
3137 const uint8x16x4_t right_u_8x16x4 = vld4q_u8(right);
3138
3139 uint8x16x4_t revLeft_u_8x16x4;
3140 revLeft_u_8x16x4.val[0] = vrev64q_u8(left_u_8x16x4.val[0]);
3141 revLeft_u_8x16x4.val[1] = vrev64q_u8(left_u_8x16x4.val[1]);
3142 revLeft_u_8x16x4.val[2] = vrev64q_u8(left_u_8x16x4.val[2]);
3143 revLeft_u_8x16x4.val[3] = vrev64q_u8(left_u_8x16x4.val[3]);
3144 revLeft_u_8x16x4.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[0]), vget_low_u8(revLeft_u_8x16x4.val[0]));
3145 revLeft_u_8x16x4.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[1]), vget_low_u8(revLeft_u_8x16x4.val[1]));
3146 revLeft_u_8x16x4.val[2] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[2]), vget_low_u8(revLeft_u_8x16x4.val[2]));
3147 revLeft_u_8x16x4.val[3] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[3]), vget_low_u8(revLeft_u_8x16x4.val[3]));
3148
3149 uint8x16x4_t revRight_u_8x16x4;
3150 revRight_u_8x16x4.val[0] = vrev64q_u8(right_u_8x16x4.val[0]);
3151 revRight_u_8x16x4.val[1] = vrev64q_u8(right_u_8x16x4.val[1]);
3152 revRight_u_8x16x4.val[2] = vrev64q_u8(right_u_8x16x4.val[2]);
3153 revRight_u_8x16x4.val[3] = vrev64q_u8(right_u_8x16x4.val[3]);
3154 revRight_u_8x16x4.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[0]), vget_low_u8(revRight_u_8x16x4.val[0]));
3155 revRight_u_8x16x4.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[1]), vget_low_u8(revRight_u_8x16x4.val[1]));
3156 revRight_u_8x16x4.val[2] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[2]), vget_low_u8(revRight_u_8x16x4.val[2]));
3157 revRight_u_8x16x4.val[3] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[3]), vget_low_u8(revRight_u_8x16x4.val[3]));
3158
3159 vst4q_u8(left, revRight_u_8x16x4);
3160 vst4q_u8(right, revLeft_u_8x16x4);
3161
3162 left += 16u * tChannels;
3163 right -= 16u * tChannels;
3164 }
3165
3166 n += blocks32 * 16u;
3167
3168 break;
3169 }
3170
3171 default:
3172 break;
3173 }
3174 }
3175 }
3176
3177#endif
3178
3179 PixelType intermediate;
3180
3181 PixelType* const pixels = (PixelType*)(data);
3182
3183 while (n < size / 2)
3184 {
3185 intermediate = pixels[n];
3186
3187 pixels[n] = pixels[size - n - 1];
3188 pixels[size - n - 1] = intermediate;
3189
3190 ++n;
3191 }
3192}
3193
3194template <typename T, unsigned int tChannels>
3195void FrameChannels::reverseRowChannelOrder(const T* source, T* target, const size_t size, const void* /*options*/)
3196{
3197 ocean_assert(source != nullptr && target != nullptr);
3198 ocean_assert(source != target);
3199 ocean_assert(size >= 1);
3200
3201#ifdef OCEAN_DEBUG
3202 const T* const debugSourceStart = source;
3203 const T* const debugSourceEnd = debugSourceStart + size * tChannels;
3204
3205 const T* const debugTargetStart = target;
3206 const T* const debugTargetEnd = debugTargetStart + size * tChannels;
3207#endif
3208
3209 if constexpr (tChannels == 1)
3210 {
3211 // we actually copy the one channel
3212
3213 memcpy(target, source, sizeof(T) * size);
3214 return;
3215 }
3216
3217 const T* const sourceEnd = source + size * tChannels;
3218
3219#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
3220
3221 if ((std::is_same<typename TypeMapper<T>::Type, uint8_t>::value))
3222 {
3223 const size_t blocks16 = size / size_t(16);
3224
3225 switch (tChannels)
3226 {
3227 case 1u:
3228 ocean_assert(false && "This should have been handled above!");
3229 break;
3230
3231 case 2u:
3232 {
3233 for (size_t n = 0; n < blocks16; ++n)
3234 {
3235 SSE::reverseChannelOrder2Channel8Bit32Elements((const uint8_t*)source, (uint8_t*)target);
3236
3237 source += 16u * tChannels;
3238 target += 16u * tChannels;
3239 }
3240
3241 break;
3242 }
3243
3244 case 3u:
3245 {
3246 for (size_t n = 0; n < blocks16; ++n)
3247 {
3248 SSE::reverseChannelOrder3Channel8Bit48Elements((const uint8_t*)source, (uint8_t*)target);
3249
3250 source += 16u * tChannels;
3251 target += 16u * tChannels;
3252 }
3253
3254 break;
3255 }
3256
3257 case 4u:
3258 {
3259 for (size_t n = 0; n < blocks16; ++n)
3260 {
3261 SSE::reverseChannelOrder4Channel8Bit64Elements((const uint8_t*)source, (uint8_t*)target);
3262
3263 source += 16u * tChannels;
3264 target += 16u * tChannels;
3265 }
3266
3267 break;
3268 }
3269
3270 default:
3271 break;
3272 }
3273 }
3274
3275#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3276
3277 if ((std::is_same<typename TypeMapper<T>::Type, uint8_t>::value))
3278 {
3279 const size_t blocks16 = size / size_t(16);
3280
3281 switch (tChannels)
3282 {
3283 case 1u:
3284 ocean_assert(false && "This should have been handled above!");
3285 break;
3286
3287 case 2u:
3288 {
3289 for (size_t n = 0; n < blocks16; ++n)
3290 {
3291 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3292 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3293
3294 const uint8x16_t sourceA_u_8x16 = vld1q_u8((const uint8_t*)source + 0);
3295 const uint8x16_t sourceB_u_8x16 = vld1q_u8((const uint8_t*)source + 16);
3296
3297 const uint8x16_t revSourceA_u_8x16 = vrev16q_u8(sourceA_u_8x16);
3298 const uint8x16_t revSourceB_u_8x16 = vrev16q_u8(sourceB_u_8x16);
3299
3300 vst1q_u8((uint8_t*)target + 0, revSourceA_u_8x16);
3301 vst1q_u8((uint8_t*)target + 16, revSourceB_u_8x16);
3302
3303 source += 16u * tChannels;
3304 target += 16u * tChannels;
3305 }
3306
3307 break;
3308 }
3309
3310 case 3u:
3311 {
3312 for (size_t n = 0; n < blocks16; ++n)
3313 {
3314 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3315 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3316
3317 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)source);
3318
3319 uint8x16x3_t revSource_u_8x16x3;
3320 revSource_u_8x16x3.val[0] = source_u_8x16x3.val[2];
3321 revSource_u_8x16x3.val[1] = source_u_8x16x3.val[1];
3322 revSource_u_8x16x3.val[2] = source_u_8x16x3.val[0];
3323
3324 vst3q_u8((uint8_t*)target, revSource_u_8x16x3);
3325
3326 source += 16u * tChannels;
3327 target += 16u * tChannels;
3328 }
3329
3330 break;
3331 }
3332
3333 case 4u:
3334 {
3335 for (size_t n = 0; n < blocks16; ++n)
3336 {
3337 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3338 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3339
3340 const uint8x16_t sourceA_u_8x16 = vld1q_u8((const uint8_t*)source + 0);
3341 const uint8x16_t sourceB_u_8x16 = vld1q_u8((const uint8_t*)source + 16);
3342 const uint8x16_t sourceC_u_8x16 = vld1q_u8((const uint8_t*)source + 32);
3343 const uint8x16_t sourceD_u_8x16 = vld1q_u8((const uint8_t*)source + 48);
3344
3345 const uint8x16_t revSourceA_u_8x16 = vrev32q_u8(sourceA_u_8x16);
3346 const uint8x16_t revSourceB_u_8x16 = vrev32q_u8(sourceB_u_8x16);
3347 const uint8x16_t revSourceC_u_8x16 = vrev32q_u8(sourceC_u_8x16);
3348 const uint8x16_t revSourceD_u_8x16 = vrev32q_u8(sourceD_u_8x16);
3349
3350 vst1q_u8((uint8_t*)target + 0, revSourceA_u_8x16);
3351 vst1q_u8((uint8_t*)target + 16, revSourceB_u_8x16);
3352 vst1q_u8((uint8_t*)target + 32, revSourceC_u_8x16);
3353 vst1q_u8((uint8_t*)target + 48, revSourceD_u_8x16);
3354
3355 source += 16u * tChannels;
3356 target += 16u * tChannels;
3357 }
3358
3359 break;
3360 }
3361
3362 default:
3363 break;
3364 }
3365 }
3366
3367#endif // OCEAN_HARDWARE_NEON_VERSION
3368
3369 while (source != sourceEnd)
3370 {
3371 ocean_assert(source < sourceEnd);
3372
3373 ocean_assert(source >= debugSourceStart && source + tChannels <= debugSourceEnd);
3374 ocean_assert(target >= debugTargetStart && target + tChannels <= debugTargetEnd);
3375
3376 for (unsigned int n = 0u; n < tChannels; ++n)
3377 {
3378 target[n] = source[tChannels - n - 1u];
3379 }
3380
3381 source += tChannels;
3382 target += tChannels;
3383 }
3384}
3385
3386template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
3387inline void FrameChannels::shuffleRowChannels(const T* source, T* target, const size_t size, const void* /*unusedOptions*/)
3388{
3389 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u, "Invalid channel number!");
3390 static_assert(tTargetChannels >= 1u && tTargetChannels <= 8u, "Invalid channel number!");
3391
3392 static_assert(tSourceChannels != 1u || tTargetChannels != 1u, "Invalid channel number!");
3393
3394 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels, "Invalid shuffle pattern!");
3395 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels, "Invalid shuffle pattern!");
3396 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels, "Invalid shuffle pattern!");
3397 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels, "Invalid shuffle pattern!");
3398 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels, "Invalid shuffle pattern!");
3399 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels, "Invalid shuffle pattern!");
3400 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels, "Invalid shuffle pattern!");
3401 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels, "Invalid shuffle pattern!");
3402
3403 ocean_assert(source != nullptr && target != nullptr);
3404 ocean_assert(size != 0);
3405
3406 const T* const sourceEnd = source + size * tSourceChannels;
3407
3408#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
3409
3410 if ((std::is_same<typename TypeMapper<T>::Type, uint8_t>::value))
3411 {
3412 const size_t blocks16 = size / size_t(16);
3413
3414 switch (tSourceChannels | ((tTargetChannels) << 4u))
3415 {
3416 // 4 -> 4
3417 case (4u | (4u << 4u)):
3418 {
3419 // the following shuffle patterns are known during compile time
3420
3421 constexpr unsigned int offset1 = 0x04040404u;
3422 constexpr unsigned int offset2 = 0x08080808u;
3423 constexpr unsigned int offset3 = 0x0C0C0C0Cu;
3424
3425 // converting shufflePattern16 to shufflePattern16
3426 const unsigned int shufflePattern0 = ((tShufflePattern & 0xF000u) << 12u) | ((tShufflePattern & 0x0F00u) << 8u) | ((tShufflePattern & 0x00F0u) << 4u) | ((tShufflePattern & 0x000Fu) << 0u);
3427
3428 const unsigned int shufflePattern1 = shufflePattern0 + offset1;
3429 const unsigned int shufflePattern2 = shufflePattern0 + offset2;
3430 const unsigned int shufflePattern3 = shufflePattern0 + offset3;
3431
3432 const __m128i shufflePattern128 = SSE::set128i((((unsigned long long)shufflePattern3) << 32ull) | (unsigned long long)shufflePattern2, (((unsigned long long)shufflePattern1) << 32ull) | (unsigned long long)shufflePattern0);
3433
3434 for (size_t n = 0; n < blocks16; ++n)
3435 {
3436 SSE::store128i(_mm_shuffle_epi8(SSE::load128i((const uint8_t*)source + 0), shufflePattern128), (uint8_t*)target + 0);
3437 SSE::store128i(_mm_shuffle_epi8(SSE::load128i((const uint8_t*)source + 16), shufflePattern128), (uint8_t*)target + 16);
3438 SSE::store128i(_mm_shuffle_epi8(SSE::load128i((const uint8_t*)source + 32), shufflePattern128), (uint8_t*)target + 32);
3439 SSE::store128i(_mm_shuffle_epi8(SSE::load128i((const uint8_t*)source + 48), shufflePattern128), (uint8_t*)target + 48);
3440
3441 source += 16u * tSourceChannels;
3442 target += 16u * tTargetChannels;
3443 }
3444
3445 break;
3446 }
3447
3448 default:
3449 // we do not have a NEON-based optimization
3450 break;
3451 }
3452 }
3453
3454#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3455
3456 if ((std::is_same<typename TypeMapper<T>::Type, uint8_t>::value))
3457 {
3458 const size_t blocks16 = size / size_t(16);
3459
3460 switch (tSourceChannels | ((tTargetChannels) << 4u))
3461 {
3462 // 1 -> 3
3463 case (1u | (3u << 4u)):
3464 {
3465 static_assert(tSourceChannels != 1u || tShufflePattern == 0u, "Invalid shuffle patter!");
3466
3467 for (size_t n = 0; n < blocks16; ++n)
3468 {
3469 const uint8x16_t source_u_8x16 = vld1q_u8((const uint8_t*)source);
3470
3471 uint8x16x3_t target_u_8x16x3;
3472
3473 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3474 {
3475 target_u_8x16x3.val[nT] = source_u_8x16;
3476 }
3477
3478 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3479
3480 source += 16u * tSourceChannels;
3481 target += 16u * tTargetChannels;
3482 }
3483
3484 break;
3485 }
3486
3487 // 2 -> 1
3488 case (2u | (1u << 4u)):
3489 {
3490 for (size_t n = 0; n < blocks16; ++n)
3491 {
3492 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((const uint8_t*)source);
3493
3494 constexpr unsigned int sourceChannel = tShufflePattern & 0x00000001u; // possible index values {0, 1}
3495 static_assert(sourceChannel <= 1u, "Invalid shuffle pattern!");
3496 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3497
3498 const uint8x16_t target_u_8x16 = source_u_8x16x2.val[sourceChannel];
3499
3500 vst1q_u8((uint8_t*)target, target_u_8x16);
3501
3502 source += 16u * tSourceChannels;
3503 target += 16u * tTargetChannels;
3504 }
3505
3506 break;
3507 }
3508
3509 // 2 -> 3
3510 case (2u | (3u << 4u)):
3511 {
3512 for (size_t n = 0; n < blocks16; ++n)
3513 {
3514 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((const uint8_t*)source);
3515
3516 uint8x16x3_t target_u_8x16x3;
3517
3518 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3519 {
3520 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000001u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3521
3522 target_u_8x16x3.val[nT] = source_u_8x16x2.val[(tShufflePattern >> (nT * 4u)) & 0x00000001u]; // possible index values {0, 1}
3523 }
3524
3525 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3526
3527 source += 16u * tSourceChannels;
3528 target += 16u * tTargetChannels;
3529 }
3530
3531 break;
3532 }
3533
3534 // 2 -> 4
3535 case (2u | (4u << 4u)):
3536 {
3537 for (size_t n = 0; n < blocks16; ++n)
3538 {
3539 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((const uint8_t*)source);
3540
3541 uint8x16x4_t target_u_8x16x4;
3542
3543 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3544 {
3545 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000001u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3546
3547 target_u_8x16x4.val[nT] = source_u_8x16x2.val[(tShufflePattern >> (nT * 4u)) & 0x00000001u]; // possible index values {0, 1}
3548 }
3549
3550 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3551
3552 source += 16u * tSourceChannels;
3553 target += 16u * tTargetChannels;
3554 }
3555
3556 break;
3557 }
3558
3559 // 3 -> 1
3560 case (3u | (1u << 4u)):
3561 {
3562 constexpr unsigned int sourceChannel = (tShufflePattern & 0x0000000Fu) <= 2u ? (tShufflePattern & 0x0000000Fu) : 2u; // possible index values {0, 1, 2}
3563 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3564
3565 for (size_t n = 0; n < blocks16; ++n)
3566 {
3567 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)source);
3568
3569 const uint8x16_t target_u_8x16 = source_u_8x16x3.val[sourceChannel];
3570
3571 vst1q_u8((uint8_t*)target, target_u_8x16);
3572
3573 source += 16u * tSourceChannels;
3574 target += 16u * tTargetChannels;
3575 }
3576
3577 break;
3578 }
3579
3580 // 3 -> 2
3581 case (3u | (2u << 4u)):
3582 {
3583 for (size_t n = 0; n < blocks16; ++n)
3584 {
3585 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)source);
3586
3587 uint8x16x2_t target_u_8x16x2;
3588
3589 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3590 {
3591 target_u_8x16x2.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)]; // possible index values {0, 1, 2}
3592 }
3593
3594 vst2q_u8((uint8_t*)target, target_u_8x16x2);
3595
3596 source += 16u * tSourceChannels;
3597 target += 16u * tTargetChannels;
3598 }
3599
3600 break;
3601 }
3602
3603 // 3 -> 3
3604 case (3u | (3u << 4u)):
3605 {
3606 for (size_t n = 0; n < blocks16; ++n)
3607 {
3608 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)source);
3609
3610 uint8x16x3_t target_u_8x16x3;
3611
3612 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3613 {
3614 target_u_8x16x3.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)]; // possible index values {0, 1, 2}
3615 }
3616
3617 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3618
3619 source += 16u * tSourceChannels;
3620 target += 16u * tTargetChannels;
3621 }
3622
3623 break;
3624 }
3625
3626 // 4 -> 1
3627 case (4u | (1u << 4u)):
3628 {
3629 for (size_t n = 0; n < blocks16; ++n)
3630 {
3631 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((const uint8_t*)source);
3632
3633 constexpr unsigned int sourceChannel = tShufflePattern & 0x00000003u; // possible index values {0, 1, 2, 3}
3634 static_assert(sourceChannel <= 3u, "Invalid shuffle pattern!");
3635
3636 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3637
3638 const uint8x16_t target_u_8x16 = source_u_8x16x4.val[sourceChannel];
3639
3640 vst1q_u8((uint8_t*)target, target_u_8x16);
3641
3642 source += 16u * tSourceChannels;
3643 target += 16u * tTargetChannels;
3644 }
3645
3646 break;
3647 }
3648
3649 // 4 -> 2
3650 case (4u | (2u << 4u)):
3651 {
3652 for (size_t n = 0; n < blocks16; ++n)
3653 {
3654 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((const uint8_t*)source);
3655
3656 uint8x16x2_t target_u_8x16x2;
3657
3658 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3659 {
3660 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3661
3662 target_u_8x16x2.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u]; // possible index values {0, 1, 2, 3}
3663 }
3664
3665 vst2q_u8((uint8_t*)target, target_u_8x16x2);
3666
3667 source += 16u * tSourceChannels;
3668 target += 16u * tTargetChannels;
3669 }
3670
3671 break;
3672 }
3673
3674 // 4 -> 3
3675 case (4u | (3u << 4u)):
3676 {
3677 for (size_t n = 0; n < blocks16; ++n)
3678 {
3679 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((const uint8_t*)source);
3680
3681 uint8x16x3_t target_u_8x16x3;
3682
3683 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3684 {
3685 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3686
3687 target_u_8x16x3.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u]; // possible index values {0, 1, 2, 3}
3688 }
3689
3690 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3691
3692 source += 16u * tSourceChannels;
3693 target += 16u * tTargetChannels;
3694 }
3695
3696 break;
3697 }
3698
3699 // 4 -> 4
3700 case (4u | (4u << 4u)):
3701 {
3702 for (size_t n = 0; n < blocks16; ++n)
3703 {
3704 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((const uint8_t*)source);
3705
3706 uint8x16x4_t target_u_8x16x4;
3707
3708 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3709 {
3710 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3711
3712 target_u_8x16x4.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u]; // possible index values {0, 1, 2, 3}
3713 }
3714
3715 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3716
3717 source += 16u * tSourceChannels;
3718 target += 16u * tTargetChannels;
3719 }
3720
3721 break;
3722 }
3723
3724 default:
3725 // we do not have a NEON-based optimization
3726 break;
3727 }
3728 }
3729
3730#endif
3731
3732 while (source != sourceEnd)
3733 {
3734 ocean_assert(source < sourceEnd);
3735
3736 for (unsigned int n = 0u; n < tTargetChannels; ++n)
3737 {
3738 target[n] = source[(tShufflePattern >> (n * 4u)) & 0x0000000Fu];
3739 }
3740
3741 source += tSourceChannels;
3742 target += tTargetChannels;
3743 }
3744}
3745
3746template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
3747inline void FrameChannels::shuffleRowChannelsAndSetLastChannelValue(const T* source, T* target, const size_t size, const void* options)
3748{
3749 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u, "Invalid channel number!");
3750 static_assert(tTargetChannels >= 2u && tTargetChannels <= 8u, "Invalid channel number!");
3751
3752 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels, "Invalid shuffle pattern!");
3753 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels, "Invalid shuffle pattern!");
3754 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels, "Invalid shuffle pattern!");
3755 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels, "Invalid shuffle pattern!");
3756 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels, "Invalid shuffle pattern!");
3757 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels, "Invalid shuffle pattern!");
3758 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels, "Invalid shuffle pattern!");
3759 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels, "Invalid shuffle pattern!");
3760
3761 ocean_assert(source != nullptr && target != nullptr);
3762 ocean_assert(size != 0);
3763
3764 ocean_assert(options != nullptr);
3765
3766 const T lastChannelValue = *(const T*)(options);
3767
3768 const T* const sourceEnd = source + size * tSourceChannels;
3769
3770#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3771
3772 if ((std::is_same<typename TypeMapper<T>::Type, uint8_t>::value))
3773 {
3774 const size_t blocks16 = size / size_t(16);
3775
3776 switch (tSourceChannels | ((tTargetChannels) << 4u))
3777 {
3778 // 1 -> 4
3779 case (1u | (4u << 4u)):
3780 {
3781 ocean_assert(tShufflePattern == 0u);
3782
3783 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3784
3785 uint8x16x4_t target_u_8x16x4;
3786 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3787
3788 for (size_t n = 0; n < blocks16; ++n)
3789 {
3790 const uint8x16_t source_u_8x16 = vld1q_u8((const uint8_t*)source);
3791
3792 for (unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
3793 {
3794 target_u_8x16x4.val[nT] = source_u_8x16;
3795 }
3796
3797 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3798
3799 source += 16u * tSourceChannels;
3800 target += 16u * tTargetChannels;
3801 }
3802
3803 break;
3804 }
3805
3806 // 3 -> 4
3807 case (3u | (4u << 4u)):
3808 {
3809 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3810
3811 uint8x16x4_t target_u_8x16x4;
3812 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3813
3814 for (size_t n = 0; n < blocks16; ++n)
3815 {
3816 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)source);
3817
3818 for (unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
3819 {
3820 target_u_8x16x4.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)]; // possible index values {0, 1, 2}
3821 }
3822
3823 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3824
3825 source += 16u * tSourceChannels;
3826 target += 16u * tTargetChannels;
3827 }
3828
3829 break;
3830 }
3831
3832 // 4 -> 4
3833 case (4u | (4u << 4u)):
3834 {
3835 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3836
3837 uint8x16x4_t target_u_8x16x4;
3838 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3839
3840 for (size_t n = 0; n < blocks16; ++n)
3841 {
3842 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((const uint8_t*)source);
3843
3844 for (unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
3845 {
3846 target_u_8x16x4.val[nT] = source_u_8x16x4.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 3u)]; // possible index values {0, 1, 2, 3}
3847 }
3848
3849 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3850
3851 source += 16u * tSourceChannels;
3852 target += 16u * tTargetChannels;
3853 }
3854
3855 break;
3856 }
3857
3858 default:
3859 // we do not have a NEON-based optimization
3860 break;
3861 }
3862 }
3863
3864#endif
3865
3866 while (source != sourceEnd)
3867 {
3868 ocean_assert(source < sourceEnd);
3869
3870 for (unsigned int n = 0u; n < tTargetChannels - 1u; ++n)
3871 {
3872 target[n] = source[(tShufflePattern >> (n * 4u)) & 0x0000000Fu];
3873 target[tTargetChannels - 1u] = lastChannelValue;
3874 }
3875
3876 source += tSourceChannels;
3877 target += tTargetChannels;
3878 }
3879}
3880
3881template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
3882inline void FrameChannels::shuffleChannels(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
3883{
3884 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u, "Invalid channel number!");
3885 static_assert(tTargetChannels >= 1u && tTargetChannels <= 8u, "Invalid channel number!");
3886
3887 static_assert(tSourceChannels != 1u || tTargetChannels != 1u, "Invalid channel number!");
3888
3889 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels, "Invalid shuffle pattern!");
3890 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels, "Invalid shuffle pattern!");
3891 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels, "Invalid shuffle pattern!");
3892 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels, "Invalid shuffle pattern!");
3893 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels, "Invalid shuffle pattern!");
3894 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels, "Invalid shuffle pattern!");
3895 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels, "Invalid shuffle pattern!");
3896 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels, "Invalid shuffle pattern!");
3897
3898 ocean_assert(source != nullptr && target != nullptr);
3899 ocean_assert(width >= 1u && height >= 1u);
3900
3901 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
3902 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
3903
3904 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
3905
3906 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::shuffleRowChannels<T, tSourceChannels, tTargetChannels, tShufflePattern>, FrameChannels::reverseRowPixelOrderInPlace<T, tTargetChannels>, areContinuous, nullptr, worker);
3907}
3908
3909template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
3910inline void FrameChannels::shuffleChannelsAndSetLastChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
3911{
3912 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u, "Invalid channel number!");
3913 static_assert(tTargetChannels >= 2u && tTargetChannels <= 8u, "Invalid channel number!");
3914
3915 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels, "Invalid shuffle pattern!");
3916 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels, "Invalid shuffle pattern!");
3917 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels, "Invalid shuffle pattern!");
3918 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels, "Invalid shuffle pattern!");
3919 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels, "Invalid shuffle pattern!");
3920 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels, "Invalid shuffle pattern!");
3921 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels, "Invalid shuffle pattern!");
3922 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels, "Invalid shuffle pattern!");
3923
3924 ocean_assert(source != nullptr && target != nullptr);
3925 ocean_assert(width >= 1u && height >= 1u);
3926
3927 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
3928 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
3929
3930 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
3931
3932 const T options = newChannelValue;
3933
3934 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::shuffleRowChannelsAndSetLastChannelValue<T, tSourceChannels, tTargetChannels, tShufflePattern>, FrameChannels::reverseRowPixelOrderInPlace<T, tTargetChannels>, areContinuous, &options, worker);
3935}
3936
3937template <unsigned int tChannels>
3938inline void FrameChannels::narrow16BitPerChannelTo8BitPerChannel(const uint16_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
3939{
3940 static_assert(tChannels >= 1u, "Invalid channel number!");
3941
3942 ocean_assert(source != nullptr && target != nullptr);
3943 ocean_assert(width >= 1u && height >= 1u);
3944
3945 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
3946 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
3947
3948 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
3949
3950 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::narrowRow16BitPerChannelTo8BitPerChannel<tChannels>, FrameChannels::reverseRowPixelOrderInPlace<uint8_t, tChannels>, areContinuous, nullptr, worker);
3951}
3952
3953template <typename T, unsigned int tChannels, void (*tPixelFunction)(const T*, T*)>
3954void FrameChannels::applyPixelModifier(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, Worker* worker)
3955{
3956 static_assert(tChannels > 0u, "Invalid channel number!");
3957
3958 ocean_assert(source && target);
3959 ocean_assert(width != 0u && height != 0u);
3960
3961 if (worker)
3962 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::applyPixelModifierSubset<T, tChannels, tPixelFunction>, source, target, width, height, conversionFlag, 0u, 0u), 0u, height);
3963 else
3964 applyPixelModifierSubset<T, tChannels, tPixelFunction>(source, target, width, height, conversionFlag, 0u, height);
3965}
3966
3967template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tPixelFunction)(const TSource*, TTarget*)>
3968void FrameChannels::applyAdvancedPixelModifier(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker* worker)
3969{
3970 static_assert(tSourceChannels > 0u, "Invalid source channel number!");
3971 static_assert(tTargetChannels > 0u, "Invalid target channel number!");
3972
3973 ocean_assert(source && target);
3974 ocean_assert(width != 0u && height != 0u);
3975
3976 if (worker)
3977 {
3978 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::applyAdvancedPixelModifierSubset<TSource, TTarget, tSourceChannels, tTargetChannels, tPixelFunction>, source, target, width, height, sourcePaddingElements, targetPaddingElements, conversionFlag, 0u, 0u), 0u, height);
3979 }
3980 else
3981 {
3982 applyAdvancedPixelModifierSubset<TSource, TTarget, tSourceChannels, tTargetChannels, tPixelFunction>(source, target, width, height, sourcePaddingElements, targetPaddingElements, conversionFlag, 0u, height);
3983 }
3984}
3985
3986template <typename TSource0, typename TSource1, typename TTarget, typename TIntermediate, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
3987void FrameChannels::applyBivariateOperator(const TSource0* source0, const TSource1* source1, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker* worker)
3988{
3989 static_assert(tSourceChannels > 0u, "Invalid source channel number!");
3990 static_assert(tTargetChannels > 0u, "Invalid target channel number!");
3991
3992 ocean_assert(source0 && source1 && target);
3993 ocean_assert(width != 0u && height != 0u);
3994
3995 if (worker)
3996 {
3997 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::applyBivariateOperatorSubset<TSource0, TSource1, TTarget, TIntermediate, tSourceChannels, tTargetChannels, tOperator>, source0, source1, target, width, height, source0PaddingElements, source1PaddingElements, targetPaddingElements, conversionFlag, 0u, 0u), 0u, height);
3998 }
3999 else
4000 {
4001 FrameChannels::applyBivariateOperatorSubset<TSource0, TSource1, TTarget, TIntermediate, tSourceChannels, tTargetChannels, tOperator>(source0, source1, target, width, height, source0PaddingElements, source1PaddingElements, targetPaddingElements, conversionFlag, 0u, height);
4002 }
4003}
4004
4005template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels>
4006void FrameChannels::applyRowOperator(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels>& rowOperatorFunction, Worker* worker)
4007{
4008 static_assert(tSourceChannels > 0u, "Invalid source channel number!");
4009 static_assert(tTargetChannels > 0u, "Invalid target channel number!");
4010
4011 ocean_assert(source != nullptr && target != nullptr);
4012 ocean_assert(width != 0u && height != 0u);
4013
4014 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
4015 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
4016
4017 if (worker)
4018 {
4019 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::applyRowOperatorSubset<TSource, TTarget, tSourceChannels, tTargetChannels>, source, target, width, height, sourceStrideElements, targetStrideElements, rowOperatorFunction, 0u, 0u), 0u, height);
4020 }
4021 else
4022 {
4023 applyRowOperatorSubset<TSource, TTarget, tSourceChannels, tTargetChannels>(source, target, width, height, sourceStrideElements, targetStrideElements, rowOperatorFunction, 0u, height);
4024 }
4025}
4026
4027template <typename T, unsigned int tChannels>
4028inline void FrameChannels::transformGeneric(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
4029{
4030 ocean_assert(source != nullptr && target != nullptr);
4031 ocean_assert(width >= 1u && height >= 1u);
4032
4033 const unsigned int bytesPerRow = width * sizeof(T) * tChannels;
4034
4035 const unsigned int sourceStrideBytes = width * sizeof(T) * tChannels + sizeof(T) * sourcePaddingElements;
4036 const unsigned int targetStrideBytes = width * sizeof(T) * tChannels + sizeof(T) * targetPaddingElements;
4037
4038 typedef typename TypeMapper<T>::Type MappedType;
4039
4040 const RowReversePixelOrderFunction<void> rowReversePixelOrderFunction = (const RowReversePixelOrderFunction<void>)(FrameChannels::reverseRowPixelOrder<MappedType, tChannels>);
4041
4042 if (worker && height > 200u)
4043 {
4044 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::transformGenericSubset, (const uint8_t*)(source), (uint8_t*)(target), width, height, conversionFlag, rowReversePixelOrderFunction, bytesPerRow, sourceStrideBytes, targetStrideBytes, 0u, 0u), 0u, height, 9u, 10u, 20u);
4045 }
4046 else
4047 {
4048 transformGenericSubset((const uint8_t*)(source), (uint8_t*)(target), width, height, conversionFlag, rowReversePixelOrderFunction, bytesPerRow, sourceStrideBytes, targetStrideBytes, 0u, height);
4049 }
4050}
4051
4052template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
4053void FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannel(uint8_t* const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker* worker)
4054{
4055 static_assert(tChannels >= 2u, "Invalid channel number!");
4056 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
4057
4058 ocean_assert(frame != nullptr);
4059 ocean_assert(width >= 1u && height >= 1u);
4060
4061 if (worker && height > 200u)
4062 {
4063 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, frame, width, framePaddingElements, 0u, 0u), 0u, height, 3u, 4u, 20u);
4064 }
4065 else
4066 {
4067 premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(frame, width, framePaddingElements, 0u, height);
4068 }
4069}
4070
4071template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
4072void FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
4073{
4074 static_assert(tChannels >= 2u, "Invalid channel number!");
4075 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
4076
4077 ocean_assert(source != nullptr && target != nullptr);
4078 ocean_assert(width >= 1u && height >= 1u);
4079
4080 if (worker && height > 200u)
4081 {
4082 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, source, target, width, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height, 5u, 6u, 20u);
4083 }
4084 else
4085 {
4086 premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(source, target, width, sourcePaddingElements, targetPaddingElements, 0u, height);
4087 }
4088}
4089
4090template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
4091void FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannel(uint8_t* const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker* worker)
4092{
4093 static_assert(tChannels >= 2u, "Invalid channel number!");
4094 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
4095
4096 ocean_assert(frame != nullptr);
4097 ocean_assert(width >= 1u && height >= 1u);
4098
4099 if (worker && height > 200u)
4100 {
4101 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, frame, width, framePaddingElements, 0u, 0u), 0u, height, 3u, 4u, 20u);
4102 }
4103 else
4104 {
4105 straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(frame, width, framePaddingElements, 0u, height);
4106 }
4107}
4108
4109template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
4110void FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
4111{
4112 static_assert(tChannels >= 2u, "Invalid channel number!");
4113 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
4114
4115 ocean_assert(source != nullptr && target != nullptr);
4116 ocean_assert(width >= 1u && height >= 1u);
4117
4118 if (worker && height > 200u)
4119 {
4120 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, source, target, width, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height, 5u, 6u, 20u);
4121 }
4122 else
4123 {
4124 straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(source, target, width, sourcePaddingElements, targetPaddingElements, 0u, height);
4125 }
4126}
4127
4128template <unsigned int tChannels>
4129void FrameChannels::narrowRow16BitPerChannelTo8BitPerChannel(const uint16_t* source, uint8_t* target, const size_t size, const void* /* unusedParameters */)
4130{
4131 static_assert(tChannels >= 1u, "Invalid channel number!");
4132
4133 ocean_assert(source != nullptr && target != nullptr);
4134 ocean_assert(size > 0);
4135
4136 const uint16_t* const sourceEnd = source + size * tChannels;
4137
4138#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
4139
4140 const size_t blocks8 = size / size_t(8);
4141
4142 switch (tChannels)
4143 {
4144 case 4u:
4145 {
4146 for (size_t n = 0; n < blocks8; ++n)
4147 {
4148 const uint16x8_t sourceA_u_16x8 = vld1q_u16(source + 0);
4149 const uint16x8_t sourceB_u_16x8 = vld1q_u16(source + 8);
4150 const uint16x8_t sourceC_u_16x8 = vld1q_u16(source + 16);
4151 const uint16x8_t sourceD_u_16x8 = vld1q_u16(source + 24);
4152
4153 const uint8x16_t targetAB_u_8x16 = vcombine_u8(vqrshrn_n_u16(sourceA_u_16x8, 8), vqrshrn_n_u16(sourceB_u_16x8, 8)); // narrowing rounded right shift: target = (source + 128) / 256
4154 const uint8x16_t targetCD_u_8x16 = vcombine_u8(vqrshrn_n_u16(sourceC_u_16x8, 8), vqrshrn_n_u16(sourceD_u_16x8, 8));
4155
4156 vst1q_u8(target + 0, targetAB_u_8x16);
4157 vst1q_u8(target + 16, targetCD_u_8x16);
4158
4159 source += 8u * tChannels;
4160 target += 8u * tChannels;
4161 }
4162
4163 break;
4164 }
4165
4166 default:
4167 break;
4168 }
4169
4170#endif
4171
4172 while (source != sourceEnd)
4173 {
4174 ocean_assert(source < sourceEnd);
4175
4176 for (unsigned int n = 0u; n < tChannels; ++n)
4177 {
4178 ocean_assert((uint16_t)(source[n] >> 8u) <= 255u);
4179 target[n] = (uint8_t)(source[n] >> 8u);
4180 }
4181
4182 source += tChannels;
4183 target += tChannels;
4184 }
4185}
4186
4187template <typename T, unsigned int tSourceChannels, bool tAddToFront>
4188void FrameChannels::addChannelRow(const void** sources, void** targets, const unsigned int multipleRowIndex, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const void* options)
4189{
4190 static_assert(tSourceChannels != 0u, "Invalid channel number!");
4191 static_assert(sizeof(size_t) == sizeof(const T*), "Invalid pointer size!");
4192
4193 ocean_assert(sources != nullptr && targets != nullptr);
4194 ocean_assert(width != 0u && height != 0u);
4195 ocean_assert(multipleRowIndex < height);
4196 ocean_assert(options != nullptr);
4197
4198 const T* source = (const T*)(sources[0]);
4199 const T* sourceOneChannel = (const T*)(sources[1]);
4200 ocean_assert(source != nullptr && sourceOneChannel != nullptr);
4201
4202 T* target = (T*)(targets[0]);
4203 ocean_assert(target != nullptr);
4204
4205 const unsigned int* uintOptions = (const unsigned int*)options;
4206 ocean_assert(uintOptions != nullptr);
4207
4208 const unsigned int sourcePaddingElements = uintOptions[0];
4209 const unsigned int sourceOneChannelPaddingElements = uintOptions[1];
4210 const unsigned int targetPaddingElements = uintOptions[2];
4211
4212 const unsigned int targetChannels = tSourceChannels + 1u;
4213
4214 const unsigned int sourceStrideElements = tSourceChannels * width + sourcePaddingElements;
4215 const unsigned int sourceOneChannelStrideElements = width + sourceOneChannelPaddingElements;
4216 const unsigned int targetStrideElements = targetChannels * width + targetPaddingElements;
4217
4218 const bool flipTarget = conversionFlag == CONVERT_FLIPPED || conversionFlag == CONVERT_FLIPPED_AND_MIRRORED;
4219 const bool mirrorTarget = conversionFlag == CONVERT_MIRRORED || conversionFlag == CONVERT_FLIPPED_AND_MIRRORED;
4220
4221 const T* sourceRow = source + sourceStrideElements * multipleRowIndex;
4222 const T* sourceOneChannelRow = sourceOneChannel + sourceOneChannelStrideElements * multipleRowIndex;
4223 T* targetRow = flipTarget ? target + targetStrideElements * (height - multipleRowIndex - 1u) : target + targetStrideElements * multipleRowIndex;
4224
4225 if (mirrorTarget == false)
4226 {
4227 for (unsigned int n = 0u; n < width; ++n)
4228 {
4229 if constexpr (tAddToFront)
4230 {
4231 targetRow[0] = sourceOneChannelRow[0];
4232
4233 for (unsigned int c = 0u; c < tSourceChannels; ++c)
4234 {
4235 targetRow[c + 1u] = sourceRow[c];
4236 }
4237 }
4238 else
4239 {
4240 for (unsigned int c = 0u; c < tSourceChannels; ++c)
4241 {
4242 targetRow[c] = sourceRow[c];
4243 }
4244
4245 targetRow[tSourceChannels] = sourceOneChannelRow[0];
4246 }
4247
4248 sourceRow += tSourceChannels;
4249 sourceOneChannelRow++;
4250
4251 targetRow += targetChannels;
4252 }
4253 }
4254 else
4255 {
4256 targetRow += targetChannels * (width - 1u);
4257
4258 for (unsigned int n = 0u; n < width; ++n)
4259 {
4260 if constexpr (tAddToFront)
4261 {
4262 targetRow[0] = sourceOneChannelRow[0];
4263
4264 for (unsigned int c = 0u; c < tSourceChannels; ++c)
4265 {
4266 targetRow[c + 1u] = sourceRow[c];
4267 }
4268 }
4269 else
4270 {
4271 for (unsigned int c = 0u; c < tSourceChannels; ++c)
4272 {
4273 targetRow[c] = sourceRow[c];
4274 }
4275
4276 targetRow[tSourceChannels] = sourceOneChannelRow[0];
4277 }
4278
4279 sourceRow += tSourceChannels;
4280 sourceOneChannelRow++;
4281
4282 targetRow -= targetChannels;
4283 }
4284 }
4285}
4286
4287template <typename T, unsigned int tSourceChannels, bool tAddToFront>
4288void FrameChannels::addChannelValueRow(const T* source, T* target, const size_t size, const void* channelValueParameter)
4289{
4290 static_assert(tSourceChannels != 0u, "Invalid channel number!");
4291
4292 ocean_assert(source != nullptr && target != nullptr);
4293 ocean_assert(size > 0);
4294 ocean_assert(channelValueParameter != nullptr);
4295
4296 const T& channelValue = *((const T*)channelValueParameter);
4297
4298 const unsigned int targetChannels = tSourceChannels + 1u;
4299
4300 for (size_t n = 0; n < size; ++n)
4301 {
4302 if constexpr (tAddToFront)
4303 {
4304 target[0] = channelValue;
4305
4306 for (unsigned int c = 0u; c < tSourceChannels; ++c)
4307 {
4308 target[c + 1u] = source[c];
4309 }
4310 }
4311 else
4312 {
4313 for (unsigned int c = 0u; c < tSourceChannels; ++c)
4314 {
4315 target[c] = source[c];
4316 }
4317
4318 target[tSourceChannels] = channelValue;
4319 }
4320
4321 source += tSourceChannels;
4322 target += targetChannels;
4323 }
4324}
4325
4326template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tSourceChannelIndex, unsigned int tTargetChannelIndex>
4327void FrameChannels::copyChannelRow(const T* source, T* target, const size_t size, const void* /*unusedParameters*/)
4328{
4329 static_assert(tSourceChannels != 0u, "Invalid channel number!");
4330 static_assert(tTargetChannels != 0u, "Invalid channel number!");
4331
4332 static_assert(tSourceChannelIndex < tSourceChannels, "Invalid channel number!");
4333 static_assert(tTargetChannelIndex < tTargetChannels, "Invalid channel number!");
4334
4335 ocean_assert(source != nullptr && target != nullptr);
4336 ocean_assert(size > 0);
4337
4338 for (size_t n = 0; n < size; ++n)
4339 {
4340 target[tTargetChannelIndex] = source[tSourceChannelIndex];
4341
4342 source += tSourceChannels;
4343 target += tTargetChannels;
4344 }
4345}
4346
4347template <typename TSource, typename TTarget>
4348void FrameChannels::separateTo1ChannelRuntime(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
4349{
4350 ocean_assert(sourceFrame != nullptr);
4351 ocean_assert(targetFrames != nullptr);
4352
4353 ocean_assert(width != 0u && height != 0u);
4354 ocean_assert(channels != 0u);
4355
4356#ifdef OCEAN_DEBUG
4357 for (unsigned int c = 0u; c < channels; ++c)
4358 {
4359 ocean_assert(targetFrames[c] != nullptr);
4360 }
4361#endif
4362
4363 if (sourceFramePaddingElements == 0u && targetFramesPaddingElements == nullptr)
4364 {
4365 for (unsigned int n = 0u; n < width * height; ++n)
4366 {
4367 for (unsigned int c = 0u; c < channels; ++c)
4368 {
4369 targetFrames[c][n] = TTarget(sourceFrame[n * channels + c]);
4370 }
4371 }
4372 }
4373 else if (targetFramesPaddingElements == nullptr)
4374 {
4375 ocean_assert(sourceFramePaddingElements != 0u);
4376
4377 const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
4378
4379 for (unsigned int y = 0u; y < height; ++y)
4380 {
4381 const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
4382
4383 const unsigned int targetRowOffset = y * width;
4384
4385 for (unsigned int x = 0u; x < width; ++x)
4386 {
4387 for (unsigned int c = 0u; c < channels; ++c)
4388 {
4389 *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * channels + c));
4390 }
4391 }
4392 }
4393 }
4394 else
4395 {
4396 const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
4397
4398 Indices32 targetFrameStrideElements(channels);
4399
4400 for (unsigned int c = 0u; c < channels; ++c)
4401 {
4402 targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
4403 }
4404
4405 for (unsigned int y = 0u; y < height; ++y)
4406 {
4407 const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
4408
4409 for (unsigned int x = 0u; x < width; ++x)
4410 {
4411 for (unsigned int c = 0u; c < channels; ++c)
4412 {
4413 *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * channels + c));
4414 }
4415 }
4416 }
4417 }
4418}
4419
4420template <typename TSource, typename TTarget>
4421void FrameChannels::zipChannelsRuntime(const TSource* const* sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
4422{
4423 ocean_assert(sourceFrames != nullptr);
4424 ocean_assert(targetFrame != nullptr);
4425
4426 ocean_assert(width != 0u && height != 0u);
4427 ocean_assert(channels != 0u);
4428
4429 bool allSourceFramesContinuous = true;
4430
4431 if (sourceFramesPaddingElements != nullptr)
4432 {
4433 for (unsigned int n = 0u; n < channels; ++n)
4434 {
4435 if (sourceFramesPaddingElements[n] != 0u)
4436 {
4437 allSourceFramesContinuous = false;
4438 break;
4439 }
4440 }
4441 }
4442
4443 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
4444 {
4445 for (unsigned int n = 0u; n < width * height; ++n)
4446 {
4447 for (unsigned int c = 0u; c < channels; ++c)
4448 {
4449 targetFrame[n * channels + c] = TTarget(sourceFrames[c][n]);
4450 }
4451 }
4452 }
4453 else
4454 {
4455 const unsigned int targetFrameStrideElements = width * channels + targetFramePaddingElements;
4456
4457 Indices32 sourceFrameStrideElements(channels);
4458
4459 for (unsigned int c = 0u; c < channels; ++c)
4460 {
4461 if (sourceFramesPaddingElements == nullptr)
4462 {
4463 sourceFrameStrideElements[c] = width;
4464 }
4465 else
4466 {
4467 sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
4468 }
4469 }
4470
4471 for (unsigned int y = 0u; y < height; ++y)
4472 {
4473 TTarget* const targetRow = targetFrame + y * targetFrameStrideElements;
4474
4475 for (unsigned int x = 0u; x < width; ++x)
4476 {
4477 for (unsigned int c = 0u; c < channels; ++c)
4478 {
4479 *(targetRow + x * channels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x));
4480 }
4481 }
4482 }
4483 }
4484}
4485
4486template <typename T, unsigned int tChannel, unsigned int tChannels>
4487void FrameChannels::setChannelSubset(T* frame, const unsigned int width, const T value, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4488{
4489 static_assert(tChannels >= 1u, "Invalid channel number!");
4490 static_assert(tChannel < tChannels, "Invalid channel index!");
4491
4492 ocean_assert(frame != nullptr);
4493
4494 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
4495
4496 frame += firstRow * frameStrideElements + tChannel;
4497
4498 for (unsigned int n = 0u; n < numberRows; ++n)
4499 {
4500 for (unsigned int x = 0u; x < width; ++x)
4501 {
4502 frame[x * tChannels] = value;
4503 }
4504
4505 frame += frameStrideElements;
4506 }
4507}
4508
4509template <typename T, unsigned int tChannels, void (*tPixelFunction)(const T*, T*)>
4510void FrameChannels::applyPixelModifierSubset(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
4511{
4512 static_assert(tChannels >= 1u, "Invalid channel number");
4513
4514 ocean_assert(source && target);
4515 ocean_assert(source != target);
4516
4517 ocean_assert(numberRows > 0u);
4518 ocean_assert(firstRow + numberRows <= height);
4519
4520 const unsigned int widthElements = width * tChannels;
4521 const unsigned int targetBlockSize = widthElements * numberRows;
4522
4523 switch (conversionFlag)
4524 {
4525 case CONVERT_NORMAL:
4526 {
4527 source += firstRow * widthElements;
4528 target += firstRow * widthElements;
4529
4530 const T* const targetEnd = target + targetBlockSize;
4531
4532 while (target != targetEnd)
4533 {
4534 tPixelFunction(source, target);
4535
4536 source += tChannels;
4537 target += tChannels;
4538 }
4539
4540 break;
4541 }
4542
4543 case CONVERT_FLIPPED:
4544 {
4545 source += firstRow * widthElements;
4546 target += width * height * tChannels - (firstRow + 1u) * widthElements;
4547
4548 const T* const targetEnd = target - targetBlockSize;
4549
4550 while (target != targetEnd)
4551 {
4552 const T* const targetRowEnd = target + widthElements;
4553
4554 while (target != targetRowEnd)
4555 {
4556 tPixelFunction(source, target);
4557
4558 source += tChannels;
4559 target += tChannels;
4560 }
4561
4562 target -= (widthElements << 1); // width * tChannels * 2
4563 }
4564
4565 break;
4566 }
4567
4568 case CONVERT_MIRRORED:
4569 {
4570 source += firstRow * widthElements;
4571 target += (firstRow + 1u) * widthElements;
4572
4573 const T* const targetEnd = target + targetBlockSize;
4574
4575 while (target != targetEnd)
4576 {
4577 const T* const targetRowEnd = target - widthElements;
4578
4579 while (target != targetRowEnd)
4580 {
4581 tPixelFunction(source, target -= tChannels);
4582
4583 source += tChannels;
4584 }
4585
4586 target += widthElements << 1; // width * tChannels * 2;
4587 }
4588
4589 break;
4590 }
4591
4593 {
4594 source += firstRow * widthElements;
4595 target += width * height * tChannels - firstRow * widthElements;
4596
4597 const T* const targetEnd = target - targetBlockSize;
4598
4599 while (target != targetEnd)
4600 {
4601 tPixelFunction(source, target -= tChannels);
4602
4603 source += tChannels;
4604 }
4605
4606 break;
4607 }
4608
4609 // default: this case is not handled
4610 }
4611}
4612
4613template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tPixelFunction)(const TSource*, TTarget*)>
4614void FrameChannels::applyAdvancedPixelModifierSubset(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
4615{
4616 static_assert(tSourceChannels >= 1u, "Invalid source channel number");
4617 static_assert(tTargetChannels >= 1u, "Invalid target channel number");
4618
4619 ocean_assert(source && target);
4620 ocean_assert((void*)source != (void*)target);
4621
4622 ocean_assert(numberRows != 0u);
4623 ocean_assert(firstRow + numberRows <= height);
4624
4625 const unsigned int sourceWidthElements = width * tSourceChannels;
4626 const unsigned int targetWidthElements = width * tTargetChannels;
4627
4628 const unsigned int sourceStrideElements = sourceWidthElements + sourcePaddingElements;
4629 const unsigned int targetStrideElements = targetWidthElements + targetPaddingElements;
4630
4631 switch (conversionFlag)
4632 {
4633 case CONVERT_NORMAL:
4634 {
4635 for (unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4636 {
4637 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4638 TTarget* targetPixel = target + rowIndex * targetStrideElements;
4639
4640 for (unsigned int x = 0u; x < width; ++x)
4641 {
4642 tPixelFunction(sourcePixel, targetPixel);
4643
4644 sourcePixel += tSourceChannels;
4645 targetPixel += tTargetChannels;
4646 }
4647 }
4648
4649 break;
4650 }
4651
4652 case CONVERT_FLIPPED:
4653 {
4654 for (unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4655 {
4656 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4657 TTarget* targetPixel = target + (height - rowIndex - 1u) * targetStrideElements;
4658
4659 for (unsigned int x = 0u; x < width; ++x)
4660 {
4661 tPixelFunction(sourcePixel, targetPixel);
4662
4663 sourcePixel += tSourceChannels;
4664 targetPixel += tTargetChannels;
4665 }
4666 }
4667
4668 break;
4669 }
4670
4671 case CONVERT_MIRRORED:
4672 {
4673 for (unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4674 {
4675 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4676
4677 TTarget* const targetRowBegin = target + rowIndex * targetStrideElements;
4678 TTarget* targetPixel = targetRowBegin + targetWidthElements - tTargetChannels;
4679
4680 for (unsigned int x = 0u; x < width; ++x)
4681 {
4682 ocean_assert(targetPixel >= targetRowBegin);
4683 tPixelFunction(sourcePixel, targetPixel);
4684
4685 sourcePixel += tSourceChannels;
4686 targetPixel -= tTargetChannels;
4687 }
4688 }
4689
4690 break;
4691 }
4692
4694 {
4695 for (unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4696 {
4697 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4698
4699 TTarget* const targetRowBegin = target + (height - rowIndex - 1u) * targetStrideElements;
4700 TTarget* targetPixel = targetRowBegin + targetWidthElements - tTargetChannels;
4701
4702 for (unsigned int x = 0u; x < width; ++x)
4703 {
4704 ocean_assert(targetPixel >= targetRowBegin);
4705 tPixelFunction(sourcePixel, targetPixel);
4706
4707 sourcePixel += tSourceChannels;
4708 targetPixel -= tTargetChannels;
4709 }
4710 }
4711
4712 break;
4713 }
4714
4715 // default: this case is not handled
4716 }
4717}
4718
4719template <typename TSource0, typename TSource1, typename TTarget, typename TIntermediate, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
4720void FrameChannels::applyBivariateOperatorSubset(const TSource0* source0, const TSource1* source1, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
4721{
4722 static_assert(tSourceChannels >= 1u, "Invalid source channel number");
4723 static_assert(tTargetChannels >= 1u, "Invalid target channel number");
4724 static_assert(tOperator, "Invalid operator function");
4725
4726 ocean_assert(source0 != nullptr && source1 != nullptr && target != nullptr);
4727 ocean_assert((const void*)(source0) != (const void*)(target));
4728 ocean_assert((const void*)(source1) != (const void*)(target));
4729
4730 ocean_assert(numberRows != 0u);
4731 ocean_assert(firstRow + numberRows <= height);
4732
4733 const unsigned int source0StrideElements = width * tSourceChannels + source0PaddingElements;
4734 const unsigned int source1StrideElements = width * tSourceChannels + source1PaddingElements;
4735
4736 const unsigned int targetWidthElements = width * tTargetChannels;
4737
4738 const unsigned int targetStrideElements = targetWidthElements + targetPaddingElements;
4739
4740 switch (conversionFlag)
4741 {
4742 case CONVERT_NORMAL:
4743 {
4744 for (unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4745 {
4746 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4747 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4748
4749 TTarget* rowTarget = target + rowIndex * targetStrideElements;
4750 const TTarget* const rowTargetEnd = rowTarget + targetWidthElements;
4751
4752 while (rowTarget != rowTargetEnd)
4753 {
4754 ocean_assert(rowTarget < rowTargetEnd);
4755
4756 tOperator(rowSource0, rowSource1, rowTarget);
4757
4758 rowSource0 += tSourceChannels;
4759 rowSource1 += tSourceChannels;
4760
4761 rowTarget += tTargetChannels;
4762 }
4763 }
4764
4765 return;
4766 }
4767
4768 case CONVERT_FLIPPED:
4769 {
4770 for (unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4771 {
4772 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4773 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4774
4775 TTarget* rowTarget = target + (height - rowIndex - 1u) * targetStrideElements;
4776 const TTarget* const rowTargetEnd = rowTarget + targetWidthElements;
4777
4778 while (rowTarget != rowTargetEnd)
4779 {
4780 ocean_assert(rowTarget < rowTargetEnd);
4781
4782 tOperator(rowSource0, rowSource1, rowTarget);
4783
4784 rowSource0 += tSourceChannels;
4785 rowSource1 += tSourceChannels;
4786
4787 rowTarget += tTargetChannels;
4788 }
4789 }
4790
4791 return;
4792 }
4793
4794 case CONVERT_MIRRORED:
4795 {
4796 for (unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4797 {
4798 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4799 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4800
4801 TTarget* rowTarget = target + rowIndex * targetStrideElements + targetWidthElements - tTargetChannels;
4802 const TTarget* const rowTargetEnd = rowTarget - targetWidthElements;
4803
4804 while (rowTarget != rowTargetEnd)
4805 {
4806 ocean_assert(rowTarget > rowTargetEnd);
4807
4808 tOperator(rowSource0, rowSource1, rowTarget);
4809
4810 rowSource0 += tSourceChannels;
4811 rowSource1 += tSourceChannels;
4812
4813 rowTarget -= tTargetChannels;
4814 }
4815 }
4816
4817 return;
4818 }
4819
4821 {
4822 for (unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4823 {
4824 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4825 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4826
4827 TTarget* rowTarget = target + (height - rowIndex - 1u) * targetStrideElements + targetWidthElements - tTargetChannels;
4828 const TTarget* const rowTargetEnd = rowTarget - targetWidthElements;
4829
4830 while (rowTarget != rowTargetEnd)
4831 {
4832 ocean_assert(rowTarget > rowTargetEnd);
4833
4834 tOperator(rowSource0, rowSource1, rowTarget);
4835
4836 rowSource0 += tSourceChannels;
4837 rowSource1 += tSourceChannels;
4838
4839 rowTarget -= tTargetChannels;
4840 }
4841 }
4842
4843 return;
4844 }
4845
4846 default:
4847 ocean_assert(false && "This should never happen!");
4848 break;
4849 }
4850}
4851
4852template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels>
4853void FrameChannels::applyRowOperatorSubset(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels> rowOperatorFunction, const unsigned int firstRow, const unsigned int numberRows)
4854{
4855 static_assert(tSourceChannels >= 1u, "Invalid source channel number");
4856 static_assert(tTargetChannels >= 1u, "Invalid target channel number");
4857
4858 ocean_assert(source != nullptr && target != nullptr);
4859 ocean_assert((const void*)source != (const void*)target);
4860
4861 ocean_assert(width * tSourceChannels <= sourceStrideElements);
4862 ocean_assert(width * tTargetChannels <= targetStrideElements);
4863
4864 ocean_assert(rowOperatorFunction != nullptr);
4865
4866 ocean_assert(numberRows != 0u);
4867 ocean_assert(firstRow + numberRows <= height);
4868
4869 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4870 {
4871 rowOperatorFunction(source + y * sourceStrideElements, target + y * targetStrideElements, width, height, y, sourceStrideElements, targetStrideElements);
4872 }
4873}
4874
4875template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2>
4876void FrameChannels::convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* channelMultiplicationFactors_128)
4877{
4878 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2, "Invalid channel factors!");
4879
4880 ocean_assert(channelMultiplicationFactors_128 != nullptr);
4881 const unsigned int* channelFactors_128 = reinterpret_cast<const unsigned int*>(channelMultiplicationFactors_128);
4882 ocean_assert(channelFactors_128 != nullptr);
4883
4884 const unsigned int factorChannel0_128 = channelFactors_128[0];
4885 const unsigned int factorChannel1_128 = channelFactors_128[1];
4886 const unsigned int factorChannel2_128 = channelFactors_128[2];
4887
4888 ocean_assert(factorChannel0_128 <= 128u && factorChannel1_128 <= 128u && factorChannel2_128 <= 128u);
4889 ocean_assert(factorChannel0_128 + factorChannel1_128 + factorChannel2_128 == 128u);
4890
4891 ocean_assert(tUseFactorChannel0 == (factorChannel0_128 != 0u));
4892 ocean_assert(tUseFactorChannel1 == (factorChannel1_128 != 0u));
4893 ocean_assert(tUseFactorChannel2 == (factorChannel2_128 != 0u));
4894
4895 ocean_assert(source != nullptr && target != nullptr && size >= 1);
4896
4897 const uint8_t* const targetEnd = target + size;
4898
4899#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
4900
4901 constexpr size_t blockSize = 16;
4902 const size_t blocks = size / blockSize;
4903
4904 const __m128i multiplicationFactors0_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel0_128));
4905 const __m128i multiplicationFactors1_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel1_128));
4906 const __m128i multiplicationFactors2_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel2_128));
4907
4908 for (size_t n = 0; n < blocks; ++n)
4909 {
4910 convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(source, target, multiplicationFactors0_128_u_16x8, multiplicationFactors1_128_u_16x8, multiplicationFactors2_128_u_16x8);
4911
4912 source += blockSize * size_t(3);
4913 target += blockSize;
4914 }
4915
4916#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
4917
4918 constexpr size_t blockSize = 8;
4919 const size_t blocks = size / blockSize;
4920
4921 const uint8x8_t factorChannel0_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel0_128);
4922 const uint8x8_t factorChannel1_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel1_128);
4923 const uint8x8_t factorChannel2_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel2_128);
4924
4925 for (size_t n = 0; n < blocks; ++n)
4926 {
4927 convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON<tUseFactorChannel0, tUseFactorChannel1, tUseFactorChannel2>(source, target, factorChannel0_128_u_8x8, factorChannel1_128_u_8x8, factorChannel2_128_u_8x8);
4928
4929 source += blockSize * size_t(3);
4930 target += blockSize;
4931 }
4932
4933#endif
4934
4935 while (target != targetEnd)
4936 {
4937 ocean_assert(target < targetEnd);
4938
4939 const unsigned int channel0 = tUseFactorChannel0 ? (source[0] * factorChannel0_128) : 0u;
4940 const unsigned int channel1 = tUseFactorChannel1 ? (source[1] * factorChannel1_128) : 0u;
4941 const unsigned int channel2 = tUseFactorChannel2 ? (source[2] * factorChannel2_128) : 0u;
4942
4943 *target++ = (uint8_t)((channel0 + channel1 + channel2 + 64u) >> 7u);
4944 source += 3;
4945 }
4946}
4947
4948template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2, bool tUseFactorChannel3>
4949void FrameChannels::convertRow4ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* channelMultiplicationFactors_128)
4950{
4951 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2 || tUseFactorChannel3, "Invalid channel factors!");
4952
4953 ocean_assert(channelMultiplicationFactors_128 != nullptr);
4954 const unsigned int* channelFactors_128 = reinterpret_cast<const unsigned int*>(channelMultiplicationFactors_128);
4955 ocean_assert(channelFactors_128 != nullptr);
4956
4957 const unsigned int factorChannel0_128 = channelFactors_128[0];
4958 const unsigned int factorChannel1_128 = channelFactors_128[1];
4959 const unsigned int factorChannel2_128 = channelFactors_128[2];
4960 const unsigned int factorChannel3_128 = channelFactors_128[3];
4961
4962 ocean_assert(factorChannel0_128 <= 127u && factorChannel1_128 <= 127u && factorChannel2_128 <= 127u && factorChannel3_128 <= 127u);
4963 ocean_assert(factorChannel0_128 + factorChannel1_128 + factorChannel2_128 + factorChannel3_128 == 128u);
4964
4965 ocean_assert(tUseFactorChannel0 == (factorChannel0_128 != 0u));
4966 ocean_assert(tUseFactorChannel1 == (factorChannel1_128 != 0u));
4967 ocean_assert(tUseFactorChannel2 == (factorChannel2_128 != 0u));
4968 ocean_assert(tUseFactorChannel3 == (factorChannel3_128 != 0u));
4969
4970 ocean_assert(source != nullptr && target != nullptr && size >= 1);
4971
4972 const uint8_t* const targetEnd = target + size;
4973
4974#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
4975
4976 constexpr size_t blockSize = 16;
4977 const size_t blocks = size / blockSize;
4978
4979 const __m128i m128_multiplicationFactors = _mm_set1_epi32(int(factorChannel0_128 | (factorChannel1_128 << 8u) | (factorChannel2_128 << 16u) | (factorChannel3_128 << 24u)));
4980
4981 for (size_t n = 0; n < blocks; ++n)
4982 {
4983 convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(source, target, m128_multiplicationFactors);
4984
4985 source += blockSize * size_t(4);
4986 target += blockSize;
4987 }
4988
4989#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
4990
4991 constexpr size_t blockSize = 8;
4992 const size_t blocks = size / blockSize;
4993
4994 const uint8x8_t factorChannel0_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel0_128);
4995 const uint8x8_t factorChannel1_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel1_128);
4996 const uint8x8_t factorChannel2_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel2_128);
4997 const uint8x8_t factorChannel3_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel3_128);
4998
4999 for (size_t n = 0; n < blocks; ++n)
5000 {
5001 convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON<tUseFactorChannel0, tUseFactorChannel1, tUseFactorChannel2, tUseFactorChannel3>(source, target, factorChannel0_128_u_8x8, factorChannel1_128_u_8x8, factorChannel2_128_u_8x8, factorChannel3_128_u_8x8);
5002
5003 source += blockSize * size_t(4);
5004 target += blockSize;
5005 }
5006
5007#endif
5008
5009 while (target != targetEnd)
5010 {
5011 ocean_assert(target < targetEnd);
5012
5013 const unsigned int channel0 = tUseFactorChannel0 ? (source[0] * factorChannel0_128) : 0u;
5014 const unsigned int channel1 = tUseFactorChannel1 ? (source[1] * factorChannel1_128) : 0u;
5015 const unsigned int channel2 = tUseFactorChannel2 ? (source[2] * factorChannel2_128) : 0u;
5016 const unsigned int channel3 = tUseFactorChannel3 ? (source[3] * factorChannel3_128) : 0u;
5017
5018 *target++ = (uint8_t)((channel0 + channel1 + channel2 + channel3 + 64u) >> 7u);
5019 source += 4;
5020 }
5021}
5022
5023template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
5024void FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset(uint8_t* const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5025{
5026 static_assert(tChannels >= 2u, "Invalid channel number!");
5027 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
5028
5029 ocean_assert(frame != nullptr);
5030 ocean_assert(width >= 1u);
5031
5032 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
5033
5034 uint8_t* frameRow = frame + frameStrideElements * firstRow;
5035
5036 for (unsigned int y = 0u; y < numberRows; ++y)
5037 {
5038 for (unsigned int x = 0u; x < width; ++x)
5039 {
5040 if (frameRow[tAlphaChannelIndex])
5041 {
5042 const uint8_t alpha_2 = frameRow[tAlphaChannelIndex] / 2u;
5043
5044 for (unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5045 {
5046 if (channelIndex != tAlphaChannelIndex)
5047 {
5048 frameRow[channelIndex] = uint8_t(std::min((frameRow[channelIndex] * 255u + alpha_2) / frameRow[tAlphaChannelIndex], 255u));
5049 }
5050 }
5051 }
5052
5053 frameRow += tChannels;
5054 }
5055
5056 frameRow += framePaddingElements;
5057 }
5058}
5059
5060template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
5061void FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset(const uint8_t* const source, uint8_t* target, const unsigned int width, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5062{
5063 static_assert(tChannels >= 2u, "Invalid channel number!");
5064 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
5065
5066 ocean_assert(source != nullptr && target != nullptr);
5067 ocean_assert(width >= 1u);
5068
5069 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
5070 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
5071
5072 const uint8_t* sourceRow = source + sourceStrideElements * firstRow;
5073 uint8_t* targetRow = target + targetStrideElements * firstRow;
5074
5075 for (unsigned int y = 0u; y < numberRows; ++y)
5076 {
5077 for (unsigned int x = 0u; x < width; ++x)
5078 {
5079 if (sourceRow[tAlphaChannelIndex])
5080 {
5081 const uint8_t alpha_2 = sourceRow[tAlphaChannelIndex] / 2u;
5082
5083 for (unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5084 {
5085 if (channelIndex != tAlphaChannelIndex)
5086 {
5087 targetRow[channelIndex] = uint8_t(std::max((sourceRow[channelIndex] * 255u + alpha_2) / sourceRow[tAlphaChannelIndex], 255u));
5088 }
5089 else
5090 {
5091 targetRow[channelIndex] = sourceRow[channelIndex];
5092 }
5093 }
5094 }
5095 else
5096 {
5097 for (unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5098 {
5099 targetRow[channelIndex] = sourceRow[channelIndex];
5100 }
5101 }
5102
5103 sourceRow += tChannels;
5104 targetRow += tChannels;
5105 }
5106
5107 sourceRow += sourcePaddingElements;
5108 targetRow += targetPaddingElements;
5109 }
5110}
5111
5112template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
5113void FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset(uint8_t* const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5114{
5115 static_assert(tChannels >= 2u, "Invalid channel number!");
5116 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
5117
5118 ocean_assert(frame != nullptr);
5119 ocean_assert(width >= 1u);
5120
5121 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
5122
5123 uint8_t* frameRow = frame + frameStrideElements * firstRow;
5124
5125 for (unsigned int y = 0u; y < numberRows; ++y)
5126 {
5127 for (unsigned int x = 0u; x < width; ++x)
5128 {
5129 for (unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5130 {
5131 if (channelIndex != tAlphaChannelIndex)
5132 {
5133 frameRow[channelIndex] = (frameRow[channelIndex] * frameRow[tAlphaChannelIndex] + 127u) / 255u;
5134 }
5135 }
5136
5137 frameRow += tChannels;
5138 }
5139
5140 frameRow += framePaddingElements;
5141 }
5142}
5143
5144template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
5145void FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5146{
5147 static_assert(tChannels >= 2u, "Invalid channel number!");
5148 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
5149
5150 ocean_assert(source != nullptr && target != nullptr);
5151 ocean_assert(width >= 1u);
5152
5153 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
5154 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
5155
5156 const uint8_t* sourceRow = source + sourceStrideElements * firstRow;
5157 uint8_t* targetRow = target + targetStrideElements * firstRow;
5158
5159 for (unsigned int y = 0u; y < numberRows; ++y)
5160 {
5161 for (unsigned int x = 0u; x < width; ++x)
5162 {
5163 for (unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5164 {
5165 if (channelIndex != tAlphaChannelIndex)
5166 {
5167 targetRow[channelIndex] = (sourceRow[channelIndex] * sourceRow[tAlphaChannelIndex] + 127u) / 255u;
5168 }
5169 else
5170 {
5171 targetRow[channelIndex] = sourceRow[channelIndex];
5172 }
5173 }
5174
5175 sourceRow += tChannels;
5176 targetRow += tChannels;
5177 }
5178
5179 sourceRow += sourcePaddingElements;
5180 targetRow += targetPaddingElements;
5181 }
5182}
5183
5184#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
5185
5186OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactors0_128_u_16x8, const __m128i& multiplicationFactors1_128_u_16x8, const __m128i& multiplicationFactors2_128_u_16x8)
5187{
5188 ocean_assert(source != nullptr && target != nullptr);
5189
5190 // the documentation of this function is designed for RGB24 to Y8 conversion
5191 // however, in general this function can be used to apply a linear combination on the four source channels
5192 // to create one output channel
5193
5194 // precise color space conversion:
5195 // Y = 0.299 * R + 0.587 * G + 0.114 * B
5196
5197 // approximation:
5198 // Y = (38 * R + 75 * G + 15 * B) / 128
5199
5200 // we expect the following input pattern (for here RGB24):
5201 // FEDC BA98 7654 3210 FEDC BA98 7654 3210 FEDC BA98 7654 3210
5202 // BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR
5203
5204 // we store eight 16 bit values holding 64 for rounding purpose:
5205 const __m128i constant64_u_16x8 = _mm_set1_epi32(0x00400040);
5206
5207 const __m128i sourceA_u_8x16 = _mm_loadu_si128((const __m128i*)source + 0);
5208 const __m128i sourceB_u_8x16 = _mm_loadu_si128((const __m128i*)source + 1);
5209 const __m128i sourceC_u_8x16 = _mm_loadu_si128((const __m128i*)source + 2);
5210
5211 __m128i channel0_u_8x16;
5212 __m128i channel1_u_8x16;
5213 __m128i channel2_u_8x16;
5214 SSE::deInterleave3Channel8Bit48Elements(sourceA_u_8x16, sourceB_u_8x16, sourceC_u_8x16, channel0_u_8x16, channel1_u_8x16, channel2_u_8x16);
5215
5216 // now we need 16 bit values instead of 8 bit values
5217
5218 const __m128i channel0_low_u_8x16 = SSE::removeHighBits16_8(channel0_u_8x16);
5219 const __m128i channel1_low_u_8x16 = SSE::removeHighBits16_8(channel1_u_8x16);
5220 const __m128i channel2_low_u_8x16 = SSE::removeHighBits16_8(channel2_u_8x16);
5221
5222 const __m128i channel0_high_u_8x16 = _mm_srli_epi16(channel0_u_8x16, 8);
5223 const __m128i channel1_high_u_8x16 = _mm_srli_epi16(channel1_u_8x16, 8);
5224 const __m128i channel2_high_u_8x16 = _mm_srli_epi16(channel2_u_8x16, 8);
5225
5226 // we multiply each channel with the corresponding multiplication factors
5227
5228 const __m128i result0_low_u_8x16 = _mm_mullo_epi16(channel0_low_u_8x16, multiplicationFactors0_128_u_16x8);
5229 const __m128i result0_high_u_8x16 = _mm_mullo_epi16(channel0_high_u_8x16, multiplicationFactors0_128_u_16x8);
5230
5231 const __m128i result1_low_u_8x16 = _mm_mullo_epi16(channel1_low_u_8x16, multiplicationFactors1_128_u_16x8);
5232 const __m128i result1_high_u_8x16 = _mm_mullo_epi16(channel1_high_u_8x16, multiplicationFactors1_128_u_16x8);
5233
5234 const __m128i result2_low_u_8x16 = _mm_mullo_epi16(channel2_low_u_8x16, multiplicationFactors2_128_u_16x8);
5235 const __m128i result2_high_u_8x16 = _mm_mullo_epi16(channel2_high_u_8x16, multiplicationFactors2_128_u_16x8);
5236
5237 // we sum up all results and add 64 for rounding purpose
5238 const __m128i result128_low_u_8x16 = _mm_adds_epu16(_mm_adds_epu16(result0_low_u_8x16, result1_low_u_8x16), _mm_adds_epu16(result2_low_u_8x16, constant64_u_16x8));
5239 const __m128i result128_high_u_8x16 = _mm_adds_epu16(_mm_adds_epu16(result0_high_u_8x16, result1_high_u_8x16), _mm_adds_epu16(result2_high_u_8x16, constant64_u_16x8));
5240
5241 // we shift the multiplication results by 7 bits (= 128)
5242 const __m128i result_low_u_8x16 = _mm_srli_epi16(result128_low_u_8x16, 7);
5243 const __m128i result_high_u_8x16 = _mm_srli_epi16(result128_high_u_8x16, 7);
5244
5245 // finally, we have to get rid of the upper zero bits by combining two 128 bit registers to one:
5246 const __m128i result_u_8x16 = _mm_or_si128(result_low_u_8x16, _mm_slli_epi16(result_high_u_8x16, 8));
5247
5248 // and we can store the result
5249 _mm_storeu_si128((__m128i*)target, result_u_8x16);
5250}
5251
5252OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& factorChannel00_128_s_16x8, const __m128i& factorChannel10_128_s_16x8, const __m128i& factorChannel20_128_s_16x8, const __m128i& factorChannel01_128_s_16x8, const __m128i& factorChannel11_128_s_16x8, const __m128i& factorChannel21_128_s_16x8, const __m128i& factorChannel02_128_s_16x8, const __m128i& factorChannel12_128_s_16x8, const __m128i& factorChannel22_128_s_16x8, const __m128i& biasChannel0_s_16x8, const __m128i& biasChannel1_s_16x8, const __m128i& biasChannel2_s_16x8)
5253{
5254 ocean_assert(source != nullptr && target != nullptr);
5255
5256 // the documentation of this function designed for RGB24 to YUV24 conversion
5257
5258 // precise color space conversion:
5259 // | Y | | 0.2578125 0.5039063 0.09765625 16.0 | | R |
5260 // | U | = | -0.1484375 -0.2890625 0.4375 128.0 | * | G |
5261 // | V | | 0.4375 -0.3671875 -0.0703125 128.0 | | B |
5262 // | 1 |
5263
5264 // approximation:
5265 // Y = ( 33 * R + 64 * G + 13 * B) / 128 + 16
5266 // U = (-19 * R - 37 * G + 56 * B) / 128 + 128
5267 // V = ( 56 * R - 47 * G - 9 * B) / 128 + 128
5268
5269 // we expect the following input pattern (for here RGB24):
5270 // FEDC BA98 7654 3210 FEDC BA98 7654 3210 FEDC BA98 7654 3210
5271 // BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR
5272
5273 const __m128i sourceA_u_8x16 = _mm_loadu_si128((const __m128i*)source + 0);
5274 const __m128i sourceB_u_8x16 = _mm_loadu_si128((const __m128i*)source + 1);
5275 const __m128i sourceC_u_8x16 = _mm_loadu_si128((const __m128i*)source + 2);
5276
5277 __m128i channel0_u_8x16;
5278 __m128i channel1_u_8x16;
5279 __m128i channel2_u_8x16;
5280 SSE::deInterleave3Channel8Bit48Elements(sourceA_u_8x16, sourceB_u_8x16, sourceC_u_8x16, channel0_u_8x16, channel1_u_8x16, channel2_u_8x16);
5281
5282 // now we need 16 bit values instead of 8 bit values
5283
5284 const __m128i channel0_low_u_8x16 = SSE::removeHighBits16_8(channel0_u_8x16);
5285 const __m128i channel1_low_u_8x16 = SSE::removeHighBits16_8(channel1_u_8x16);
5286 const __m128i channel2_low_u_8x16 = SSE::removeHighBits16_8(channel2_u_8x16);
5287
5288 const __m128i channel0_high_u_8x16 = _mm_srli_epi16(channel0_u_8x16, 8);
5289 const __m128i channel1_high_u_8x16 = _mm_srli_epi16(channel1_u_8x16, 8);
5290 const __m128i channel2_high_u_8x16 = _mm_srli_epi16(channel2_u_8x16, 8);
5291
5292 // we multiply each channel with the corresponding multiplication factors
5293
5294 __m128i result0_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel00_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel01_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel02_128_s_16x8));
5295 __m128i result1_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel10_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel11_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel12_128_s_16x8));
5296 __m128i result2_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel20_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel21_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel22_128_s_16x8));
5297
5298 __m128i result0_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel00_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel01_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel02_128_s_16x8));
5299 __m128i result1_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel10_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel11_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel12_128_s_16x8));
5300 __m128i result2_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel20_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel21_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel22_128_s_16x8));
5301
5302 // we normalize the result by 128 and add the bias
5303
5304 result0_low_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result0_low_u_8x16, 7), biasChannel0_s_16x8);
5305 result1_low_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result1_low_u_8x16, 7), biasChannel1_s_16x8);
5306 result2_low_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result2_low_u_8x16, 7), biasChannel2_s_16x8);
5307
5308 result0_high_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result0_high_u_8x16, 7), biasChannel0_s_16x8);
5309 result1_high_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result1_high_u_8x16, 7), biasChannel1_s_16x8);
5310 result2_high_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result2_high_u_8x16, 7), biasChannel2_s_16x8);
5311
5312 // from here, we need values within the range [0, 255], so that we clamp the results
5313
5314 const __m128i constant255_s_16x8 = _mm_set1_epi16(255);
5315
5316 result0_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result0_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5317 result1_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result1_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5318 result2_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result2_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5319
5320 result0_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result0_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5321 result1_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result1_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5322 result2_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result2_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5323
5324 // finally, we have to get rid of the upper zero bits by combining two 128 bit registers to one:
5325 const __m128i result0_u_8x16 = _mm_or_si128(result0_low_u_8x16, _mm_slli_epi16(result0_high_u_8x16, 8));
5326 const __m128i result1_u_8x16 = _mm_or_si128(result1_low_u_8x16, _mm_slli_epi16(result1_high_u_8x16, 8));
5327 const __m128i result2_u_8x16 = _mm_or_si128(result2_low_u_8x16, _mm_slli_epi16(result2_high_u_8x16, 8));
5328
5329 __m128i resultA_u_8x16;
5330 __m128i resultB_u_8x16;
5331 __m128i resultC_u_8x16;
5332 SSE::interleave3Channel8Bit48Elements(result0_u_8x16, result1_u_8x16, result2_u_8x16, resultA_u_8x16, resultB_u_8x16, resultC_u_8x16);
5333
5334 // and we can store the result
5335 _mm_storeu_si128((__m128i*)target + 0, resultA_u_8x16);
5336 _mm_storeu_si128((__m128i*)target + 1, resultB_u_8x16);
5337 _mm_storeu_si128((__m128i*)target + 2, resultC_u_8x16);
5338}
5339
5340OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& factorChannel00_1024_s_16x8, const __m128i& factorChannel10_1024_s_16x8, const __m128i& factorChannel20_1024_s_16x8, const __m128i& factorChannel01_1024_s_16x8, const __m128i& factorChannel11_1024_s_16x8, const __m128i& factorChannel21_1024_s_16x8, const __m128i& factorChannel02_1024_s_16x8, const __m128i& factorChannel12_1024_s_16x8, const __m128i& factorChannel22_1024_s_16x8, const __m128i& biasChannel0_1024_s_32x4, const __m128i& biasChannel1_1024_s_32x4, const __m128i& biasChannel2_1024_s_32x4)
5341{
5342 ocean_assert(source != nullptr && target != nullptr);
5343
5344 // the documentation of this function designed for RGB24 to YUV24 conversion
5345
5346 /// precise color space conversion:
5347 // | R | | 1.1639404296875 0.0 1.595947265625 -222.904296875 | | Y |
5348 // | G | = | 1.1639404296875 -0.3909912109375 -0.81298828125 135.486328125 | * | U |
5349 // | B | | 1.1639404296875 2.0179443359375 0.0 -276.919921875 | | V |
5350 // | 1 |
5351
5352 // approximation:
5353 // | R | | 1192 0 1634 -223 | | Y |
5354 // | G | = | 1192 -400 -833 135 | * | U |
5355 // | B | | 1192 2066 0 -277 | | V |
5356 // | 1 |
5357
5358 // we expect the following input pattern (for here RGB24):
5359 // FEDC BA98 7654 3210 FEDC BA98 7654 3210 FEDC BA98 7654 3210
5360 // BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR
5361
5362 const __m128i sourceA_u_8x16 = _mm_loadu_si128((const __m128i*)source + 0);
5363 const __m128i sourceB_u_8x16 = _mm_loadu_si128((const __m128i*)source + 1);
5364 const __m128i sourceC_u_8x16 = _mm_loadu_si128((const __m128i*)source + 2);
5365
5366 __m128i channel0_u_8x16;
5367 __m128i channel1_u_8x16;
5368 __m128i channel2_u_8x16;
5369 SSE::deInterleave3Channel8Bit48Elements(sourceA_u_8x16, sourceB_u_8x16, sourceC_u_8x16, channel0_u_8x16, channel1_u_8x16, channel2_u_8x16);
5370
5371
5372 // now we need 16 bit values instead of 8 bit values
5373
5374 const __m128i channel0_low_u_16x8 = SSE::removeHighBits16_8(channel0_u_8x16);
5375 const __m128i channel1_low_u_16x8 = SSE::removeHighBits16_8(channel1_u_8x16);
5376 const __m128i channel2_low_u_16x8 = SSE::removeHighBits16_8(channel2_u_8x16);
5377
5378 const __m128i channel0_high_u_16x8 = _mm_srli_epi16(channel0_u_8x16, 8);
5379 const __m128i channel1_high_u_16x8 = _mm_srli_epi16(channel1_u_8x16, 8);
5380 const __m128i channel2_high_u_16x8 = _mm_srli_epi16(channel2_u_8x16, 8);
5381
5382
5383 // we multiply each channel with the corresponding multiplication factors (int16_t * int16_t = int32_t), and we normalize the result by 1024
5384
5385 __m128i result0_low_A_s_32x4;
5386 __m128i result0_low_B_s_32x4;
5387 __m128i result0_high_A_s_32x4;
5388 __m128i result0_high_B_s_32x4;
5389
5390 SSE::multiplyInt8x16ToInt32x8(channel0_low_u_16x8, factorChannel00_1024_s_16x8, result0_low_A_s_32x4, result0_low_B_s_32x4);
5391 SSE::multiplyInt8x16ToInt32x8(channel0_high_u_16x8, factorChannel00_1024_s_16x8, result0_high_A_s_32x4, result0_high_B_s_32x4);
5392
5393 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_low_u_16x8, factorChannel01_1024_s_16x8, result0_low_A_s_32x4, result0_low_B_s_32x4);
5394 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_high_u_16x8, factorChannel01_1024_s_16x8, result0_high_A_s_32x4, result0_high_B_s_32x4);
5395
5396 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_low_u_16x8, factorChannel02_1024_s_16x8, result0_low_A_s_32x4, result0_low_B_s_32x4);
5397 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_high_u_16x8, factorChannel02_1024_s_16x8, result0_high_A_s_32x4, result0_high_B_s_32x4);
5398
5399 result0_low_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result0_low_A_s_32x4, biasChannel0_1024_s_32x4), 10);
5400 result0_low_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result0_low_B_s_32x4, biasChannel0_1024_s_32x4), 10);
5401 result0_high_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result0_high_A_s_32x4, biasChannel0_1024_s_32x4), 10);
5402 result0_high_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result0_high_B_s_32x4, biasChannel0_1024_s_32x4), 10);
5403
5404
5405 __m128i result1_low_A_s_32x4;
5406 __m128i result1_low_B_s_32x4;
5407 __m128i result1_high_A_s_32x4;
5408 __m128i result1_high_B_s_32x4;
5409
5410 SSE::multiplyInt8x16ToInt32x8(channel0_low_u_16x8, factorChannel10_1024_s_16x8, result1_low_A_s_32x4, result1_low_B_s_32x4);
5411 SSE::multiplyInt8x16ToInt32x8(channel0_high_u_16x8, factorChannel10_1024_s_16x8, result1_high_A_s_32x4, result1_high_B_s_32x4);
5412
5413 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_low_u_16x8, factorChannel11_1024_s_16x8, result1_low_A_s_32x4, result1_low_B_s_32x4);
5414 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_high_u_16x8, factorChannel11_1024_s_16x8, result1_high_A_s_32x4, result1_high_B_s_32x4);
5415
5416 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_low_u_16x8, factorChannel12_1024_s_16x8, result1_low_A_s_32x4, result1_low_B_s_32x4);
5417 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_high_u_16x8, factorChannel12_1024_s_16x8, result1_high_A_s_32x4, result1_high_B_s_32x4);
5418
5419 result1_low_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result1_low_A_s_32x4, biasChannel1_1024_s_32x4), 10);
5420 result1_low_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result1_low_B_s_32x4, biasChannel1_1024_s_32x4), 10);
5421 result1_high_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result1_high_A_s_32x4, biasChannel1_1024_s_32x4), 10);
5422 result1_high_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result1_high_B_s_32x4, biasChannel1_1024_s_32x4), 10);
5423
5424
5425 __m128i result2_low_A_s_32x4;
5426 __m128i result2_low_B_s_32x4;
5427 __m128i result2_high_A_s_32x4;
5428 __m128i result2_high_B_s_32x4;
5429
5430 SSE::multiplyInt8x16ToInt32x8(channel0_low_u_16x8, factorChannel20_1024_s_16x8, result2_low_A_s_32x4, result2_low_B_s_32x4);
5431 SSE::multiplyInt8x16ToInt32x8(channel0_high_u_16x8, factorChannel20_1024_s_16x8, result2_high_A_s_32x4, result2_high_B_s_32x4);
5432
5433 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_low_u_16x8, factorChannel21_1024_s_16x8, result2_low_A_s_32x4, result2_low_B_s_32x4);
5434 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_high_u_16x8, factorChannel21_1024_s_16x8, result2_high_A_s_32x4, result2_high_B_s_32x4);
5435
5436 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_low_u_16x8, factorChannel22_1024_s_16x8, result2_low_A_s_32x4, result2_low_B_s_32x4);
5437 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_high_u_16x8, factorChannel22_1024_s_16x8, result2_high_A_s_32x4, result2_high_B_s_32x4);
5438
5439 result2_low_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result2_low_A_s_32x4, biasChannel2_1024_s_32x4), 10);
5440 result2_low_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result2_low_B_s_32x4, biasChannel2_1024_s_32x4), 10);
5441 result2_high_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result2_high_A_s_32x4, biasChannel2_1024_s_32x4), 10);
5442 result2_high_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result2_high_B_s_32x4, biasChannel2_1024_s_32x4), 10);
5443
5444
5445 // now we have int32_t values with 0x0000 or 0xFFFF in the high 16 bits
5446 // thus we can merge 8 int32_t values to 8 int16_t values
5447
5448 const __m128i mask_0000FFFF_32x4 = _mm_set1_epi32(0x0000FFFF);
5449
5450 __m128i result0_A_s_16x8 = _mm_or_si128(_mm_and_si128(result0_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result0_high_A_s_32x4, 16));
5451 __m128i result0_B_s_16x8 = _mm_or_si128(_mm_and_si128(result0_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result0_high_B_s_32x4, 16));
5452
5453 __m128i result1_A_s_16x8 = _mm_or_si128(_mm_and_si128(result1_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result1_high_A_s_32x4, 16));
5454 __m128i result1_B_s_16x8 = _mm_or_si128(_mm_and_si128(result1_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result1_high_B_s_32x4, 16));
5455
5456 __m128i result2_A_s_16x8 = _mm_or_si128(_mm_and_si128(result2_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result2_high_A_s_32x4, 16));
5457 __m128i result2_B_s_16x8 = _mm_or_si128(_mm_and_si128(result2_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result2_high_B_s_32x4, 16));
5458
5459
5460 // we combine 16 int16_t values to 16 uint8_t values (saturated)
5461
5462 const __m128i result0_u_8x16 = _mm_packus_epi16(result0_A_s_16x8, result0_B_s_16x8);
5463 const __m128i result1_u_8x16 = _mm_packus_epi16(result1_A_s_16x8, result1_B_s_16x8);
5464 const __m128i result2_u_8x16 = _mm_packus_epi16(result2_A_s_16x8, result2_B_s_16x8);
5465
5466 __m128i resultA_u_8x16;
5467 __m128i resultB_u_8x16;
5468 __m128i resultC_u_8x16;
5469 SSE::interleave3Channel8Bit48Elements(result0_u_8x16, result1_u_8x16, result2_u_8x16, resultA_u_8x16, resultB_u_8x16, resultC_u_8x16);
5470
5471 // and we can store the result
5472 _mm_storeu_si128((__m128i*)target + 0, resultA_u_8x16);
5473 _mm_storeu_si128((__m128i*)target + 1, resultB_u_8x16);
5474 _mm_storeu_si128((__m128i*)target + 2, resultC_u_8x16);
5475}
5476
5477OCEAN_FORCE_INLINE void FrameChannels::convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactors0123_128_s_32x4)
5478{
5479 ocean_assert(source != nullptr && target != nullptr);
5480
5481 // the documentation of this function is designed for RGBA32 to Y8 conversion
5482 // however, in general this function can be used to apply a linear combination on the four source channels
5483 // to create one output channel
5484
5485 // precise color space conversion:
5486 // Y = 0.299 * R + 0.587 * G + 0.114 * B
5487
5488 // approximation:
5489 // Y = (38 * R + 75 * G + 15 * B) / 128
5490
5491 // we expect the following input pattern (for here RGBA32):
5492 // FEDC BA98 7654 3210
5493 // ABGR ABGR ABGR ABGR
5494
5495 // we calculate:
5496 // (int16_t)((uint8_t)R * (signed char)38) + (int16_t)((uint8_t)G * (signed char)75) for the first 16 bits
5497 // (int16_t)((uint8_t)B * (signed char)15) + (int16_t)((uint8_t)A * (signed char)0) for the second 16 bits
5498
5499 // we store eight 16 bit values holding 64 for rounding purpose:
5500 // FE DC BA 98 76 54 32 10
5501 // 64 64 64 64 64 64 64 64
5502 const __m128i constant64_u_8x16 = _mm_set1_epi32(0x00400040);
5503
5504 const __m128i pixelsA_u_8x16 = _mm_loadu_si128((const __m128i*)source + 0);
5505 const __m128i pixelsB_u_8x16 = _mm_loadu_si128((const __m128i*)source + 1);
5506 const __m128i pixelsC_u_8x16 = _mm_loadu_si128((const __m128i*)source + 2);
5507 const __m128i pixelsD_u_8x16 = _mm_loadu_si128((const __m128i*)source + 3);
5508
5509 // we get the following pattern
5510 // FE DC BA 98 76 54 32 10
5511 // 0b gr 0b gr 0b gr 0b gr
5512 const __m128i intermediateResults0_u_16x8 = _mm_maddubs_epi16(pixelsA_u_8x16, multiplicationFactors0123_128_s_32x4);
5513 const __m128i intermediateResults1_u_16x8 = _mm_maddubs_epi16(pixelsB_u_8x16, multiplicationFactors0123_128_s_32x4);
5514 const __m128i intermediateResults2_u_16x8 = _mm_maddubs_epi16(pixelsC_u_8x16, multiplicationFactors0123_128_s_32x4);
5515 const __m128i intermediateResults3_u_16x8 = _mm_maddubs_epi16(pixelsD_u_8x16, multiplicationFactors0123_128_s_32x4);
5516
5517 // now we sum the pairs of neighboring 16 bit intermediate results
5518 __m128i grayA_u_16x8 = _mm_hadd_epi16(intermediateResults0_u_16x8, intermediateResults1_u_16x8);
5519 __m128i grayB_u_16x8 = _mm_hadd_epi16(intermediateResults2_u_16x8, intermediateResults3_u_16x8);
5520
5521 // we add 64 for rounding purpose
5522 grayA_u_16x8 = _mm_add_epi16(grayA_u_16x8, constant64_u_8x16);
5523 grayB_u_16x8 = _mm_add_epi16(grayB_u_16x8, constant64_u_8x16);
5524
5525 // we shift the multiplication results by 7 bits (= 128)
5526 grayA_u_16x8 = _mm_srli_epi16(grayA_u_16x8, 7);
5527 grayB_u_16x8 = _mm_srli_epi16(grayB_u_16x8, 7);
5528
5529 // now we have the following pattern (in two 128 bit registers):
5530 // FEDCBA9876543210
5531 // 0Y0Y0Y0Y0Y0Y0Y0Y
5532
5533 // finally, we have to get rid of the upper zero bits by combining two 128 bit registers to one:
5534 const __m128i gray_u_8x16 = _mm_packus_epi16(grayA_u_16x8, grayB_u_16x8);
5535
5536 // and we can store the result
5537 _mm_storeu_si128((__m128i*)target, gray_u_8x16);
5538}
5539
5540void FrameChannels::convert4ChannelsTo2Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactorsChannel0_0123_128_s_16x8, const __m128i& multiplicationFactorsChannel1_0123_128_s_16x8)
5541{
5542 ocean_assert(source != nullptr && target != nullptr);
5543
5544 // the documentation of this function is designed for RGBA32 to YA16 conversion
5545 // however, in general this function can be used to apply a linear combination on the four source channels
5546 // to create one output channel
5547
5548 // precise color space conversion:
5549 // Y = 0.299 * R + 0.587 * G + 0.114 * B + 0.0 * A
5550 // A = 0.0 * R + 0.0 * G + 0.0 * B + 1.0 * A
5551
5552 // approximation:
5553 // Y = (38 * R + 75 * G + 15 * B + 0 * A) / 128
5554 // A = (128 * A) / 128
5555
5556 // we expect the following input pattern (for here RGBA32):
5557 // FEDC BA98 7654 3210
5558 // ABGR ABGR ABGR ABGR
5559
5560 // we store eight 16 bit values holding 64 for rounding purpose:
5561 // FE DC BA 98 76 54 32 10
5562 // 64 64 64 64 64 64 64 64
5563 const __m128i constant64_u_8x16 = _mm_set1_epi32(0x00400040);
5564
5565 const __m128i pixelsA_u_8x16 = _mm_loadu_si128((const __m128i*)source + 0);
5566 const __m128i pixelsB_u_8x16 = _mm_loadu_si128((const __m128i*)source + 1);
5567 const __m128i pixelsC_u_8x16 = _mm_loadu_si128((const __m128i*)source + 2);
5568 const __m128i pixelsD_u_8x16 = _mm_loadu_si128((const __m128i*)source + 3);
5569
5570 // we convert the 8 bit values to 16 bit values
5571
5572 const __m128i pixelsA_u_16x8 = _mm_unpacklo_epi8(pixelsA_u_8x16, _mm_setzero_si128());
5573 const __m128i pixelsB_u_16x8 = _mm_unpackhi_epi8(pixelsA_u_8x16, _mm_setzero_si128());
5574
5575 const __m128i pixelsC_u_16x8 = _mm_unpacklo_epi8(pixelsB_u_8x16, _mm_setzero_si128());
5576 const __m128i pixelsD_u_16x8 = _mm_unpackhi_epi8(pixelsB_u_8x16, _mm_setzero_si128());
5577
5578 const __m128i pixelsE_u_16x8 = _mm_unpacklo_epi8(pixelsC_u_8x16, _mm_setzero_si128());
5579 const __m128i pixelsF_u_16x8 = _mm_unpackhi_epi8(pixelsC_u_8x16, _mm_setzero_si128());
5580
5581 const __m128i pixelsG_u_16x8 = _mm_unpacklo_epi8(pixelsD_u_8x16, _mm_setzero_si128());
5582 const __m128i pixelsH_u_16x8 = _mm_unpackhi_epi8(pixelsD_u_8x16, _mm_setzero_si128());
5583
5584 // now we have the following pattern
5585 // FE DC BA 98 76 54 32 10
5586 // 0a 0b 0g 0r 0a 0b 0g 0r
5587
5588 const __m128i intermediateResultsChannel0_0_u_32x4 = _mm_madd_epi16(pixelsA_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8); // r * f00 + g * f01 | b * f02 + a * f03 | ...
5589 const __m128i intermediateResultsChannel0_1_u_32x4 = _mm_madd_epi16(pixelsB_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5590 const __m128i intermediateResultsChannel0_2_u_32x4 = _mm_madd_epi16(pixelsC_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5591 const __m128i intermediateResultsChannel0_3_u_32x4 = _mm_madd_epi16(pixelsD_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5592 const __m128i intermediateResultsChannel0_4_u_32x4 = _mm_madd_epi16(pixelsE_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5593 const __m128i intermediateResultsChannel0_5_u_32x4 = _mm_madd_epi16(pixelsF_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5594 const __m128i intermediateResultsChannel0_6_u_32x4 = _mm_madd_epi16(pixelsG_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5595 const __m128i intermediateResultsChannel0_7_u_32x4 = _mm_madd_epi16(pixelsH_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5596
5597 const __m128i resultsChannel0_A_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_0_u_32x4, intermediateResultsChannel0_1_u_32x4); // r * f00 + g * f01 + b * f02 + a * f03 | ...
5598 const __m128i resultsChannel0_B_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_2_u_32x4, intermediateResultsChannel0_3_u_32x4);
5599 const __m128i resultsChannel0_C_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_4_u_32x4, intermediateResultsChannel0_5_u_32x4);
5600 const __m128i resultsChannel0_D_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_6_u_32x4, intermediateResultsChannel0_7_u_32x4);
5601
5602
5603 const __m128i intermediateResultsChannel1_0_u_32x4 = _mm_madd_epi16(pixelsA_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8); // r * f10 + g * f11 | b * f12 + a * f13 | ...
5604 const __m128i intermediateResultsChannel1_1_u_32x4 = _mm_madd_epi16(pixelsB_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5605 const __m128i intermediateResultsChannel1_2_u_32x4 = _mm_madd_epi16(pixelsC_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5606 const __m128i intermediateResultsChannel1_3_u_32x4 = _mm_madd_epi16(pixelsD_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5607 const __m128i intermediateResultsChannel1_4_u_32x4 = _mm_madd_epi16(pixelsE_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5608 const __m128i intermediateResultsChannel1_5_u_32x4 = _mm_madd_epi16(pixelsF_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5609 const __m128i intermediateResultsChannel1_6_u_32x4 = _mm_madd_epi16(pixelsG_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5610 const __m128i intermediateResultsChannel1_7_u_32x4 = _mm_madd_epi16(pixelsH_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5611
5612 const __m128i resultsChannel1_A_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_0_u_32x4, intermediateResultsChannel1_1_u_32x4); // r * f10 + g * f11 + b * f12 + a * f13 | ...
5613 const __m128i resultsChannel1_B_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_2_u_32x4, intermediateResultsChannel1_3_u_32x4);
5614 const __m128i resultsChannel1_C_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_4_u_32x4, intermediateResultsChannel1_5_u_32x4);
5615 const __m128i resultsChannel1_D_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_6_u_32x4, intermediateResultsChannel1_7_u_32x4);
5616
5617 // now we interleave the results of first and second channel (as both results fit into 16 bit)
5618
5619 __m128i resultA_u_16x8 = _mm_or_si128(resultsChannel0_A_u_32x4, _mm_slli_epi32(resultsChannel1_A_u_32x4, 16));
5620 __m128i resultB_u_16x8 = _mm_or_si128(resultsChannel0_B_u_32x4, _mm_slli_epi32(resultsChannel1_B_u_32x4, 16));
5621 __m128i resultC_u_16x8 = _mm_or_si128(resultsChannel0_C_u_32x4, _mm_slli_epi32(resultsChannel1_C_u_32x4, 16));
5622 __m128i resultD_u_16x8 = _mm_or_si128(resultsChannel0_D_u_32x4, _mm_slli_epi32(resultsChannel1_D_u_32x4, 16));
5623
5624 // we add 64 for rounding purpose
5625 resultA_u_16x8 = _mm_add_epi16(resultA_u_16x8, constant64_u_8x16);
5626 resultB_u_16x8 = _mm_add_epi16(resultB_u_16x8, constant64_u_8x16);
5627 resultC_u_16x8 = _mm_add_epi16(resultC_u_16x8, constant64_u_8x16);
5628 resultD_u_16x8 = _mm_add_epi16(resultD_u_16x8, constant64_u_8x16);
5629
5630 // we shift the multiplication results by 7 bits (= 128)
5631 resultA_u_16x8 = _mm_srli_epi16(resultA_u_16x8, 7);
5632 resultB_u_16x8 = _mm_srli_epi16(resultB_u_16x8, 7);
5633 resultC_u_16x8 = _mm_srli_epi16(resultC_u_16x8, 7);
5634 resultD_u_16x8 = _mm_srli_epi16(resultD_u_16x8, 7);
5635
5636 // now we have the following pattern (in two 128 bit registers):
5637 // FEDCBA9876543210
5638 // 0A0Y0A0Y0A0Y0A0Y
5639
5640 // finally, we have to get rid of the upper zero bits by combining two 128 bit registers to one:
5641 const __m128i resultAB_u_8x16 = _mm_packus_epi16(resultA_u_16x8, resultB_u_16x8);
5642 const __m128i resultCD_u_8x16 = _mm_packus_epi16(resultC_u_16x8, resultD_u_16x8);
5643
5644 // and we can store the result
5645 _mm_storeu_si128((__m128i*)target + 0, resultAB_u_8x16);
5646 _mm_storeu_si128((__m128i*)target + 1, resultCD_u_8x16);
5647}
5648
5649#endif // OCEAN_HARDWARE_SSE_VERSION
5650
5651#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5652
5653template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2>
5654void FrameChannels::convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel0_128_u_8x8, const uint8x8_t& factorChannel1_128_u_8x8, const uint8x8_t& factorChannel2_128_u_8x8)
5655{
5656 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2, "Invalid multiplication factors!");
5657
5658 ocean_assert(source != nullptr && target != nullptr);
5659
5660 // the documentation of this function designed for RGB24 to Y8 conversion
5661
5662 // precise color space conversion:
5663 // Y = 0.299 * R + 0.587 * G + 0.114 * B
5664
5665 // approximation:
5666 // Y = (38 * R + 75 * G + 15 * B) / 128
5667
5668 // we expect the following input pattern (for here RGB24):
5669 // FEDC BA98 7654 3210
5670 // RBGR BGRB GRBG RBGR
5671
5672 // we load 8 pixels (= 3 * 8 values) and directly deinterleave the 3 channels so that we receive the following patterns:
5673 // source_u_8x8x3.val[0]: R R R R R R R R
5674 // source_u_8x8x3.val[1]: G G G G G G G G
5675 // source_u_8x8x3.val[2]: B B B B B B B B
5676
5677 uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5678
5679 uint16x8_t intermediateResults_u_16x8;
5680
5681 // we multiply the first channel with the specified factor (unless zero)
5682
5683 if constexpr (tUseFactorChannel0)
5684 {
5685 intermediateResults_u_16x8 = vmull_u8(source_u_8x8x3.val[0], factorChannel0_128_u_8x8);
5686 }
5687 else
5688 {
5689 intermediateResults_u_16x8 = vdupq_n_u16(0u);
5690 }
5691
5692 // we multiply the second channel with the specified factor (unless zero) and accumulate the results
5693
5694 if constexpr (tUseFactorChannel1)
5695 {
5696 intermediateResults_u_16x8 = vmlal_u8(intermediateResults_u_16x8, source_u_8x8x3.val[1], factorChannel1_128_u_8x8);
5697 }
5698
5699 // we multiply the third channel with the specified factor (unless zero) and accumulate the results
5700
5701 if constexpr (tUseFactorChannel2)
5702 {
5703 intermediateResults_u_16x8 = vmlal_u8(intermediateResults_u_16x8, source_u_8x8x3.val[2], factorChannel2_128_u_8x8);
5704 }
5705
5706 // we shift the 16 bit values by 7 bits (= 128), apply rounding, and narrow the 16 bit integers to 8 bit integers within one operation
5707 uint8x8_t results_u_8x8 = vqrshrn_n_u16(intermediateResults_u_16x8, 7); // results_u_8x8 = (intermediateResults_u_16x8 + 2^6) >> 2^7
5708
5709 // and we can store the result
5710 vst1_u8(target, results_u_8x8);
5711}
5712
5713OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8)
5714{
5715 ocean_assert(source != nullptr && target != nullptr);
5716
5717 // the documentation of this function designed for YUV24 to RGB24 conversion
5718
5719 // precise color space conversion:
5720 // | R | | 1 0.0 1.370705 -175.45024 | | Y |
5721 // | G | = | 1 -0.3376335 -0.698001 132.561152 | * | U |
5722 // | B | | 1 1.732446 0.0 -221.753088 | | V |
5723 // | 1 |
5724
5725 // approximation:
5726 // R = 64 * Y + 0 * (U - 128) + 88 * (V - 128)
5727 // G = 64 * Y - 22 * (U - 128) - 45 * (V - 128)
5728 // B = 64 * Y + 111 * (U - 128) + 0 * (V - 128)
5729
5730 // we load 8 pixels (= 3 * 8 values) and directly deinterleave the 3 channels so that we receive the following patterns:
5731 // source_u_8x8x3.val[0]: R R R R R R R R
5732 // source_u_8x8x3.val[1]: G G G G G G G G
5733 // source_u_8x8x3.val[2]: B B B B B B B B
5734
5735 const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5736
5737 // Y' = Y - bias0, U' = U - bias1, V' = V - bias2
5738 const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[0], biasChannel0_u_8x8));
5739 const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[1], biasChannel1_u_8x8));
5740 const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[2], biasChannel2_u_8x8));
5741
5742 // now we apply the 3x3 matrix multiplication
5743
5744 int16x8_t intermediateResults0_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel00_64_s_16x8);
5745 int16x8_t intermediateResults1_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel10_64_s_16x8);
5746 int16x8_t intermediateResults2_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel20_64_s_16x8);
5747
5748 intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, vmulq_s16(source1_s_16x8, factorChannel01_64_s_16x8)); // intermediateResults0 = saturated(intermediateResults0 + source10_low * factorChannel01)
5749 intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, vmulq_s16(source1_s_16x8, factorChannel11_64_s_16x8));
5750 intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, vmulq_s16(source1_s_16x8, factorChannel21_64_s_16x8));
5751
5752 intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, vmulq_s16(source2_s_16x8, factorChannel02_64_s_16x8));
5753 intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, vmulq_s16(source2_s_16x8, factorChannel12_64_s_16x8));
5754 intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, vmulq_s16(source2_s_16x8, factorChannel22_64_s_16x8));
5755
5756 uint8x8x3_t results_u_8x8x3;
5757
5758 // saturated narrow signed to unsigned, normalized by 2^6
5759 results_u_8x8x3.val[0] = vqrshrun_n_s16(intermediateResults0_s_16x8, 6);
5760 results_u_8x8x3.val[1] = vqrshrun_n_s16(intermediateResults1_s_16x8, 6);
5761 results_u_8x8x3.val[2] = vqrshrun_n_s16(intermediateResults2_s_16x8, 6);
5762
5763 // and we can store the result
5764 vst3_u8(target, results_u_8x8x3);
5765}
5766
5767OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8)
5768{
5769 ocean_assert(source != nullptr && target != nullptr);
5770
5771 // the documentation of this function designed for YUV24 to RGB24 conversion
5772
5773 // precise color space conversion:
5774 // | R | | 1 0.0 1.370705 -175.45024 | | Y |
5775 // | G | = | 1 -0.3376335 -0.698001 132.561152 | * | U |
5776 // | B | | 1 1.732446 0.0 -221.753088 | | V |
5777 // | 1 |
5778
5779 // approximation:
5780 // R = 64 * Y + 0 * (U - 128) + 88 * (V - 128)
5781 // G = 64 * Y - 22 * (U - 128) - 45 * (V - 128)
5782 // B = 64 * Y + 111 * (U - 128) + 0 * (V - 128)
5783
5784 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
5785
5786 // Y' = Y - bias0, U' = U - bias1, V' = V - bias2
5787 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
5788 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
5789 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
5790
5791 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
5792 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
5793 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
5794
5795 // now we mulitply apply the 3x3 matrix multiplication
5796
5797 int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_64_s_16x8);
5798 int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_64_s_16x8);
5799 int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_64_s_16x8);
5800
5801 int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_64_s_16x8);
5802 int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_64_s_16x8);
5803 int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_64_s_16x8);
5804
5805 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel01_64_s_16x8)); // intermediateResults0 = saturated(intermediateResults0 + source10_low * factorChannel01)
5806 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel11_64_s_16x8));
5807 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel21_64_s_16x8));
5808
5809 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel01_64_s_16x8));
5810 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel11_64_s_16x8));
5811 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel21_64_s_16x8));
5812
5813 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel02_64_s_16x8));
5814 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel12_64_s_16x8));
5815 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel22_64_s_16x8));
5816
5817 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel02_64_s_16x8));
5818 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel12_64_s_16x8));
5819 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel22_64_s_16x8));
5820
5821 uint8x16x3_t results_u_8x16x3;
5822
5823 // saturated narrow signed to unsigned, normalized by 2^6
5824 results_u_8x16x3.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 6));
5825 results_u_8x16x3.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 6));
5826 results_u_8x16x3.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 6));
5827
5828 // and we can store the result
5829 vst3q_u8(target, results_u_8x16x3);
5830}
5831
5832OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_128_s_16x8, const int16x8_t& factorChannel10_128_s_16x8, const int16x8_t& factorChannel20_128_s_16x8, const int16x8_t& factorChannel01_128_s_16x8, const int16x8_t& factorChannel11_128_s_16x8, const int16x8_t& factorChannel21_128_s_16x8, const int16x8_t& factorChannel02_128_s_16x8, const int16x8_t& factorChannel12_128_s_16x8, const int16x8_t& factorChannel22_128_s_16x8, const int16x8_t& biasChannel0_128_s_16x8, const int16x8_t& biasChannel1_128_s_16x8, const int16x8_t& biasChannel2_128_s_16x8)
5833{
5834 ocean_assert(source != nullptr && target != nullptr);
5835
5836 // the documentation of this function designed for RGB24 to YUV24 conversion
5837
5838 // precise color space conversion:
5839 // | Y | | 0.2578125 0.5039063 0.09765625 16.0 | | R |
5840 // | U | = | -0.1484375 -0.2890625 0.4375 128.0 | * | G |
5841 // | V | | 0.4375 -0.3671875 -0.0703125 128.0 | | B |
5842 // | 1 |
5843
5844 // approximation:
5845 // Y = ( 33 * R + 64 * G + 13 * B) / 128 + 16
5846 // U = (-19 * R - 37 * G + 56 * B) / 128 + 128
5847 // V = ( 56 * R - 47 * G - 9 * B) / 128 + 128
5848
5849 // we load 8 pixels (= 3 * 8 values) and directly deinterleave the 3 channels so that we receive the following patterns:
5850 // source_u_8x8x3.val[0]: R R R R R R R R
5851 // source_u_8x8x3.val[1]: G G G G G G G G
5852 // source_u_8x8x3.val[2]: B B B B B B B B
5853
5854 const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5855
5856 const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[0]));
5857 const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[1]));
5858 const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[2]));
5859
5860 int16x8_t intermediateResults0_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel00_128_s_16x8);
5861 int16x8_t intermediateResults1_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel10_128_s_16x8);
5862 int16x8_t intermediateResults2_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel20_128_s_16x8);
5863
5864 intermediateResults0_s_16x8 = vmlaq_s16(intermediateResults0_s_16x8, source1_s_16x8, factorChannel01_128_s_16x8);
5865 intermediateResults1_s_16x8 = vmlaq_s16(intermediateResults1_s_16x8, source1_s_16x8, factorChannel11_128_s_16x8);
5866 intermediateResults2_s_16x8 = vmlaq_s16(intermediateResults2_s_16x8, source1_s_16x8, factorChannel21_128_s_16x8);
5867
5868 intermediateResults0_s_16x8 = vmlaq_s16(intermediateResults0_s_16x8, source2_s_16x8, factorChannel02_128_s_16x8);
5869 intermediateResults1_s_16x8 = vmlaq_s16(intermediateResults1_s_16x8, source2_s_16x8, factorChannel12_128_s_16x8);
5870 intermediateResults2_s_16x8 = vmlaq_s16(intermediateResults2_s_16x8, source2_s_16x8, factorChannel22_128_s_16x8);
5871
5872 // now we add the bias values (saturated)
5873
5874 intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, biasChannel0_128_s_16x8);
5875 intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, biasChannel1_128_s_16x8);
5876 intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, biasChannel2_128_s_16x8);
5877
5878 uint8x8x3_t results_u_8x8x3;
5879
5880 // saturated narrow signed to unsigned
5881 results_u_8x8x3.val[0] = vqrshrun_n_s16(intermediateResults0_s_16x8, 7);
5882 results_u_8x8x3.val[1] = vqrshrun_n_s16(intermediateResults1_s_16x8, 7);
5883 results_u_8x8x3.val[2] = vqrshrun_n_s16(intermediateResults2_s_16x8, 7);
5884
5885 // and we can store the result
5886 vst3_u8(target, results_u_8x8x3);
5887}
5888
5889OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x4_t& factorChannel00_1024_s_16x4, const int16x4_t& factorChannel10_1024_s_16x4, const int16x4_t& factorChannel20_1024_s_16x4, const int16x4_t& factorChannel01_1024_s_16x4, const int16x4_t& factorChannel11_1024_s_16x4, const int16x4_t& factorChannel21_1024_s_16x4, const int16x4_t& factorChannel02_1024_s_16x4, const int16x4_t& factorChannel12_1024_s_16x4, const int16x4_t& factorChannel22_1024_s_16x4, const int32x4_t& biasChannel0_1024_s_32x4, const int32x4_t& biasChannel1_1024_s_32x4, const int32x4_t& biasChannel2_1024_s_32x4)
5890{
5891 ocean_assert(source != nullptr && target != nullptr);
5892
5893 // the documentation of this function designed for YUV24 to RGB24 conversion
5894
5895 // precise color space conversion:
5896 // | R | | 1.1639404296875 0.0 1.595947265625 -222.904296875 | | Y |
5897 // | G | = | 1.1639404296875 -0.3909912109375 -0.81298828125 135.486328125 | * | U |
5898 // | B | | 1.1639404296875 2.0179443359375 0.0 -276.919921875 | | V |
5899 // | 1 |
5900
5901 // approximation:
5902 // | R | | 1192 0 1634 -223 | | Y |
5903 // | G | = | 1192 -400 -833 135 | * | U |
5904 // | B | | 1192 2066 0 -277 | | V |
5905 // | 1 |
5906
5907 // we load 8 pixels (= 3 * 8 values) and directly deinterleave the 3 channels so that we receive the following patterns:
5908 // source_u_8x8x3.val[0]: R R R R R R R R
5909 // source_u_8x8x3.val[1]: G G G G G G G G
5910 // source_u_8x8x3.val[2]: B B B B B B B B
5911
5912 const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5913
5914 const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[0]));
5915 const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[1]));
5916 const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[2]));
5917
5918 const int16x4_t source0_low_s_16x4 = vget_low_s16(source0_s_16x8);
5919 const int16x4_t source0_high_s_16x4 = vget_high_s16(source0_s_16x8);
5920
5921 int32x4_t intermediateResults0_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel00_1024_s_16x4);
5922 int32x4_t intermediateResults0_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel00_1024_s_16x4);
5923
5924 int32x4_t intermediateResults1_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel10_1024_s_16x4);
5925 int32x4_t intermediateResults1_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel10_1024_s_16x4);
5926
5927 int32x4_t intermediateResults2_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel20_1024_s_16x4);
5928 int32x4_t intermediateResults2_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel20_1024_s_16x4);
5929
5930
5931 const int16x4_t source1_low_s_16x4 = vget_low_s16(source1_s_16x8);
5932 const int16x4_t source1_high_s_16x4 = vget_high_s16(source1_s_16x8);
5933
5934 intermediateResults0_low_s_32x4 = vmlal_s16(intermediateResults0_low_s_32x4, source1_low_s_16x4, factorChannel01_1024_s_16x4);
5935 intermediateResults0_high_s_32x4 = vmlal_s16(intermediateResults0_high_s_32x4, source1_high_s_16x4, factorChannel01_1024_s_16x4);
5936
5937 intermediateResults1_low_s_32x4 = vmlal_s16(intermediateResults1_low_s_32x4, source1_low_s_16x4, factorChannel11_1024_s_16x4);
5938 intermediateResults1_high_s_32x4 = vmlal_s16(intermediateResults1_high_s_32x4, source1_high_s_16x4, factorChannel11_1024_s_16x4);
5939
5940 intermediateResults2_low_s_32x4 = vmlal_s16(intermediateResults2_low_s_32x4, source1_low_s_16x4, factorChannel21_1024_s_16x4);
5941 intermediateResults2_high_s_32x4 = vmlal_s16(intermediateResults2_high_s_32x4, source1_high_s_16x4, factorChannel21_1024_s_16x4);
5942
5943
5944 const int16x4_t source2_low_s_16x4 = vget_low_s16(source2_s_16x8);
5945 const int16x4_t source2_high_s_16x4 = vget_high_s16(source2_s_16x8);
5946
5947 intermediateResults0_low_s_32x4 = vmlal_s16(intermediateResults0_low_s_32x4, source2_low_s_16x4, factorChannel02_1024_s_16x4);
5948 intermediateResults0_high_s_32x4 = vmlal_s16(intermediateResults0_high_s_32x4, source2_high_s_16x4, factorChannel02_1024_s_16x4);
5949
5950 intermediateResults1_low_s_32x4 = vmlal_s16(intermediateResults1_low_s_32x4, source2_low_s_16x4, factorChannel12_1024_s_16x4);
5951 intermediateResults1_high_s_32x4 = vmlal_s16(intermediateResults1_high_s_32x4, source2_high_s_16x4, factorChannel12_1024_s_16x4);
5952
5953 intermediateResults2_low_s_32x4 = vmlal_s16(intermediateResults2_low_s_32x4, source2_low_s_16x4, factorChannel22_1024_s_16x4);
5954 intermediateResults2_high_s_32x4 = vmlal_s16(intermediateResults2_high_s_32x4, source2_high_s_16x4, factorChannel22_1024_s_16x4);
5955
5956
5957 // now we add the bias values (saturated)
5958
5959 intermediateResults0_low_s_32x4 = vaddq_s32(intermediateResults0_low_s_32x4, biasChannel0_1024_s_32x4);
5960 intermediateResults0_high_s_32x4 = vaddq_s32(intermediateResults0_high_s_32x4, biasChannel0_1024_s_32x4);
5961
5962 intermediateResults1_low_s_32x4 = vaddq_s32(intermediateResults1_low_s_32x4, biasChannel1_1024_s_32x4);
5963 intermediateResults1_high_s_32x4 = vaddq_s32(intermediateResults1_high_s_32x4, biasChannel1_1024_s_32x4);
5964
5965 intermediateResults2_low_s_32x4 = vaddq_s32(intermediateResults2_low_s_32x4, biasChannel2_1024_s_32x4);
5966 intermediateResults2_high_s_32x4 = vaddq_s32(intermediateResults2_high_s_32x4, biasChannel2_1024_s_32x4);
5967
5968
5969 uint8x8x3_t results_u_8x8x3;
5970
5971 // saturated narrow signed to unsigned
5972 results_u_8x8x3.val[0] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_high_s_32x4, 10)));
5973 results_u_8x8x3.val[1] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_high_s_32x4, 10)));
5974 results_u_8x8x3.val[2] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_high_s_32x4, 10)));
5975
5976 // and we can store the result
5977 vst3_u8(target, results_u_8x8x3);
5978}
5979
5980OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x4_t& factorChannel00_1024_s_16x4, const int16x4_t& factorChannel10_1024_s_16x4, const int16x4_t& factorChannel20_1024_s_16x4, const int16x4_t& factorChannel01_1024_s_16x4, const int16x4_t& factorChannel11_1024_s_16x4, const int16x4_t& factorChannel21_1024_s_16x4, const int16x4_t& factorChannel02_1024_s_16x4, const int16x4_t& factorChannel12_1024_s_16x4, const int16x4_t& factorChannel22_1024_s_16x4, const int32x4_t& biasChannel0_1024_s_32x4, const int32x4_t& biasChannel1_1024_s_32x4, const int32x4_t& biasChannel2_1024_s_32x4)
5981{
5982 ocean_assert(source != nullptr && target != nullptr);
5983
5984 // the documentation of this function designed for YUV24 to RGB24 conversion
5985
5986 // precise color space conversion:
5987 // | R | | 1.1639404296875 0.0 1.595947265625 -222.904296875 | | Y |
5988 // | G | = | 1.1639404296875 -0.3909912109375 -0.81298828125 135.486328125 | * | U |
5989 // | B | | 1.1639404296875 2.0179443359375 0.0 -276.919921875 | | V |
5990 // | 1 |
5991
5992 // approximation:
5993 // | R | | 1192 0 1634 -223 | | Y |
5994 // | G | = | 1192 -400 -833 135 | * | U |
5995 // | B | | 1192 2066 0 -277 | | V |
5996 // | 1 |
5997
5998 // we load 8 pixels (= 3 * 8 values) and directly deinterleave the 3 channels so that we receive the following patterns:
5999 // source_u_8x8x3.val[0]: R R R R R R R R
6000 // source_u_8x8x3.val[1]: G G G G G G G G
6001 // source_u_8x8x3.val[2]: B B B B B B B B
6002
6003 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
6004
6005 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[0])));
6006 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[1])));
6007 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[2])));
6008
6009 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[0])));
6010 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[1])));
6011 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[2])));
6012
6013 const int16x4_t source0_A_s_16x4 = vget_low_s16(source0_low_s_16x8);
6014 const int16x4_t source0_B_s_16x4 = vget_high_s16(source0_low_s_16x8);
6015 const int16x4_t source0_C_s_16x4 = vget_low_s16(source0_high_s_16x8);
6016 const int16x4_t source0_D_s_16x4 = vget_high_s16(source0_high_s_16x8);
6017
6018 int32x4_t intermediateResults0_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel00_1024_s_16x4);
6019 int32x4_t intermediateResults0_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel00_1024_s_16x4);
6020 int32x4_t intermediateResults0_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel00_1024_s_16x4);
6021 int32x4_t intermediateResults0_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel00_1024_s_16x4);
6022
6023 int32x4_t intermediateResults1_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel10_1024_s_16x4);
6024 int32x4_t intermediateResults1_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel10_1024_s_16x4);
6025 int32x4_t intermediateResults1_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel10_1024_s_16x4);
6026 int32x4_t intermediateResults1_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel10_1024_s_16x4);
6027
6028 int32x4_t intermediateResults2_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel20_1024_s_16x4);
6029 int32x4_t intermediateResults2_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel20_1024_s_16x4);
6030 int32x4_t intermediateResults2_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel20_1024_s_16x4);
6031 int32x4_t intermediateResults2_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel20_1024_s_16x4);
6032
6033
6034 const int16x4_t source1_A_s_16x4 = vget_low_s16(source1_low_s_16x8);
6035 const int16x4_t source1_B_s_16x4 = vget_high_s16(source1_low_s_16x8);
6036 const int16x4_t source1_C_s_16x4 = vget_low_s16(source1_high_s_16x8);
6037 const int16x4_t source1_D_s_16x4 = vget_high_s16(source1_high_s_16x8);
6038
6039 intermediateResults0_A_s_32x4 = vmlal_s16(intermediateResults0_A_s_32x4, source1_A_s_16x4, factorChannel01_1024_s_16x4);
6040 intermediateResults0_B_s_32x4 = vmlal_s16(intermediateResults0_B_s_32x4, source1_B_s_16x4, factorChannel01_1024_s_16x4);
6041 intermediateResults0_C_s_32x4 = vmlal_s16(intermediateResults0_C_s_32x4, source1_C_s_16x4, factorChannel01_1024_s_16x4);
6042 intermediateResults0_D_s_32x4 = vmlal_s16(intermediateResults0_D_s_32x4, source1_D_s_16x4, factorChannel01_1024_s_16x4);
6043
6044 intermediateResults1_A_s_32x4 = vmlal_s16(intermediateResults1_A_s_32x4, source1_A_s_16x4, factorChannel11_1024_s_16x4);
6045 intermediateResults1_B_s_32x4 = vmlal_s16(intermediateResults1_B_s_32x4, source1_B_s_16x4, factorChannel11_1024_s_16x4);
6046 intermediateResults1_C_s_32x4 = vmlal_s16(intermediateResults1_C_s_32x4, source1_C_s_16x4, factorChannel11_1024_s_16x4);
6047 intermediateResults1_D_s_32x4 = vmlal_s16(intermediateResults1_D_s_32x4, source1_D_s_16x4, factorChannel11_1024_s_16x4);
6048
6049 intermediateResults2_A_s_32x4 = vmlal_s16(intermediateResults2_A_s_32x4, source1_A_s_16x4, factorChannel21_1024_s_16x4);
6050 intermediateResults2_B_s_32x4 = vmlal_s16(intermediateResults2_B_s_32x4, source1_B_s_16x4, factorChannel21_1024_s_16x4);
6051 intermediateResults2_C_s_32x4 = vmlal_s16(intermediateResults2_C_s_32x4, source1_C_s_16x4, factorChannel21_1024_s_16x4);
6052 intermediateResults2_D_s_32x4 = vmlal_s16(intermediateResults2_D_s_32x4, source1_D_s_16x4, factorChannel21_1024_s_16x4);
6053
6054
6055 const int16x4_t source2_A_s_16x4 = vget_low_s16(source2_low_s_16x8);
6056 const int16x4_t source2_B_s_16x4 = vget_high_s16(source2_low_s_16x8);
6057 const int16x4_t source2_C_s_16x4 = vget_low_s16(source2_high_s_16x8);
6058 const int16x4_t source2_D_s_16x4 = vget_high_s16(source2_high_s_16x8);
6059
6060 intermediateResults0_A_s_32x4 = vmlal_s16(intermediateResults0_A_s_32x4, source2_A_s_16x4, factorChannel02_1024_s_16x4);
6061 intermediateResults0_B_s_32x4 = vmlal_s16(intermediateResults0_B_s_32x4, source2_B_s_16x4, factorChannel02_1024_s_16x4);
6062 intermediateResults0_C_s_32x4 = vmlal_s16(intermediateResults0_C_s_32x4, source2_C_s_16x4, factorChannel02_1024_s_16x4);
6063 intermediateResults0_D_s_32x4 = vmlal_s16(intermediateResults0_D_s_32x4, source2_D_s_16x4, factorChannel02_1024_s_16x4);
6064
6065 intermediateResults1_A_s_32x4 = vmlal_s16(intermediateResults1_A_s_32x4, source2_A_s_16x4, factorChannel12_1024_s_16x4);
6066 intermediateResults1_B_s_32x4 = vmlal_s16(intermediateResults1_B_s_32x4, source2_B_s_16x4, factorChannel12_1024_s_16x4);
6067 intermediateResults1_C_s_32x4 = vmlal_s16(intermediateResults1_C_s_32x4, source2_C_s_16x4, factorChannel12_1024_s_16x4);
6068 intermediateResults1_D_s_32x4 = vmlal_s16(intermediateResults1_D_s_32x4, source2_D_s_16x4, factorChannel12_1024_s_16x4);
6069
6070 intermediateResults2_A_s_32x4 = vmlal_s16(intermediateResults2_A_s_32x4, source2_A_s_16x4, factorChannel22_1024_s_16x4);
6071 intermediateResults2_B_s_32x4 = vmlal_s16(intermediateResults2_B_s_32x4, source2_B_s_16x4, factorChannel22_1024_s_16x4);
6072 intermediateResults2_C_s_32x4 = vmlal_s16(intermediateResults2_C_s_32x4, source2_C_s_16x4, factorChannel22_1024_s_16x4);
6073 intermediateResults2_D_s_32x4 = vmlal_s16(intermediateResults2_D_s_32x4, source2_D_s_16x4, factorChannel22_1024_s_16x4);
6074
6075
6076 // now we add the bias values (saturated)
6077
6078 intermediateResults0_A_s_32x4 = vaddq_s32(intermediateResults0_A_s_32x4, biasChannel0_1024_s_32x4);
6079 intermediateResults0_B_s_32x4 = vaddq_s32(intermediateResults0_B_s_32x4, biasChannel0_1024_s_32x4);
6080 intermediateResults0_C_s_32x4 = vaddq_s32(intermediateResults0_C_s_32x4, biasChannel0_1024_s_32x4);
6081 intermediateResults0_D_s_32x4 = vaddq_s32(intermediateResults0_D_s_32x4, biasChannel0_1024_s_32x4);
6082
6083 intermediateResults1_A_s_32x4 = vaddq_s32(intermediateResults1_A_s_32x4, biasChannel1_1024_s_32x4);
6084 intermediateResults1_B_s_32x4 = vaddq_s32(intermediateResults1_B_s_32x4, biasChannel1_1024_s_32x4);
6085 intermediateResults1_C_s_32x4 = vaddq_s32(intermediateResults1_C_s_32x4, biasChannel1_1024_s_32x4);
6086 intermediateResults1_D_s_32x4 = vaddq_s32(intermediateResults1_D_s_32x4, biasChannel1_1024_s_32x4);
6087
6088 intermediateResults2_A_s_32x4 = vaddq_s32(intermediateResults2_A_s_32x4, biasChannel2_1024_s_32x4);
6089 intermediateResults2_B_s_32x4 = vaddq_s32(intermediateResults2_B_s_32x4, biasChannel2_1024_s_32x4);
6090 intermediateResults2_C_s_32x4 = vaddq_s32(intermediateResults2_C_s_32x4, biasChannel2_1024_s_32x4);
6091 intermediateResults2_D_s_32x4 = vaddq_s32(intermediateResults2_D_s_32x4, biasChannel2_1024_s_32x4);
6092
6093
6094 uint8x16x3_t results_u_8x16x3;
6095
6096 // saturated narrow signed to unsigned
6097 results_u_8x16x3.val[0] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_D_s_32x4, 10))));
6098
6099 results_u_8x16x3.val[1] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_D_s_32x4, 10))));
6100 results_u_8x16x3.val[2] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_D_s_32x4, 10))));
6101
6102 // and we can store the result
6103 vst3q_u8(target, results_u_8x16x3);
6104}
6105
6106OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_128_s_16x8, const int16x8_t& factorChannel10_128_s_16x8, const int16x8_t& factorChannel20_128_s_16x8, const int16x8_t& factorChannel01_128_s_16x8, const int16x8_t& factorChannel11_128_s_16x8, const int16x8_t& factorChannel21_128_s_16x8, const int16x8_t& factorChannel02_128_s_16x8, const int16x8_t& factorChannel12_128_s_16x8, const int16x8_t& factorChannel22_128_s_16x8, const int16x8_t& biasChannel0_128_s_16x8, const int16x8_t& biasChannel1_128_s_16x8, const int16x8_t& biasChannel2_128_s_16x8)
6107{
6108 ocean_assert(source != nullptr && target != nullptr);
6109
6110 // the documentation of this function designed for RGB24 to YUV24 conversion
6111
6112 // precise color space conversion:
6113 // | Y | | 0.2578125 0.5039063 0.09765625 16.0 | | R |
6114 // | U | = | -0.1484375 -0.2890625 0.4375 128.0 | * | G |
6115 // | V | | 0.4375 -0.3671875 -0.0703125 128.0 | | B |
6116 // | 1 |
6117
6118 // approximation:
6119 // Y = ( 33 * R + 64 * G + 13 * B) / 128 + 16
6120 // U = (-19 * R - 37 * G + 56 * B) / 128 + 128
6121 // V = ( 56 * R - 47 * G - 9 * B) / 128 + 128
6122
6123 // we load 8 pixels (= 3 * 8 values) and directly deinterleave the 3 channels so that we receive the following patterns:
6124 // source_u_8x8x3.val[0]: R R R R R R R R
6125 // source_u_8x8x3.val[1]: G G G G G G G G
6126 // source_u_8x8x3.val[2]: B B B B B B B B
6127
6128 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
6129
6130 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[0])));
6131 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[1])));
6132 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[2])));
6133
6134 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[0])));
6135 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[1])));
6136 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[2])));
6137
6138
6139 int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_128_s_16x8);
6140 int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_128_s_16x8);
6141 int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_128_s_16x8);
6142
6143 int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_128_s_16x8);
6144 int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_128_s_16x8);
6145 int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_128_s_16x8);
6146
6147
6148 intermediateResults0_low_s_16x8 = vmlaq_s16(intermediateResults0_low_s_16x8, source1_low_s_16x8, factorChannel01_128_s_16x8);
6149 intermediateResults1_low_s_16x8 = vmlaq_s16(intermediateResults1_low_s_16x8, source1_low_s_16x8, factorChannel11_128_s_16x8);
6150 intermediateResults2_low_s_16x8 = vmlaq_s16(intermediateResults2_low_s_16x8, source1_low_s_16x8, factorChannel21_128_s_16x8);
6151
6152 intermediateResults0_high_s_16x8 = vmlaq_s16(intermediateResults0_high_s_16x8, source1_high_s_16x8, factorChannel01_128_s_16x8);
6153 intermediateResults1_high_s_16x8 = vmlaq_s16(intermediateResults1_high_s_16x8, source1_high_s_16x8, factorChannel11_128_s_16x8);
6154 intermediateResults2_high_s_16x8 = vmlaq_s16(intermediateResults2_high_s_16x8, source1_high_s_16x8, factorChannel21_128_s_16x8);
6155
6156
6157 intermediateResults0_low_s_16x8 = vmlaq_s16(intermediateResults0_low_s_16x8, source2_low_s_16x8, factorChannel02_128_s_16x8);
6158 intermediateResults1_low_s_16x8 = vmlaq_s16(intermediateResults1_low_s_16x8, source2_low_s_16x8, factorChannel12_128_s_16x8);
6159 intermediateResults2_low_s_16x8 = vmlaq_s16(intermediateResults2_low_s_16x8, source2_low_s_16x8, factorChannel22_128_s_16x8);
6160
6161 intermediateResults0_high_s_16x8 = vmlaq_s16(intermediateResults0_high_s_16x8, source2_high_s_16x8, factorChannel02_128_s_16x8);
6162 intermediateResults1_high_s_16x8 = vmlaq_s16(intermediateResults1_high_s_16x8, source2_high_s_16x8, factorChannel12_128_s_16x8);
6163 intermediateResults2_high_s_16x8 = vmlaq_s16(intermediateResults2_high_s_16x8, source2_high_s_16x8, factorChannel22_128_s_16x8);
6164
6165 // now we add the bias values (saturated)
6166
6167 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, biasChannel0_128_s_16x8);
6168 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, biasChannel0_128_s_16x8);
6169
6170 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, biasChannel1_128_s_16x8);
6171 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, biasChannel1_128_s_16x8);
6172
6173 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, biasChannel2_128_s_16x8);
6174 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, biasChannel2_128_s_16x8);
6175
6176
6177 uint8x16x3_t results_u_8x16x3;
6178
6179 // saturated narrow signed to unsigned shift with rounding
6180 results_u_8x16x3.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 7));
6181 results_u_8x16x3.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 7));
6182 results_u_8x16x3.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 7));
6183
6184 // and we can store the result
6185 vst3q_u8(target, results_u_8x16x3);
6186}
6187
6188OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8, const uint8x16_t& channelValue3_u_8x16)
6189{
6190 ocean_assert(source != nullptr && target != nullptr);
6191
6192 // the documentation of this function designed for YUV24 to RGB24 conversion
6193
6194 // precise color space conversion:
6195 // | R | | 1 0.0 1.370705 -175.45024 | | Y |
6196 // | G | = | 1 -0.3376335 -0.698001 132.561152 | * | U |
6197 // | B | | 1 1.732446 0.0 -221.753088 | | V |
6198 // | 1 |
6199
6200 // approximation:
6201 // R = 64 * Y + 0 * (U - 128) + 88 * (V - 128)
6202 // G = 64 * Y - 22 * (U - 128) - 45 * (V - 128)
6203 // B = 64 * Y + 111 * (U - 128) + 0 * (V - 128)
6204
6205 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
6206
6207 // Y' = Y - bias0, U' = U - bias1, V' = V - bias2
6208 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
6209 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
6210 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
6211
6212 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
6213 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
6214 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
6215
6216 // now we mulitply apply the 3x3 matrix multiplication
6217
6218 int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_64_s_16x8);
6219 int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_64_s_16x8);
6220 int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_64_s_16x8);
6221
6222 int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_64_s_16x8);
6223 int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_64_s_16x8);
6224 int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_64_s_16x8);
6225
6226 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel01_64_s_16x8)); // intermediateResults0 = saturated(intermediateResults0 + source10_low * factorChannel01)
6227 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel11_64_s_16x8));
6228 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel21_64_s_16x8));
6229
6230 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel01_64_s_16x8));
6231 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel11_64_s_16x8));
6232 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel21_64_s_16x8));
6233
6234 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel02_64_s_16x8));
6235 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel12_64_s_16x8));
6236 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel22_64_s_16x8));
6237
6238 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel02_64_s_16x8));
6239 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel12_64_s_16x8));
6240 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel22_64_s_16x8));
6241
6242 uint8x16x4_t results_u_8x16x4;
6243
6244 // saturated narrow signed to unsigned, normalized by 2^6
6245 results_u_8x16x4.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 6));
6246 results_u_8x16x4.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 6));
6247 results_u_8x16x4.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 6));
6248 results_u_8x16x4.val[3] = channelValue3_u_8x16;
6249
6250 // and we can store the result
6251 vst4q_u8(target, results_u_8x16x4);
6252}
6253
6254template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2, bool tUseFactorChannel3>
6255void FrameChannels::convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel0_128_u_8x8, const uint8x8_t& factorChannel1_128_u_8x8, const uint8x8_t& factorChannel2_128_u_8x8, const uint8x8_t& factorChannel3_128_u_8x8)
6256{
6257 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2 || tUseFactorChannel3, "Invalid multiplication factors!");
6258
6259 ocean_assert(source != nullptr && target != nullptr);
6260
6261 // the documentation of this function designed for RGBA32 to Y8 conversion
6262
6263 // precise color space conversion:
6264 // Y = 0.299 * R + 0.587 * G + 0.114 * B
6265
6266 // approximation:
6267 // Y = (38 * R + 75 * G + 15 * B) / 128
6268
6269 // we expect the following input pattern (for here RGBA32):
6270 // FEDC BA98 7654 3210
6271 // ABGR ABGR ABGR ABGR
6272
6273 // we load 8 pixels (= 4 * 8 values) and directly deinterleave the 4 channels so that we receive the following patterns:
6274 // m4_64_pixels.val[0]: R R R R R R R R
6275 // m4_64_pixels.val[1]: G G G G G G G G
6276 // m4_64_pixels.val[2]: B B B B B B B B
6277 // m4_64_pixels.val[3]: A A A A A A A A
6278
6279 uint8x8x4_t pixels_u_8x8x4 = vld4_u8(source);
6280
6281 uint16x8_t intermediateResults_16x8;
6282
6283 // we multiply the first channel with the specified factor (unless zero)
6284
6285 if constexpr (tUseFactorChannel0)
6286 {
6287 intermediateResults_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel0_128_u_8x8);
6288 }
6289 else
6290 {
6291 intermediateResults_16x8 = vdupq_n_u16(0u);
6292 }
6293
6294 // we multiply the second channel with the specified factor (unless zero) and accumulate the results
6295
6296 if constexpr (tUseFactorChannel1)
6297 {
6298 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[1], factorChannel1_128_u_8x8);
6299 }
6300
6301 // we multiply the third channel with the specified factor (unless zero) and accumulate the results
6302
6303 if constexpr (tUseFactorChannel2)
6304 {
6305 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[2], factorChannel2_128_u_8x8);
6306 }
6307
6308 // we multiply the fourth channel with the specified factor (unless zero) and accumulate the results
6309
6310 if constexpr (tUseFactorChannel3)
6311 {
6312 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[3], factorChannel3_128_u_8x8);
6313 }
6314
6315 // we shift the 16 bit values by 7 bits (= 128), apply rounding, and narrow the 16 bit integers to 8 bit integers within one operation
6316 uint8x8_t results_u_8x8 = vqrshrn_n_u16(intermediateResults_16x8, 7); // pixels_u_8x8x4 = (intermediateResults_16x8 + 2^6) >> 2^7
6317
6318 // and we can store the result
6319 vst1_u8(target, results_u_8x8);
6320}
6321
6322OCEAN_FORCE_INLINE void FrameChannels::convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel00_128_u_8x8, const uint8x8_t& factorChannel10_128_u_8x8, const uint8x8_t& factorChannel01_128_u_8x8, const uint8x8_t& factorChannel11_128_u_8x8, const uint8x8_t& factorChannel02_128_u_8x8, const uint8x8_t& factorChannel12_128_u_8x8, const uint8x8_t& factorChannel03_128_u_8x8, const uint8x8_t& factorChannel13_128_u_8x8)
6323{
6324 ocean_assert(source != nullptr && target != nullptr);
6325
6326 // the documentation of this function designed for RGBA32 to YA16 conversion
6327
6328 // precise color space conversion:
6329 // Y = 0.299 * R + 0.587 * G + 0.114 * B + 0.0 * A
6330 // A = 0.0 * R + 0.0 * G + 0.0 * B + 1.0 * A
6331
6332 // approximation:
6333 // Y = (38 * R + 75 * G + 15 * B + 0 * A) / 128
6334 // A = (128 * A) / 128
6335
6336 // we expect the following input pattern (for here RGBA32):
6337 // FEDC BA98 7654 3210
6338 // ABGR ABGR ABGR ABGR
6339
6340 // we load 8 pixels (= 4 * 8 values) and directly deinterleave the 4 channels so that we receive the following patterns:
6341 // m4_64_pixels.val[0]: R R R R R R R R
6342 // m4_64_pixels.val[1]: G G G G G G G G
6343 // m4_64_pixels.val[2]: B B B B B B B B
6344 // m4_64_pixels.val[3]: A A A A A A A A
6345
6346 uint8x8x4_t pixels_u_8x8x4 = vld4_u8(source);
6347
6348 uint16x8_t intermediateResultsChannel0_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel00_128_u_8x8);
6349 uint16x8_t intermediateResultsChannel1_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel10_128_u_8x8);
6350
6351 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[1], factorChannel01_128_u_8x8);
6352 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[1], factorChannel11_128_u_8x8);
6353
6354 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[2], factorChannel02_128_u_8x8);
6355 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[2], factorChannel12_128_u_8x8);
6356
6357 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[3], factorChannel03_128_u_8x8);
6358 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[3], factorChannel13_128_u_8x8);
6359
6360 uint8x8x2_t results_u_8x8x2;
6361
6362 // we shift the 16 bit values by 7 bits (= 128), apply rounding, and narrow the 16 bit integers to 8 bit integers within one operation
6363
6364 results_u_8x8x2.val[0] = vqrshrn_n_u16(intermediateResultsChannel0_16x8, 7); // results_u_8x8x2.val[0] = (intermediateResultsChannel0_16x8 + 2^6) >> 2^7
6365 results_u_8x8x2.val[1] = vqrshrn_n_u16(intermediateResultsChannel1_16x8, 7);
6366
6367 // and we can store the result
6368 vst2_u8(target, results_u_8x8x2);
6369}
6370
6371#endif // OCEAN_HARDWARE_NEON_VERSION
6372
6373}
6374
6375}
6376
6377#endif // META_OCEAN_CV_FRAME_CHANNELS_H
The following comfort class provides comfortable functions simplifying prototyping applications but a...
Definition FrameChannels.h:51
static bool premultipliedAlphaToStraightAlpha(const Frame &source, Frame &target, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
static bool zipChannels(const Frames &sourceFrames, Frame &targetFrame, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Zips/interleaves 1-channel images into one image with n-channels.
static bool separateTo1Channel(const Frame &sourceFrame, Frames &targetFrames, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
static bool premultipliedAlphaToStraightAlpha(Frame &frame, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
static bool separateTo1Channel(const Frame &sourceFrame, const std::initializer_list< Frame * > &targetFrames, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
static bool zipChannels(const std::initializer_list< Frame > &sourceFrames, Frame &targetFrame, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Zips/interleaves 1-channel images into one image with n-channels.
static bool straightAlphaToPremultipliedAlpha(Frame &frame, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
static bool straightAlphaToPremultipliedAlpha(const Frame &source, Frame &target, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
This class implements frame channel conversion, transformation and extraction functions.
Definition FrameChannels.h:31
static void reverseChannelOrder(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Reverses the order of the channels of a frame with zipped pixel format.
Definition FrameChannels.h:2840
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &factorChannel00_1024_s_16x8, const __m128i &factorChannel10_1024_s_16x8, const __m128i &factorChannel20_1024_s_16x8, const __m128i &factorChannel01_1024_s_16x8, const __m128i &factorChannel11_1024_s_16x8, const __m128i &factorChannel21_1024_s_16x8, const __m128i &factorChannel02_1024_s_16x8, const __m128i &factorChannel12_1024_s_16x8, const __m128i &factorChannel22_1024_s_16x8, const __m128i &biasChannel0_1024_s_32x4, const __m128i &biasChannel1_1024_s_32x4, const __m128i &biasChannel2_1024_s_32x4)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition FrameChannels.h:5340
static void addChannelValueRow(const T *source, T *target, const size_t size, const void *channelValueParameter)
Adds a channel to a given row with generic (zipped) pixel format and sets all values to a specified v...
Definition FrameChannels.h:4288
static void shuffleRowChannelsAndSetLastChannelValue(const T *source, T *target, const size_t size, const void *options=nullptr)
Shuffles the channels of row pixels by application of a specified shuffle pattern and sets the last c...
Definition FrameChannels.h:3747
static void separateTo1Channel(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
Definition FrameChannels.h:1847
static OCEAN_FORCE_INLINE void convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8, const uint8x16_t &channelValue3_u_8x16)
Converts 16 pixels with 3 channels per pixel to 16 pixels with 4 channels per pixel by a linear combi...
Definition FrameChannels.h:6188
static void addChannelRow(const void **sources, void **targets, const unsigned int multipleRowIndex, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const void *options)
Adds a channel to a given row with generic (zipped) pixel format and copies the information of the ne...
Definition FrameChannels.h:4188
static void shuffleChannelsAndSetLastChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Shuffles the channels of source frame and sets the last channel with constant value in the target fra...
Definition FrameChannels.h:3910
static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactors0_128_u_16x8, const __m128i &multiplicationFactors1_128_u_16x8, const __m128i &multiplicationFactors2_128_u_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with one channel per pixel by a linear comb...
Definition FrameChannels.h:5186
static void shuffleChannels(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Shuffles the channels of a frame by an arbitrary pattern.
Definition FrameChannels.h:3882
static void convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *channelMultiplicationFactors_128)
Converts a row of pixels with 3 channels to pixels with one channel by a linear combination of the fo...
Definition FrameChannels.h:4876
static void convertRow4ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *channelMultiplicationFactors_128)
Converts a row of pixels with 4 channels to pixels with one channel by a linear combination of the fo...
Definition FrameChannels.h:4949
static void copyChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Copies one channel from a given frame with zipped pixel format to another frame with zipped pixel for...
Definition FrameChannels.h:2799
static void zipChannels(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition FrameChannels.h:2598
static void straightAlphaToPremultipliedAlpha8BitPerChannel(uint8_t *const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
Definition FrameChannels.h:4091
static void applyRowOperator(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const RowOperatorFunction< TSource, TTarget, tSourceChannels, tTargetChannels > &rowOperatorFunction, Worker *worker=nullptr)
Applies a row operator to all rows of a source image.
Definition FrameChannels.h:4006
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition FrameChannels.h:5767
static void convertRow4ChannelsTo2Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *multiplicationFactors_128)
Converts a row of pixels with 4 channels to pixels with two channels by a linear combination of the fo...
static void setChannelSubset(T *frame, const unsigned int width, const T value, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Sets one channel of a frame with one unique value.
Definition FrameChannels.h:4487
static void applyBivariateOperatorSubset(const TSource0 *source0, const TSource1 *source1, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Generic bivariate pixel operations.
Definition FrameChannels.h:4720
static void applyAdvancedPixelModifier(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:3968
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear comb...
Definition FrameChannels.h:5713
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x4_t &factorChannel00_1024_s_16x4, const int16x4_t &factorChannel10_1024_s_16x4, const int16x4_t &factorChannel20_1024_s_16x4, const int16x4_t &factorChannel01_1024_s_16x4, const int16x4_t &factorChannel11_1024_s_16x4, const int16x4_t &factorChannel21_1024_s_16x4, const int16x4_t &factorChannel02_1024_s_16x4, const int16x4_t &factorChannel12_1024_s_16x4, const int16x4_t &factorChannel22_1024_s_16x4, const int32x4_t &biasChannel0_1024_s_32x4, const int32x4_t &biasChannel1_1024_s_32x4, const int32x4_t &biasChannel2_1024_s_32x4)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear com...
Definition FrameChannels.h:5980
static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel0_128_u_8x8, const uint8x8_t &factorChannel1_128_u_8x8, const uint8x8_t &factorChannel2_128_u_8x8, const uint8x8_t &factorChannel3_128_u_8x8)
Converts 8 pixels with 4 channels per pixel to 8 pixels with one channel per pixel by a linear combin...
static void addFirstChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be th...
Definition FrameChannels.h:2711
static void addLastChannel(const T *source, const T *sourceNewChannel, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the ba...
Definition FrameChannels.h:2731
static void removeFirstChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Removes the first channel from a given frame with zipped (generic) pixel format.
Definition FrameChannels.h:2767
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x4_t &factorChannel00_1024_s_16x4, const int16x4_t &factorChannel10_1024_s_16x4, const int16x4_t &factorChannel20_1024_s_16x4, const int16x4_t &factorChannel01_1024_s_16x4, const int16x4_t &factorChannel11_1024_s_16x4, const int16x4_t &factorChannel21_1024_s_16x4, const int16x4_t &factorChannel02_1024_s_16x4, const int16x4_t &factorChannel12_1024_s_16x4, const int16x4_t &factorChannel22_1024_s_16x4, const int32x4_t &biasChannel0_1024_s_32x4, const int32x4_t &biasChannel1_1024_s_32x4, const int32x4_t &biasChannel2_1024_s_32x4)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear comb...
Definition FrameChannels.h:5889
static void addLastChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be th...
Definition FrameChannels.h:2747
static void convertRow3ChannelsTo3Channels8BitPerChannel6BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static void reverseRowPixelOrderInPlace(T *data, const size_t size)
Reverses/mirrors the order of pixels in a given row (or a memory block in general) in place.
Definition FrameChannels.h:3017
static void applyRowOperatorSubset(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const RowOperatorFunction< TSource, TTarget, tSourceChannels, tTargetChannels > rowOperatorFunction, const unsigned int firstRow, const unsigned int numberRows)
Applies a row operator to a subset of all rows of a source image.
Definition FrameChannels.h:4853
static void applyPixelModifier(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:3954
static void narrowRow16BitPerChannelTo8BitPerChannel(const uint16_t *source, uint8_t *target, const size_t size, const void *unusedParameters=nullptr)
Narrows a row of pixels with 16 bit channels to pixels with 8 bit channels.
Definition FrameChannels.h:4129
static void applyAdvancedPixelModifierSubset(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:4614
static void convertRow4ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 4 channels to pixels with 3 channels by a linear combination of the four...
static void shuffleRowChannels(const T *source, T *target, const size_t size, const void *unusedOptions=nullptr)
Shuffles the channels of row pixels by application of a specified shuffle pattern.
Definition FrameChannels.h:3387
static void premultipliedAlphaToStraightAlpha8BitPerChannel(uint8_t *const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
Definition FrameChannels.h:4053
static void convertRow3ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_128_s_16x8, const int16x8_t &factorChannel10_128_s_16x8, const int16x8_t &factorChannel20_128_s_16x8, const int16x8_t &factorChannel01_128_s_16x8, const int16x8_t &factorChannel11_128_s_16x8, const int16x8_t &factorChannel21_128_s_16x8, const int16x8_t &factorChannel02_128_s_16x8, const int16x8_t &factorChannel12_128_s_16x8, const int16x8_t &factorChannel22_128_s_16x8, const int16x8_t &biasChannel0_128_s_16x8, const int16x8_t &biasChannel1_128_s_16x8, const int16x8_t &biasChannel2_128_s_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with 3 channels per pixel by a linear combi...
Definition FrameChannels.h:6106
static void convertRow3ChannelsTo4Channels8BitPerChannel6BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 4 channels by a linear combination of the thr...
static constexpr unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME
Definition of a constant to specify that the number of channels is not known at compile time but at ...
Definition FrameChannels.h:37
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_128_s_16x8, const int16x8_t &factorChannel10_128_s_16x8, const int16x8_t &factorChannel20_128_s_16x8, const int16x8_t &factorChannel01_128_s_16x8, const int16x8_t &factorChannel11_128_s_16x8, const int16x8_t &factorChannel21_128_s_16x8, const int16x8_t &factorChannel02_128_s_16x8, const int16x8_t &factorChannel12_128_s_16x8, const int16x8_t &factorChannel22_128_s_16x8, const int16x8_t &biasChannel0_128_s_16x8, const int16x8_t &biasChannel1_128_s_16x8, const int16x8_t &biasChannel2_128_s_16x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear comb...
Definition FrameChannels.h:5832
static void copyChannelRow(const T *source, T *target, const size_t size, const void *unusedParameters=nullptr)
Copies one channel from a source row to a target row with generic (zipped) pixel format.
Definition FrameChannels.h:4327
static void reverseRowPixelOrder(const T *source, T *target, const size_t size)
Reverses/mirrors the order of pixels in a given row (or a memory block in general).
Definition FrameChannels.h:2856
static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel0_128_u_8x8, const uint8x8_t &factorChannel1_128_u_8x8, const uint8x8_t &factorChannel2_128_u_8x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with one channel per pixel by a linear combin...
static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactors0123_128_s_32x)
Converts 16 pixels with 4 channels per pixel to 16 pixels with one channel per pixel by a linear comb...
Definition FrameChannels.h:5477
static void removeLastChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Removes the last channel from a given frame with zipped (generic) pixel format.
Definition FrameChannels.h:2783
static void transformGeneric(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker)
Transforms a frame with generic pixel format (with zipped pixel information) like RGB24 or YUV24,...
Definition FrameChannels.h:4028
static void setChannel(T *frame, const unsigned int width, const unsigned int height, const T value, const unsigned int framePaddingElements, Worker *worker=nullptr)
Sets one channel of a frame with a specific unique value.
Definition FrameChannels.h:2821
static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(uint8_t *const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
Definition FrameChannels.h:5113
static void narrow16BitPerChannelTo8BitPerChannel(const uint16_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Narrows 16 bit channels of a frame to 8 bit channels.
Definition FrameChannels.h:3938
static void transformGenericSubset(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const RowReversePixelOrderFunction< void > rowReversePixelOrderFunction, const unsigned int bytesPerRow, const unsigned int sourceStrideBytes, const unsigned int targetStrideBytes, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a frame with generic pixel format (with zipped pixel information) like RGB24 o...
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &factorChannel00_128_s_16x8, const __m128i &factorChannel10_128_s_16x8, const __m128i &factorChannel20_128_s_16x8, const __m128i &factorChannel01_128_s_16x8, const __m128i &factorChannel11_128_s_16x8, const __m128i &factorChannel21_128_s_16x8, const __m128i &factorChannel02_128_s_16x8, const __m128i &factorChannel12_128_s_16x8, const __m128i &factorChannel22_128_s_16x8, const __m128i &biasChannel0_s_16x8, const __m128i &biasChannel1_s_16x8, const __m128i &biasChannel2_s_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition FrameChannels.h:5252
static void reverseRowChannelOrder(const T *source, T *target, const size_t size, const void *unusedOptions=nullptr)
Reverses/mirrors the order of channels in a given row (or a memory block in general).
Definition FrameChannels.h:3195
static void convertRow3ChannelsTo3Channels8BitPerChannel10BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static void applyBivariateOperator(const TSource0 *source0, const TSource1 *source1, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Generic bivariate pixel operations. Applies bivariate per-pixel operators: C(y, x) = op(A(y,...
Definition FrameChannels.h:3987
static void addFirstChannel(const T *source, const T *sourceNewChannel, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the fr...
Definition FrameChannels.h:2695
static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel00_128_u_8x8, const uint8x8_t &factorChannel10_128_u_8x8, const uint8x8_t &factorChannel01_128_u_8x8, const uint8x8_t &factorChannel11_128_u_8x8, const uint8x8_t &factorChannel02_128_u_8x8, const uint8x8_t &factorChannel12_128_u_8x8, const uint8x8_t &factorChannel03_128_u_8x8, const uint8x8_t &factorChannel13_128_u_8x8)
Converts 8 pixels with 4 channels per pixel to 8 pixels with two channels per pixel by a linear combi...
Definition FrameChannels.h:6322
static void separateTo1ChannelRuntime(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
Definition FrameChannels.h:4348
static void zipChannelsRuntime(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition FrameChannels.h:4421
void(*)(const TSource *sourceRow, TTarget *targetRow, const unsigned int width, const unsigned int height, unsigned int rowIndex, const unsigned int sourceStrideElements, const unsigned int targetStrideElements) RowOperatorFunction
Definition of a function pointer to a function able to operate on an entire image row.
Definition FrameChannels.h:43
static void applyPixelModifierSubset(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:4510
static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(uint8_t *const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
Definition FrameChannels.h:5024
static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactorsChannel0_0123_128_s_16x8, const __m128i &multiplicationFactorsChannel1_0123_128_s_16x8)
Converts 16 pixels with 4 channels per pixel to 16 pixels with two channels per pixel by a linear comb...
Definition FrameChannels.h:5540
This is the base class for all frame converter classes.
Definition FrameConverter.h:32
ConversionFlag
Definition of individual conversion flags.
Definition FrameConverter.h:39
@ CONVERT_NORMAL
Normal conversion, neither flips nor mirrors the image.
Definition FrameConverter.h:49
@ CONVERT_FLIPPED_AND_MIRRORED
Rotated conversion, rotates the image by 180.0 degrees with anchor in the center of the image.
Definition FrameConverter.h:82
@ CONVERT_MIRRORED
Mirrored conversion, exchanges left and right of the image (like in a mirror, mirroring around the y-...
Definition FrameConverter.h:71
@ CONVERT_FLIPPED
Flipped conversion, exchanges top and bottom of the image (flipping around the x-axis).
Definition FrameConverter.h:60
static void convertGenericPixelFormat(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const ConversionFlag flag, const RowConversionFunction< TSource, TTarget > rowConversionFunction, const RowReversePixelOrderInPlaceFunction< TTarget > targetReversePixelOrderInPlaceFunction, const bool areContinuous, const void *options, Worker *worker)
Converts a frame with generic pixel format (e.g., RGBA32, BGR24, YUV24, ...) to a frame with generic ...
Definition FrameConverter.h:3211
void(*)(T *row, const size_t width) RowReversePixelOrderInPlaceFunction
Definition of a function pointer to a function able to reverse the order of pixels in an image row wi...
Definition FrameConverter.h:589
void(*)(const T *inputRow, T *targetRow, const size_t width) RowReversePixelOrderFunction
Definition of a function pointer to a function able to reverse the order of pixels in an image row wi...
Definition FrameConverter.h:580
static void convertArbitraryPixelFormat(const void **sources, void **targets, const unsigned int width, const unsigned int height, const ConversionFlag flag, const unsigned int multipleRowsPerIteration, const MultipleRowsConversionFunction multipleRowsConversionFunction, const void *options, Worker *worker)
Converts a frame with arbitrary pixel format (e.g., Y_UV12, Y_VU12, YUYV16, ...) to a frame with arbi...
Definition FrameConverter.h:3234
static OCEAN_FORCE_INLINE uint8x16_t cast16ElementsNEON(const float32x4_t &sourceA_f_32x4, const float32x4_t &sourceB_f_32x4, const float32x4_t &sourceC_f_32x4, const float32x4_t &sourceD_f_32x4)
Casts 16 float elements to 16 uint8_t elements.
Definition NEON.h:1208
static __m128i divideByRightShiftSigned32Bit(const __m128i &value, const unsigned int rightShifts)
Divides four signed 32 bit values by applying a right shift.
Definition SSE.h:3108
static __m128i load128i(const void *const buffer)
Loads a 128i value from the memory.
Definition SSE.h:3619
static void store128i(const __m128i &value, uint8_t *const buffer)
Stores a 128i value to the memory.
Definition SSE.h:3764
static __m128i divideByRightShiftSigned16Bit(const __m128i &value, const unsigned int rightShifts)
Divides eight signed 16 bit values by applying a right shift.
Definition SSE.h:3066
static OCEAN_FORCE_INLINE void multiplyInt8x16ToInt32x8AndAccumulate(const __m128i &values0, const __m128i &values1, __m128i &results0, __m128i &results1)
Multiplies 8 int16_t values with 8 int16_t values and adds the products to 8 int32_t values.
Definition SSE.h:3909
static OCEAN_FORCE_INLINE void interleave3Channel8Bit48Elements(const __m128i &channel0, const __m128i &channel1, const __m128i &channel2, __m128i &interleavedA, __m128i &interleavedB, __m128i &interleavedC)
Interleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
Definition SSE.h:3345
static OCEAN_FORCE_INLINE void reverseChannelOrder3Channel8Bit48Elements(const __m128i &interleaved0, const __m128i &interleaved1, const __m128i &interleaved2, __m128i &reversedInterleaved0, __m128i &reversedInterleaved1, __m128i &reversedInterleaved2)
Reverses the order of the first and last channel of 48 elements of an image with 3 interleaved channe...
Definition SSE.h:3387
static __m128i removeHighBits16_8(const __m128i &value)
Removes the higher 8 bits of eight 16 bit elements.
Definition SSE.h:3799
static OCEAN_FORCE_INLINE void deInterleave3Channel8Bit48Elements(const __m128i &interleavedA, const __m128i &interleavedB, const __m128i &interleavedC, __m128i &channel0, __m128i &channel1, __m128i &channel2)
Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
Definition SSE.h:3304
static __m128i set128i(const unsigned long long high64, const unsigned long long low64)
Sets a 128i value by two 64 bit values.
Definition SSE.h:3770
static OCEAN_FORCE_INLINE void reverseChannelOrder4Channel8Bit64Elements(const uint8_t *interleaved, uint8_t *reversedInterleaved)
Reverses the order of the channels of 16 pixels (64 elements) of an image with 4 interleaved channels...
Definition SSE.h:3412
static OCEAN_FORCE_INLINE void multiplyInt8x16ToInt32x8(const __m128i &values0, const __m128i &values1, __m128i &products0, __m128i &products1)
Multiplies 8 int16_t values with 8 int16_t values and returns the products as 8 int32_t results.
Definition SSE.h:3900
static OCEAN_FORCE_INLINE void reverseChannelOrder2Channel8Bit32Elements(const uint8_t *interleaved, uint8_t *reversedInterleaved)
Reverses the order of the channels of 16 pixels (32 elements) of an image with 2 interleaved channels...
Definition SSE.h:3372
static Caller< void > createStatic(typename StaticFunctionPointerMaker< void, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass >::Type function)
Creates a new caller container for a static function with no function parameter.
Definition Caller.h:2876
This class implements Ocean's image class.
Definition Frame.h:1808
PixelFormat
Definition of all pixel formats available in the Ocean framework.
Definition Frame.h:183
TypeMapperBySize< sizeof(T)>::Type Type
Definition of an invalid mapped data type.
Definition DataType.h:508
This class implements a worker able to distribute function calls over different threads.
Definition Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
std::vector< Frame > Frames
Definition of a vector holding frames.
Definition Frame.h:1771
std::vector< Index32 > Indices32
Definition of a vector holding 32 bit index values.
Definition Base.h:96
The namespace covering the entire Ocean framework.
Definition Accessor.h:15
Default definition of a type with tBytes bytes.
Definition DataType.h:32