Ocean
FrameChannels.h
Go to the documentation of this file.
1 /*
2  * Copyright (c) Meta Platforms, Inc. and affiliates.
3  *
4  * This source code is licensed under the MIT license found in the
5  * LICENSE file in the root directory of this source tree.
6  */
7 
8 #ifndef META_OCEAN_CV_FRAME_CHANNELS_H
9 #define META_OCEAN_CV_FRAME_CHANNELS_H
10 
11 #include "ocean/cv/CV.h"
13 #include "ocean/cv/NEON.h"
14 #include "ocean/cv/SSE.h"
15 
16 #include "ocean/base/DataType.h"
17 #include "ocean/base/Frame.h"
18 #include "ocean/base/Worker.h"
19 
20 namespace Ocean
21 {
22 
23 namespace CV
24 {
25 
26 /**
27  * This class implements frame channel conversion, transformation and extraction functions.
28  * @ingroup cv
29  */
30 class OCEAN_CV_EXPORT FrameChannels : public FrameConverter
31 {
32  public:
33 
34  /**
35  * Definition of a constant to specify that the number of channels is not known at compile time but at runtime only.
36  */
37  static constexpr unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME = 0u;
38 
39  /**
40  * Definition of a function pointer to a function able to operate on an entire image row.
    * Only TSource and TTarget appear in the pointer's signature (as element types of the source and target row pointers); tSourceChannels and tTargetChannels are not part of the signature itself.
41  */
42  template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels>
43  using RowOperatorFunction = void(*)(const TSource* sourceRow, TTarget* targetRow, const unsigned int width, const unsigned int height, unsigned int rowIndex, const unsigned int sourceStrideElements, const unsigned int targetStrideElements);
44 
45  /**
46  * The following comfort class provides comfortable functions simplifying prototyping applications but also increasing binary size of the resulting applications.
47  * Best practice is to avoid using these functions if binary size matters,<br>
48  * as for every comfort function a corresponding function exists with specialized functionality not increasing binary size significantly.<br>
49  */
50  class OCEAN_CV_EXPORT Comfort
51  {
52  public:
53 
54  /**
55  * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
56  * Usage:
57  * @code
58  * Frame rgbSourceFrame = ...;
59  *
60  * Frames targetFrames;
61  *
62  * if (separateTo1Channel(rgbSourceFrame, targetFrames))
63  * {
64  *     ocean_assert(targetFrames.size() == 3);
65  *
66  *     // do something with targetFrames
67  * }
68  * @endcode
69  * @param sourceFrame The frame to be separated, must be valid
70  * @param targetFrames The resulting frames each holding one channel of the source frame, will be set automatically
71  * @param targetPixelFormat Optional explicit pixel format of the target frames, must be a pixel format with 1 channel and must fit with the data type of the source pixel format, otherwise FORMAT_UNDEFINED
72  * @return True, if succeeded
73  */
74  static bool separateTo1Channel(const Frame& sourceFrame, Frames& targetFrames, const FrameType::PixelFormat targetPixelFormat = FrameType::FORMAT_UNDEFINED);
75 
76  /**
77  * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
78  * Usage:
79  * @code
80  * Frame rgbSourceFrame = ...;
81  *
82  * Frame targetFrameA;
83  * Frame targetFrameB;
84  * Frame targetFrameC;
85  *
86  * if (separateTo1Channel(rgbSourceFrame, {&targetFrameA, &targetFrameB, &targetFrameC}))
87  * {
88  *     // do something with targetFrames
89  * }
90  * @endcode
91  * @param sourceFrame The frame to be separated, must be valid
92  * @param targetFrames The resulting frames each holding one channel of the source frame, one for each source channel
93  * @param targetPixelFormat Optional explicit pixel format of the target frames, must be a pixel format with 1 channel and must fit with the data type of the source pixel format, otherwise FORMAT_UNDEFINED
94  * @return True, if succeeded
95  */
96  static bool separateTo1Channel(const Frame& sourceFrame, const std::initializer_list<Frame*>& targetFrames, const FrameType::PixelFormat targetPixelFormat = FrameType::FORMAT_UNDEFINED);
97 
98  /**
99  * Zips/interleaves 1-channel images into one image with n-channels.
100  * Usage:
101  * @code
102  * Frame sourceFrameA = ...;
103  * Frame sourceFrameB = ...;
104  * Frame sourceFrameC = ...;
105  *
106  * Frame targetFrame;
107  * if (zipChannels({sourceFrameA, sourceFrameB, sourceFrameC}, targetFrame))
108  * {
109  *     ocean_assert(targetFrame.channels() == 3u);
110  *
111  *     // do something with targetFrame
112  * }
113  * @endcode
114  * @param sourceFrames The frames to be zipped/interleaved, must be valid
115  * @param targetFrame The resulting frame holding n channels, will be set automatically
116  * @param targetPixelFormat Optional explicit pixel format of the target frame, must fit with the data type of the source pixel format, otherwise FORMAT_UNDEFINED; NOTE(review): this text previously required "a pixel format with 1 channel", which contradicts the n-channel target and looks copied from separateTo1Channel() - presumably the format must have one channel per source frame, confirm against the implementation
117  * @return True, if succeeded
118  */
119  static bool zipChannels(const std::initializer_list<Frame>& sourceFrames, Frame& targetFrame, const FrameType::PixelFormat targetPixelFormat = FrameType::FORMAT_UNDEFINED);
120 
121  /**
122  * Zips/interleaves 1-channel images into one image with n-channels.
123  * Usage:
124  * @code
125  * Frames sourceFrames = ...;
126  *
127  * Frame targetFrame;
128  * if (zipChannels(sourceFrames, targetFrame))
129  * {
130  *     ocean_assert(targetFrame.channels() == sourceFrames.size());
131  *
132  *     // do something with targetFrame
133  * }
134  * @endcode
135  * @param sourceFrames The frames to be zipped/interleaved, must be valid
136  * @param targetFrame The resulting frame holding n channels, will be set automatically
137  * @param targetPixelFormat Optional explicit pixel format of the target frame, must fit with the data type of the source pixel format, otherwise FORMAT_UNDEFINED; NOTE(review): this text previously required "a pixel format with 1 channel", which contradicts the n-channel target and looks copied from separateTo1Channel() - presumably the format must have one channel per source frame, confirm against the implementation
138  * @return True, if succeeded
139  */
140  static bool zipChannels(const Frames& sourceFrames, Frame& targetFrame, const FrameType::PixelFormat targetPixelFormat = FrameType::FORMAT_UNDEFINED);
141 
142  /**
143  * Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
144  * @param frame The image to convert, must be valid
145  * @param worker Optional worker object to distribute the computation
146  * @return True, if succeeded
147  * @see straightAlphaToPremultipliedAlpha().
148  */
149  static bool premultipliedAlphaToStraightAlpha(Frame& frame, Worker* worker = nullptr);
150 
151  /**
152  * Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
153  * @param source The source image to convert, must be valid
154  * @param target The resulting converted target image, the frame type will be changed if it does not match the source frame
155  * @param worker Optional worker object to distribute the computation
156  * @return True, if succeeded
157  * @see straightAlphaToPremultipliedAlpha().
158  */
159  static bool premultipliedAlphaToStraightAlpha(const Frame& source, Frame& target, Worker* worker = nullptr);
160 
161  /**
162  * Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied alpha.
163  * @param frame The image to convert, must be valid
164  * @param worker Optional worker object to distribute the computation
     * @return True, if succeeded (assumed to mirror premultipliedAlphaToStraightAlpha(); verify against the implementation)
165  * @see premultipliedAlphaToStraightAlpha().
166  */
167  static bool straightAlphaToPremultipliedAlpha(Frame& frame, Worker* worker = nullptr);
168 
169  /**
170  * Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied alpha.
171  * @param source The source image to convert, must be valid
172  * @param target The resulting converted target image, must be valid
173  * @param worker Optional worker object to distribute the computation
     * @return True, if succeeded (assumed to mirror premultipliedAlphaToStraightAlpha(); verify against the implementation)
174  * @see premultipliedAlphaToStraightAlpha().
175  */
176  static bool straightAlphaToPremultipliedAlpha(const Frame& source, Frame& target, Worker* worker = nullptr);
177  };
178 
179  /**
180  * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
181  * Usage:
182  * @code
183  * const unsigned int width = ...;
184  * const unsigned int height = ...;
185  *
186  * const uint8_t* sourceFrame = ...;
187  * const unsigned int sourceFramePaddingElements = ...;
188  *
189  * constexpr unsigned int channels = 2u;
190  *
191  * uint8_t* targetFrames[channels] = {..., ...};
192  * const unsigned int targetFramesPaddingElements[channels] = {..., ...};
193  *
194  * separateTo1Channel<uint8_t, uint8_t, channels>(sourceFrame, targetFrames, width, height, channels, sourceFramePaddingElements, targetFramesPaddingElements);
195  * @endcode
196  * @param sourceFrame The frame to be separated, must be valid
197  * @param targetFrames The pointers to the resulting separated frames each holding one channel of the source frame, with already allocated memory
198  * @param width The width of the source frame in pixels, with range [1, infinity)
199  * @param height The height of the source frame in pixels, with range [1, infinity)
200  * @param channels The number of channels the source frame has, with range [1, infinity)
201  * @param sourceFramePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
202  * @param targetFramesPaddingElements The array of padding elements at the end of each target row, one for each target frame, in elements, with range [0, infinity), nullptr if all are zero
203  * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
204  * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
205  * @tparam tChannels The number of source channels (and target frames) if known at compile time; otherwise CHANNELS_NOT_KNOWN_AT_COMPILE_TIME == 0, if known at compile time must be identical with 'channels'
206  */
207  template <typename TSource, typename TTarget, unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
208  static void separateTo1Channel(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements);
209 
210  /**
211  * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
212  * Usage:
213  * @code
214  * const unsigned int width = ...;
215  * const unsigned int height = ...;
216  *
217  * const uint8_t* sourceFrame = ...;
218  * const unsigned int sourceFramePaddingElements = ...;
219  *
220  * uint8_t* targetFrame0 = ...;
221  * uint8_t* targetFrame1 = ...;
222  * const unsigned int targetFramePaddingElements0 = ...;
223  * const unsigned int targetFramePaddingElements1 = ...;
224  *
225  * separateTo1Channel<uint8_t, uint8_t>(sourceFrame, {targetFrame0, targetFrame1}, width, height, sourceFramePaddingElements, {targetFramePaddingElements0, targetFramePaddingElements1});
226  * @endcode
227  * @param sourceFrame The frame to be separated, must be valid
228  * @param targetFrames The list of pointers to the resulting separated frames each holding one channel of the source frame, with already allocated memory
229  * @param width The width of the source frame in pixels, with range [1, infinity)
230  * @param height The height of the source frame in pixels, with range [1, infinity)
231  * @param sourceFramePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
232  * @param targetFramesPaddingElements The list of padding elements at the end of each target row, one for each target frame, in elements, with range [0, infinity)
233  * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
234  * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
235  */
236  template <typename TSource, typename TTarget>
237  static void separateTo1Channel(const TSource* const sourceFrame, const std::initializer_list<TTarget*>& targetFrames, const unsigned int width, const unsigned int height, const unsigned int sourceFramePaddingElements, const std::initializer_list<const unsigned int>& targetFramesPaddingElements);
238 
239  /**
240  * Zips/interleaves 1-channel images into one image with n-channels.
241  * Usage:
242  * @code
243  * const unsigned int width = ...;
244  * const unsigned int height = ...;
245  *
246  * const uint8_t* sourceFrames[2] = {..., ...};
247  * const unsigned int sourceFramesPaddingElements[2] = {..., ...};
248  *
249  * uint8_t* targetFrame = ...;
250  * const unsigned int targetFramePaddingElements = ...;
251  *
252  * zipChannels<uint8_t, uint8_t>(sourceFrames, targetFrame, width, height, 2u, sourceFramesPaddingElements, targetFramePaddingElements);
253  * @endcode
254  * @param sourceFrames The pointers to the individual 1-channel frames, one for each image, must be valid
255  * @param targetFrame The pointer to the resulting zipped frame holding n-channels, must be valid
256  * @param width The width of the source frames in pixels, with range [1, infinity)
257  * @param height The height of the source frames in pixels, with range [1, infinity)
258  * @param channels The number of provided source frames (and the number of channels the target frame will have), with range [1, infinity)
259  * @param sourceFramesPaddingElements The array of padding elements at the end of each source row, one for each source frame, in elements, with range [0, infinity), nullptr if all are zero
260  * @param targetFramePaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
261  * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
262  * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
263  * @tparam tChannels The number of source frames (and target channels) if known at compile time; otherwise CHANNELS_NOT_KNOWN_AT_COMPILE_TIME == 0, if known at compile time must be identical with 'channels'
264  */
265  template <typename TSource, typename TTarget, unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
266  static void zipChannels(const TSource* const* const sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements);
267 
268  /**
269  * Zips/interleaves 1-channel images into one image with n-channels.
270  * Usage:
271  * @code
272  * const unsigned int width = ...;
273  * const unsigned int height = ...;
274  *
275  * const uint8_t* sourceFrame0 = ...;
276  * const uint8_t* sourceFrame1 = ...;
277  * const unsigned int sourceFramePaddingElements0 = ...;
278  * const unsigned int sourceFramePaddingElements1 = ...;
279  *
280  * uint8_t* targetFrame = ...;
281  * const unsigned int targetFramePaddingElements = ...;
282  *
283  * zipChannels<uint8_t, uint8_t>({sourceFrame0, sourceFrame1}, targetFrame, width, height, {sourceFramePaddingElements0, sourceFramePaddingElements1}, targetFramePaddingElements);
284  * @endcode
285  * @param sourceFrames The list of pointers to the individual 1-channel frames, one for each image, must be valid
286  * @param targetFrame The pointer to the resulting zipped frame holding n-channels, must be valid
287  * @param width The width of the source frames in pixels, with range [1, infinity)
288  * @param height The height of the source frames in pixels, with range [1, infinity)
289  * @param sourceFramesPaddingElements The list of padding elements at the end of each source row, one for each source frame, in elements, with range [0, infinity)
290  * @param targetFramePaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
291  * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
292  * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
293  */
294  template <typename TSource, typename TTarget>
295  static void zipChannels(const std::initializer_list<const TSource*>& sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const std::initializer_list<unsigned int>& sourceFramesPaddingElements, const unsigned int targetFramePaddingElements);
296 
297  /**
298  * Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the front of all existing channels.
299  * @param source The source frame to which the new channel will be added, must be valid
300  * @param sourceNewChannel The 1-channel frame providing the new channel information, must be valid
301  * @param target The target frame receiving the joined channels, must be valid
302  * @param width The width of the frames in pixels, with range [1, infinity)
303  * @param height The height of the frames in pixels, with range [1, infinity)
304  * @param conversionFlag The conversion to be applied
305  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
306  * @param sourceNewChannelPaddingElements The number of padding elements at the end of each new-channel-source row, in elements, with range [0, infinity)
307  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
308  * @param worker Optional worker object to distribute the computational load
309  * @tparam T Data type of each channel pixel value
310  * @tparam tSourceChannels Number of channels of the source frame (without the new channel), with range [1, infinity)
311  */
312  template <typename T, unsigned int tSourceChannels>
313  static inline void addFirstChannel(const T* source, const T* sourceNewChannel, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
314 
315  /**
316  * Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be the same for each pixel.
317  * @param source The source frame that provides the existing channels
318  * @param newChannelValue Value that will be assigned to the new channel for each pixel
319  * @param target The target frame to which the existing channels and the new channel will be added (as new first channel)
320  * @param width The width of the frames in pixels, with range [1, infinity)
321  * @param height The height of the frames in pixels, with range [1, infinity)
322  * @param conversionFlag The conversion to be applied
323  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
324  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
325  * @param worker Optional worker object to distribute the computational load
326  * @tparam T Data type of each channel pixel value
327  * @tparam tSourceChannels Number of channels of the source frame (without the new channel)
328  */
329  template <typename T, unsigned int tSourceChannels>
330  static inline void addFirstChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
331 
332  /**
333  * Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the back of all existing channels.
334  * @param source The source frame to which the new channel will be added, must be valid
335  * @param sourceNewChannel The 1-channel frame providing the new channel information, must be valid
336  * @param target The target frame receiving the joined channels, must be valid
337  * @param width The width of the frames in pixels, with range [1, infinity)
338  * @param height The height of the frames in pixels, with range [1, infinity)
339  * @param conversionFlag The conversion to be applied
340  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
341  * @param sourceNewChannelPaddingElements The number of padding elements at the end of each new-channel-source row, in elements, with range [0, infinity)
342  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
343  * @param worker Optional worker object to distribute the computational load
344  * @tparam T Data type of each channel pixel value
345  * @tparam tSourceChannels Number of channels of the source frame (without the new channel), with range [1, infinity)
346  */
347  template <typename T, unsigned int tSourceChannels>
348  static inline void addLastChannel(const T* source, const T* sourceNewChannel, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
349 
350  /**
351  * Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be the same for each pixel.
352  * @param source The source frame that provides the existing channels
353  * @param newChannelValue Value that will be assigned to the new channel for each pixel
354  * @param target The target frame to which the existing channels and the new channel will be added (as new last channel)
355  * @param width The width of the frames in pixels, with range [1, infinity)
356  * @param height The height of the frames in pixels, with range [1, infinity)
357  * @param conversionFlag The conversion to be applied
358  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
359  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
360  * @param worker Optional worker object to distribute the computational load
361  * @tparam T Data type of each channel pixel value
362  * @tparam tSourceChannels Number of channels of the source frame (without the new channel)
363  */
364  template <typename T, unsigned int tSourceChannels>
365  static inline void addLastChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
366 
367  /**
368  * Removes the first channel from a given frame with zipped (generic) pixel format.
369  * This function is mainly a wrapper around FrameChannels::shuffleChannels().
370  * @param source The source frame from which the first channel will be removed, must be valid
371  * @param target The target frame without the first channel, must be valid
372  * @param width The width of the frames in pixels, with range [1, infinity)
373  * @param height The height of the frames in pixels, with range [1, infinity)
374  * @param conversionFlag The conversion to be applied
375  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
376  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
377  * @param worker Optional worker object to distribute the computational load
378  * @tparam T Data type of each channel pixel value
379  * @tparam tSourceChannels Number of channels of the source frame (including the channel that will be removed), with range [2, infinity)
380  * @see FrameChannels::shuffleChannels<T, tSourceChannels, tTargetChannels, tShufflePattern>(), removeLastChannel().
381  */
382  template <typename T, unsigned int tSourceChannels>
383  static inline void removeFirstChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
384 
385  /**
386  * Removes the last channel from a given frame with zipped (generic) pixel format.
387  * This function is mainly a wrapper around FrameChannels::shuffleChannels().
388  * @param source The source frame from which the last channel will be removed, must be valid
389  * @param target The target frame without the last channel, must be valid
390  * @param width The width of the frames in pixels, with range [1, infinity)
391  * @param height The height of the frames in pixels, with range [1, infinity)
392  * @param conversionFlag The conversion to be applied
393  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
394  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
395  * @param worker Optional worker object to distribute the computational load
396  * @tparam T Data type of each channel pixel value
397  * @tparam tSourceChannels Number of channels of the source frame (including the channel that will be removed), with range [2, infinity)
398  * @see FrameChannels::shuffleChannels<T, tSourceChannels, tTargetChannels, tShufflePattern>(), removeFirstChannel().
399  */
400  template <typename T, unsigned int tSourceChannels>
401  static inline void removeLastChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
402 
403  /**
404  * Copies one channel from a given frame with zipped pixel format to another frame with zipped pixel format.
405  * @param source The source frame from which the channel will be copied, must be valid
406  * @param target The target frame to which the channel will be copied, must be valid
407  * @param width The width of both frames in pixels, with range [1, infinity)
408  * @param height The height of both frames in pixels, with range [1, infinity)
409  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
410  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
411  * @param worker Optional worker object to distribute the computational load
412  * @tparam T Data type of each channel pixel value
413  * @tparam tSourceChannels Number of channels in the source frame, with range [1, infinity)
414  * @tparam tTargetChannels Number of channels in the target frame, with range [1, infinity)
415  * @tparam tSourceChannelIndex The index of the source channel that will be copied, with range [0, tSourceChannels - 1]
416  * @tparam tTargetChannelIndex The index of the target channel to which the source channel will be copied, with range [0, tTargetChannels - 1]
417  */
418  template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tSourceChannelIndex, unsigned int tTargetChannelIndex>
419  static inline void copyChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
420 
421  /**
422  * Sets one channel of a frame to one specific value, the same value is used for every pixel.
423  * @param frame The frame in which one channel of each pixel will be set
424  * @param width The width of the frame in pixels, with range [1, infinity)
425  * @param height The height of the frame in pixels, with range [1, infinity)
426  * @param value The value to be set
427  * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
428  * @param worker Optional worker object to distribute the computation
429  * @tparam T Data type of each channel pixel value
430  * @tparam tChannel Index of the channel that will be set, with range [0, tChannels)
431  * @tparam tChannels Number of data channels of the frame, with range [1, infinity)
432  */
433  template <typename T, unsigned int tChannel, unsigned int tChannels>
434  static inline void setChannel(T* frame, const unsigned int width, const unsigned int height, const T value, const unsigned int framePaddingElements, Worker* worker = nullptr);
435 
436  /**
437  * Reverses the order of the channels of a frame with zipped pixel format.
438  * The first channel will be exchanged with the last channel, the second channel will be exchanged with the second last channel and so on.
439  * @param source The source frame from which the channels will be swapped, must be valid
440  * @param target The target frame that receives the swapped channels, must be valid
441  * @param width The width of the source frame in pixels, with range [1, infinity)
442  * @param height The height of the source frame in pixels, with range [1, infinity)
443  * @param conversionFlag The conversion to be applied
444  * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
445  * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
446  * @param worker Optional worker object to distribute the computation
447  * @tparam T Data type of each channel pixel value
448  * @tparam tChannels Number of data channels, with range [1, infinity)
449  */
450  template <typename T, unsigned int tChannels>
451  static inline void reverseChannelOrder(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
452 
453  /**
454  * Shuffles the channels of a frame by an arbitrary pattern.
455  * The shuffle pattern is defined in groups of four bits defining the source channels; as the example below shows, the least significant group holds the source channel of the first target channel.<br>
456  * For the shuffling from e.g., an RGBA32 row to a BGRA32 row the pattern 0x3012u must be defined:
457  * <pre>
458  * source pixel:  R G B A
459  *                0 1 2 3
460  * target pixel:  B G R A
461  *                2 1 0 3
462  * pattern (with reversed order): 0x3012
463  * </pre>
464  * @param source The source frame for which the channels will be shuffled, must be valid
465  * @param target The target frame that receives the shuffled channels, must be valid
466  * @param width The width of the source frame in pixels, with range [1, infinity)
467  * @param height The height of the source frame in pixels, with range [1, infinity)
468  * @param conversionFlag The conversion to be applied
469  * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
470  * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
471  * @param worker Optional worker object to distribute the computation
472  * @tparam T Data type of each channel pixel value
473  * @tparam tSourceChannels Number of source data channels, with range [1, 8u]
474  * @tparam tTargetChannels Number of target data channels, with range [1, 8u]
475  * @tparam tShufflePattern Groups of four bits define the source channel, e.g., 0x76543210 defines the identity transformation, 0x01234567 defines the reverse transformation
476  */
477  template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
478  static inline void shuffleChannels(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
479 
480  /**
481  * Shuffles the channels of a source frame and sets the last channel of the target frame to a constant value.
482  * The shuffle pattern is defined in groups of four bits, each group defining the source channel of one target channel (the least significant group belongs to the first target channel).<br>
483  * For the shuffling from e.g., an RGB24 frame to a BGRA32 frame the pattern 0x012u must be defined:
484  * <pre>
485  * source pixel R G B
486  * 0 1 2
487  * target pixel B G R A
488  * 2 1 0
489  * pattern (with reversed order): 0x012
490  * </pre>
491  * @param source The source frame for which the channels will be shuffled, must be valid
492  * @param newChannelValue The constant channel value which will be added as last channel to the target frame, with range [0, infinity)
493  * @param target The target frame that receives the shuffled channels, must be valid
494  * @param width The width of the source frame in pixel, with range [1, infinity)
495  * @param height The height of the source frame in pixel, with range [1, infinity)
496  * @param conversionFlag The conversion to be applied
497  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
498  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
499  * @param worker Optional worker object to distribute the computation
500  * @tparam T Data type of each channel pixel value
501  * @tparam tSourceChannels Number of source data channels, with range [1, 8u]
502  * @tparam tTargetChannels Number of target data channels, including the additional extra target channel, with range [2, 8u]
503  * @tparam tShufflePattern Groups of four bits define the source channel for each shuffled target channel, e.g., 0x76543210 defines the identity transformation, 0x01234567 defines the reverse transformation
504  */
505  template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
506  static inline void shuffleChannelsAndSetLastChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
507 
508  /**
509  * Narrows the 16 bit channels of a frame to 8 bit channels.
510  * @param source The source frame (with 16 bit per channel) for which the channels will be narrowed, must be valid
511  * @param target The target frame (with 8 bit per channel) that receives the narrowed channels, must be valid
512  * @param width The width of the source frame in pixel, with range [1, infinity)
513  * @param height The height of the source frame in pixel, with range [1, infinity)
514  * @param conversionFlag The conversion to be applied
515  * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
516  * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
517  * @param worker Optional worker object to distribute the computation
518  * @tparam tChannels Number of data channels of the source frame (and of the target frame), with range [1, infinity)
519  */
520  template <unsigned int tChannels>
521  static inline void narrow16BitPerChannelTo8BitPerChannel(const uint16_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
522 
523  /**
524  * Applies a specific modifier function on each pixel, source and target frame share the same element type and the same number of channels. NOTE(review): this function does not accept padding parameters, so presumably both frames must be continuous - confirm against the implementation.
525  * @param source The source frame providing the pixel information, must be valid
526  * @param target The target frame receiving the pixel information, must be valid
527  * @param width The width of the source frame in pixel, with range [1, infinity)
528  * @param height The height of the source frame in pixel, with range [1, infinity)
529  * @param conversionFlag The conversion to be applied
530  * @param worker Optional worker object to distribute the computation
531  * @tparam T Data type of each channel pixel value
532  * @tparam tChannels Number of data channels, with range [1, infinity)
533  * @tparam tPixelFunction Pixel modification function, receiving a pointer to the source pixel and a pointer to the target pixel
534  */
535  template <typename T, unsigned int tChannels, void (*tPixelFunction)(const T*, T*)>
536  static void applyPixelModifier(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, Worker* worker = nullptr);
537 
538  /**
539  * Applies a specific modifier function on each pixel, while source and target frame can have individual element types and individual numbers of channels.
540  * @param source The source frame providing the pixel information, must be valid
541  * @param target The target frame receiving the pixel information, must be valid
542  * @param width The width of the source frame in pixel, with range [1, infinity)
543  * @param height The height of the source frame in pixel, with range [1, infinity)
544  * @param sourcePaddingElements The number of padding elements at the end of each row of the source frame, in elements, with range [0, infinity)
545  * @param targetPaddingElements The number of padding elements at the end of each row of the target frame, in elements, with range [0, infinity)
546  * @param conversionFlag The conversion to be applied
547  * @param worker Optional worker object to distribute the computation
548  * @tparam TSource Data type of each source channel pixel value
549  * @tparam TTarget Data type of each target channel pixel value
550  * @tparam tSourceChannels Number of source data channels, with range [1, infinity)
551  * @tparam tTargetChannels Number of target data channels, with range [1, infinity)
552  * @tparam tPixelFunction Pixel modification function, receiving a pointer to the source pixel and a pointer to the target pixel
553  */
554  template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tPixelFunction)(const TSource*, TTarget*)>
555  static void applyAdvancedPixelModifier(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker* worker = nullptr);
556 
557  /**
558  * Generic bivariate pixel operations.<br>
559  * Applies bivariate per-pixel operators: `C(y, x) = op(A(y, x), B(y, x))`. Input and output must have the same frame type and have a single plane.
560  * @param source0 First source frame
561  * @param source1 Second source frame
562  * @param target The target frame
563  * @param width The width of the source frame in pixel, with range [1, infinity)
564  * @param height The height of the source frame in pixel, with range [1, infinity)
565  * @param source0PaddingElements The number of padding elements at the end of each row of the first source, in elements, with range [0, infinity)
566  * @param source1PaddingElements The number of padding elements at the end of each row of the second source, in elements, with range [0, infinity)
567  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
568  * @param conversionFlag The conversion to be applied
569  * @param worker Optional worker object to distribute the computation
570  * @tparam TSource0 Type of the first data source
571  * @tparam TSource1 Type of the second data source
572  * @tparam TTarget Type of the target
573  * @tparam TIntermediate Data type that is used for the computation of intermediate results, e.g. if TSource0 and TSource1 are different
574  * @tparam tSourceChannels Number of channels of the two sources, range: [1, infinity)
575  * @tparam tTargetChannels Number of channels of the target, range: [1, infinity)
576  * @tparam tOperator The operation (function) that is applied on both sources to yield the value for the target (called per pixel), receiving a pointer to the pixel of the first source, a pointer to the pixel of the second source, and a pointer to the target pixel
577  */
578  template <typename TSource0, typename TSource1, typename TTarget, typename TIntermediate, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
579  static void applyBivariateOperator(const TSource0* source0, const TSource1* source1, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker* worker = nullptr);
580 
581  /**
582  * Applies a row operator to all rows of a source image.
583  * The row operator is given as function pointer and is intended to transform a source row to a target row.<br>
584  * The function allows implementing e.g., frame filters with few lines of code; source and target frame must have the same size.
585  * @param source The source frame to which the row operator is applied, must be valid
586  * @param target The target frame receiving the result of the row operator, must be valid
587  * @param width The width of the source frame and target frame in pixel, with range [1, infinity)
588  * @param height The height of the source frame and target frame in pixel, with range [1, infinity)
589  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
590  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
591  * @param rowOperatorFunction The pointer to the row operator function (see RowOperatorFunction for the expected signature), must be valid
592  * @param worker Optional worker object to distribute the computation
593  * @tparam TSource The data type of the source elements
594  * @tparam TTarget The data type of the target elements
595  * @tparam tSourceChannels The number of channels the source frame has, with range [1, infinity)
596  * @tparam tTargetChannels The number of channels the target frame has, with range [1, infinity)
597  */
598  template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels>
599  static void applyRowOperator(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels>& rowOperatorFunction, Worker* worker = nullptr);
600 
601  /**
602  * Transforms a frame with generic pixel format (with zipped pixel information) like RGB24 or YUV24, to a frame with same pixel format and channel number.
603  * This function mainly mirrors or flips an image.
604  * @param source The source frame buffer, must be valid
605  * @param target The target frame buffer, must be valid
606  * @param width The width of the frame in pixel, with range [1, infinity)
607  * @param height The height of the frame in pixel, with range [1, infinity)
608  * @param conversionFlag The conversion to be applied
609  * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
610  * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
611  * @param worker Optional worker object to distribute the computation; unlike the sibling functions this parameter has no default value and must be passed explicitly
612  * @tparam T Data type of each channel pixel value, e.g., 'uint8_t', 'float', ...
613  * @tparam tChannels Number of channels of the frame, with range [1, infinity)
614  */
615  template <typename T, unsigned int tChannels>
616  static inline void transformGeneric(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker);
617 
618  /**
619  * Converts an image with premultiplied alpha to a straight image (without premultiplied alpha), the conversion is applied in place.
620  * @param frame The image to convert in place, must be valid
621  * @param width The width of the image in pixel, with range [1, infinity)
622  * @param height The height of the image in pixel, with range [1, infinity)
623  * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
624  * @param worker Optional worker object to distribute the computation
625  * @tparam tChannels The number of frame channels, with range [2, infinity)
626  * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
627  * @see straightAlphaToPremultipliedAlpha8BitPerChannel().
628  */
629  template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
630  static inline void premultipliedAlphaToStraightAlpha8BitPerChannel(uint8_t* const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker* worker = nullptr);
631 
632  /**
633  * Converts an image with premultiplied alpha to a straight image (without premultiplied alpha), the result is written to a separate target image.
634  * @param source The source image to convert, must be valid
635  * @param target The resulting converted target image, must be valid
636  * @param width The width of the image in pixel, with range [1, infinity)
637  * @param height The height of the image in pixel, with range [1, infinity)
638  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
639  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
640  * @param worker Optional worker object to distribute the computation
641  * @tparam tChannels The number of frame channels, with range [2, infinity)
642  * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
643  * @see straightAlphaToPremultipliedAlpha8BitPerChannel().
644  */
645  template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
646  static inline void premultipliedAlphaToStraightAlpha8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
647 
648  /**
649  * Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied alpha, the conversion is applied in place.
650  * @param frame The image to convert in place, must be valid
651  * @param width The width of the image in pixel, with range [1, infinity)
652  * @param height The height of the image in pixel, with range [1, infinity)
653  * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
654  * @param worker Optional worker object to distribute the computation
655  * @tparam tChannels The number of frame channels, with range [2, infinity)
656  * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
657  * @see premultipliedAlphaToStraightAlpha8BitPerChannel().
658  */
659  template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
660  static inline void straightAlphaToPremultipliedAlpha8BitPerChannel(uint8_t* const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker* worker = nullptr);
661 
662  /**
663  * Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied alpha, the result is written to a separate target image.
664  * @param source The source image to convert, must be valid
665  * @param target The resulting converted target image, must be valid
666  * @param width The width of the image in pixel, with range [1, infinity)
667  * @param height The height of the image in pixel, with range [1, infinity)
668  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
669  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
670  * @param worker Optional worker object to distribute the computation
671  * @tparam tChannels The number of frame channels, with range [2, infinity)
672  * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
673  * @see premultipliedAlphaToStraightAlpha8BitPerChannel().
674  */
675  template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
676  static inline void straightAlphaToPremultipliedAlpha8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
677 
678  /**
679  * Reverses/mirrors the order of pixels in a given row (or a memory block in general).
680  * @param source The pointer to the source pixels, must be valid
681  * @param target The pointer to the target pixels receiving the reversed/mirrored pixel data, must be valid
682  * @param size The number of source (and target) pixels to reverse, with range [1, infinity)
683  * @tparam T The data type of the pixel elements, e.g., 'uint8_t', 'int'
684  * @tparam tChannels The number of channels (the number of elements) each pixel has, with range [1, infinity)
685  */
686  template <typename T, unsigned int tChannels>
687  static void reverseRowPixelOrder(const T* source, T* target, const size_t size);
688 
689  /**
690  * Reverses/mirrors the order of pixels in a given row (or a memory block in general) in place.
691  * @param data The pointer to the pixels which will be reversed/mirrored in place, must be valid
692  * @param size The number of pixels to reverse, with range [1, infinity)
693  * @tparam T The data type of the pixel elements, e.g., 'uint8_t', 'int'
694  * @tparam tChannels The number of channels (the number of elements) each pixel has, with range [1, infinity)
695  */
696  template <typename T, unsigned int tChannels>
697  static void reverseRowPixelOrderInPlace(T* data, const size_t size);
698 
699  /**
700  * Reverses/mirrors the order of channels in a given row (or a memory block in general).
701  * @param source The pointer to the source pixels, must be valid
702  * @param target The pointer to the target pixels receiving the reversed/mirrored channels, must be valid
703  * @param size The number of source (and target) pixels to convert, with range [1, infinity)
704  * @param unusedOptions An unused options parameter, must be nullptr
705  * @tparam T The data type of the pixel elements, e.g., 'uint8_t', 'int'
706  * @tparam tChannels The number of channels (the number of elements) each pixel has, with range [1, infinity)
707  */
708  template <typename T, unsigned int tChannels>
709  static void reverseRowChannelOrder(const T* source, T* target, const size_t size, const void* unusedOptions = nullptr);
710 
711  /**
712  * Shuffles the channels of row pixels by application of a specified shuffle pattern.
713  * The shuffle pattern is defined in groups of four bits, each group defining the source channel of one target channel (the least significant group belongs to the first target channel).<br>
714  * For the shuffling from e.g., an RGBA32 row to a BGRA32 row the pattern 0x3012u must be defined:
715  * <pre>
716  * source pixel R G B A
717  * 0 1 2 3
718  * target pixel B G R A
719  * 2 1 0 3
720  * pattern (with reversed order): 0x3012
721  * </pre>
722  * @param source The pointer to the source pixels, must be valid
723  * @param target The pointer to the target pixels, receiving the shuffled channels, must be valid
724  * @param size The number of source (and target) pixels to convert, with range [1, infinity)
725  * @param unusedOptions An unused options parameter, must be nullptr
726  * @tparam T Data type of each channel pixel value, e.g., 'uint8_t' or 'float'
727  * @tparam tSourceChannels Number of source data channels, with range [1, 8u]
728  * @tparam tTargetChannels Number of target data channels, with range [1, 8u]
729  * @tparam tShufflePattern Groups of four bits define the source channel for each target channel, e.g., 0x76543210 defines the identity transformation, 0x01234567 defines the reverse transformation
730  */
731  template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
732  static inline void shuffleRowChannels(const T* source, T* target, const size_t size, const void* unusedOptions = nullptr);
733 
734  /**
735  * Shuffles the channels of row pixels by application of a specified shuffle pattern and sets the last channel of the target row to a constant value.
736  * The shuffle pattern is defined in groups of four bits, each group defining the source channel of one target channel (the least significant group belongs to the first target channel).<br>
737  * For the shuffling from e.g., an RGB24 row to a BGRA32 row the pattern 0x012u must be defined:
738  * <pre>
739  * source pixel R G B
740  * 0 1 2
741  * target pixel B G R A
742  * 2 1 0
743  * pattern (with reversed order): 0x012
744  * </pre>
745  * @param source The pointer to the source pixels, must be valid
746  * @param target The pointer to the target pixels, receiving the shuffled channels, must be valid
747  * @param size The number of source (and target) pixels to convert, with range [1, infinity)
748  * @param options Pointer to the constant channel value which will be added to the end of the target channels, must be valid (although the parameter defaults to nullptr)
749  * @tparam T Data type of each channel pixel value, e.g., 'uint8_t' or 'float'
750  * @tparam tSourceChannels Number of source data channels, with range [1, 8u]
751  * @tparam tTargetChannels Number of target data channels, including the additional extra target channel, with range [2, 8u]
752  * @tparam tShufflePattern Groups of four bits define the source channel for each shuffled target channel, e.g., 0x76543210 defines the identity transformation, 0x01234567 defines the reverse transformation
753  */
754  template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
755  static inline void shuffleRowChannelsAndSetLastChannelValue(const T* source, T* target, const size_t size, const void* options = nullptr);
756 
757  /**
758  * Converts a row of pixels with 3 channels to pixels with one channel by a linear combination of the three channels.
759  * This function can be used to e.g., convert RGB24 to Y8, or BGR24 to Y8.
760  * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
761  * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
762  * @param source The pointer to the source pixels, must be valid
763  * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
764  * @param size The number of source (and target) pixels to convert, with range [1, infinity)
765  * @param channelMultiplicationFactors_128 The three uint32_t multiplication factors, one for each channel, with range [0, 128], while the sum of all three factors must be 128, must be valid
766  * @tparam tUseFactorChannel0 True, if the multiplication factor for channel 0 is not zero; False, if the multiplication factor for channel 0 is zero
767  * @tparam tUseFactorChannel1 True, if the multiplication factor for channel 1 is not zero; False, if the multiplication factor for channel 1 is zero
768  * @tparam tUseFactorChannel2 True, if the multiplication factor for channel 2 is not zero; False, if the multiplication factor for channel 2 is zero
769  */
770  template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2>
771  static void convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* channelMultiplicationFactors_128);
772 
773  /**
774  * Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the three channels plus a translational part applied to the source data before applying the linear transformation.
775  * This function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
776  * The linear combination is defined by three integer multiplication factors for each source channel with 64 as denominator, plus one translation parameter for each source channel (with 1 as denominator).<br>
777  * Beware: As this function applies integer multiplication factors (with 6 bits precision) the conversion result has an accuracy of +/- 4 color intensities.<br>
778  * The transformation is based on the following pattern:
779  * <pre>
780  * t0 = clamp(0, f00 * (s0 - b0) + f01 * (s1 - b1) + f02 * (s2 - b2), 255)
781  * t1 = clamp(0, f10 * (s0 - b0) + f11 * (s1 - b1) + f12 * (s2 - b2), 255)
782  * t2 = clamp(0, f20 * (s0 - b0) + f21 * (s1 - b1) + f22 * (s2 - b2), 255)
783  * </pre>
784  * With t target, s source, f factor, and b bias/translation.<br>
785  * Factors must be specified in relation to a denominator of 64, bias values must be specified with a denominator of 1.
786  * @param source The pointer to the source pixels, must be valid
787  * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
788  * @param size The number of source (and target) pixels to convert, with range [1, infinity)
789  * @param parameters The 12 int32_t parameters: the 9 factors of the column-aligned 3x3 transformation matrix, followed by the 3 translation parameters: f00_64, f10_64, f20_64, f01_64, f11_64, ..., f22_64, with ranges [-128, 128], b0, b1, b2, with ranges [0, 128]
790  */
791  static void convertRow3ChannelsTo3Channels8BitPerChannel6BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
792 
793  /**
794  * Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the three channels plus a bias (translation) part.
795  * This function can be used to e.g., convert RGB24 to YUV24, or BGR24 to YVU24.
796  * The linear combination is defined by three integer multiplication factors for each source channel with 128 as denominator, plus one bias (translation) parameter for each target channel (with 1 as denominator).<br>
797  * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
798  * The transformation is based on the following pattern:
799  * <pre>
800  * t0 = clamp(0, f00 * s0 + f01 * s1 + f02 * s2 + b0, 255)
801  * t1 = clamp(0, f10 * s0 + f11 * s1 + f12 * s2 + b1, 255)
802  * t2 = clamp(0, f20 * s0 + f21 * s1 + f22 * s2 + b2, 255)
803  * </pre>
804  * With t target, s source, f factor, and b bias.<br>
805  * Factors must be specified in relation to a denominator of 128, bias values must be specified with a denominator of 1.
806  * @param source The pointer to the source pixels, must be valid
807  * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
808  * @param size The number of source (and target) pixels to convert, with range [1, infinity)
809  * @param parameters The 12 int32_t parameters of the column-aligned 3x4 transformation matrix: f00_128, f10_128, f20_128, f01_128, f11_128, ..., f22_128, b0, b1, b2, with ranges [-127, 127]
810  */
811  static void convertRow3ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
812 
813  /**
814  * Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the three channels plus a bias (translation) part.
815  * This function can be used to e.g., convert YUV24 to RGB24, or YVU24 to BGR24.
816  * The linear combination is defined by three integer multiplication factors for each source channel with 1024 as denominator, plus one bias (translation) parameter for each target channel (with 1 as denominator).<br>
817  * Beware: As this function applies integer multiplication factors (with 10 bits precision) the conversion result has an accuracy of +/- 1 color intensity.<br>
818  * The transformation is based on the following pattern:
819  * <pre>
820  * t0 = clamp(0, f00 * s0 + f01 * s1 + f02 * s2 + b0, 255)
821  * t1 = clamp(0, f10 * s0 + f11 * s1 + f12 * s2 + b1, 255)
822  * t2 = clamp(0, f20 * s0 + f21 * s1 + f22 * s2 + b2, 255)
823  * </pre>
824  * With t target, s source, f factor, and b bias.<br>
825  * Factors must be specified in relation to a denominator of 1024, bias values must be specified with a denominator of 1.
826  * @param source The pointer to the source pixels, must be valid
827  * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
828  * @param size The number of source (and target) pixels to convert, with range [1, infinity)
829  * @param parameters The 12 int32_t parameters of the column-aligned 3x4 transformation matrix: f00_1024, f10_1024, f20_1024, f01_1024, f11_1024, ..., f22_1024, b0, b1, b2, with ranges [-1024 * 16, 1024 * 16]
830  */
831  static void convertRow3ChannelsTo3Channels8BitPerChannel10BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
832 
833  /**
834  * Converts a row of pixels with 3 channels to pixels with 4 channels by a linear combination of the three channels plus a translational part applied to the source data before applying the linear transformation (for the first three channels).
835  * The fourth channel is set to a constant value, e.g., for an alpha channel.<br>
836  * This function can be used to e.g., convert YUV24 to RGBA32, or YVU24 to BGRA32.<br>
837  * The linear combination is defined by three integer multiplication factors for each source channel with 64 as denominator, plus one translation parameter for each source channel (with 1 as denominator).<br>
838  * Beware: As this function applies integer multiplication factors (with 6 bits precision) the conversion result has an accuracy of +/- 4 color intensities.<br>
839  * The transformation is based on the following pattern:
840  * <pre>
841  * t0 = clamp(0, f00 * (s0 - b0) + f01 * (s1 - b1) + f02 * (s2 - b2), 255)
842  * t1 = clamp(0, f10 * (s0 - b0) + f11 * (s1 - b1) + f12 * (s2 - b2), 255)
843  * t2 = clamp(0, f20 * (s0 - b0) + f21 * (s1 - b1) + f22 * (s2 - b2), 255)
844  * t3 = valueChannel3
845  * </pre>
846  * With t target, s source, f factor, and b bias/translation.<br>
847  * Factors must be specified in relation to a denominator of 64, bias values must be specified with a denominator of 1.
848  * @param source The pointer to the source pixels, must be valid
849  * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
850  * @param size The number of source (and target) pixels to convert, with range [1, infinity)
851  * @param parameters The 13 int32_t parameters: the 9 factors of the column-aligned 3x3 transformation matrix, followed by the 3 translation parameters and the constant value for the fourth target channel: f00_64, f10_64, f20_64, f01_64, f11_64, ..., f22_64, with ranges [-128, 128], b0, b1, b2, with ranges [0, 128], valueChannel3, with range [0, 255]
852  */
853  static void convertRow3ChannelsTo4Channels8BitPerChannel6BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
854 
855  /**
856  * Converts a row of pixels with 4 channels to pixels with one channel by a linear combination of the four channels.
857  * This function can be used to e.g., convert RGBA32 to Y8, or ARGB32 to Y8, or RGB32 to Y8.
858  * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
859  * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
860  * <pre>
861  * t0 = f0 * s0 + f1 * s1 + f2 * s2 + f3 * s3
862  * </pre>
863  * @param source The pointer to the source pixels, must be valid
864  * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
865  * @param size The number of source (and target) pixels to convert, with range [1, infinity)
866  * @param channelMultiplicationFactors_128 The four uint32_t multiplication factors, one for each channel, with range [0, 127], while the sum of all four factors must be 128, must be valid
867  * @tparam tUseFactorChannel0 True, if the multiplication factor for channel 0 is not zero; False, if the multiplication factor for channel 0 is zero
868  * @tparam tUseFactorChannel1 True, if the multiplication factor for channel 1 is not zero; False, if the multiplication factor for channel 1 is zero
869  * @tparam tUseFactorChannel2 True, if the multiplication factor for channel 2 is not zero; False, if the multiplication factor for channel 2 is zero
870  * @tparam tUseFactorChannel3 True, if the multiplication factor for channel 3 is not zero; False, if the multiplication factor for channel 3 is zero
871  */
872  template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2, bool tUseFactorChannel3>
873  static void convertRow4ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* channelMultiplicationFactors_128);
874 
875  /**
876  * Converts a row of pixels with 4 channels to pixels with two channels by a linear combination of the four channels.
877  * This function can be used to e.g., convert RGBA32 to YA16, or ARGB32 to AY16.
878  * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
879  * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
880  * The transformation is based on the following pattern:
881  * <pre>
882  * t0 = f00 * s0 + f01 * s1 + f02 * s2 + f03 * s3
883  * t1 = f10 * s0 + f11 * s1 + f12 * s2 + f13 * s3
884  * </pre>
885  * @param source The pointer to the source pixels, must be valid
886  * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
887  * @param size The number of source (and target) pixels to convert, with range [1, infinity)
888  * @param multiplicationFactors_128 The 8 int32_t parameters of the column-aligned 2x4 transformation matrix: f00_128, f10_128, f01_128, ..., f13_128, with range [0, 127], while the sum of the four factors of each matrix row must be 128, must be valid
889  */
890  static void convertRow4ChannelsTo2Channels8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* multiplicationFactors_128);
891 
892  /**
893  * Converts a row of pixels with 4 channels to pixels with 3 channels by a linear combination of the four channels plus a bias (translation) part.
894  * This function can be used to e.g., convert RGBA32 to YUV24, or BGRA32 to YVU24.
895  * The linear combination is defined by three integer multiplication factors for each source channel with 128 as denominator, plus one bias (translation) parameter for each target channel (with 1 as denominator).<br>
896  * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
897  * The transformation is based on the following pattern:
898  * <pre>
899  * t0 = clamp(0, f00 * s0 + f01 * s1 + f02 * s2 + f03 * s3 + b0, 255)
900  * t1 = clamp(0, f10 * s0 + f11 * s1 + f12 * s2 + f13 * s3 + b1, 255)
901  * t2 = clamp(0, f20 * s0 + f21 * s1 + f22 * s2 + f23 * s3 + b2, 255)
902  * </pre>
903  * With t target, s source, f factor, and b bias.<br>
904  * Factors must be specified in relation to a denominator of 128, bias values must be specified with a denominator of 1.
905  * @param source The pointer to the source pixels, must be valid
906  * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
907  * @param size The number of source (and target) pixels to convert, with range [1, infinity)
908  * @param parameters The int32_t parameters: the 12 factors of the column-aligned 3x4 transformation matrix f00_128, f10_128, f20_128, f01_128, ..., f23_128, followed by the 3 bias values b0, b1, b2, each with range [-127, 127], must be valid (NOTE(review): this sums up to 15 values, please confirm the expected count against the implementation)
909  */
910  static void convertRow4ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
911 
912  /**
913  * Narrows a row of pixels with 16 bit channels to pixels with 8 bit channels, the number of channels stays unchanged.
914  * @param source The pointer to the source pixels (16 bit per channel), must be valid
915  * @param target The pointer to the target pixels (8 bit per channel) receiving the converted pixel data, must be valid
916  * @param size The number of source (and target) pixels to convert, with range [1, infinity)
917  * @param unusedParameters Unused parameter, must be nullptr
918  * @tparam tChannels The number of channels the source (and target) frame have, with range [1, infinity)
919  */
920  template <unsigned int tChannels>
921  static void narrowRow16BitPerChannelTo8BitPerChannel(const uint16_t* source, uint8_t* target, const size_t size, const void* unusedParameters = nullptr);
922 
923  /**
924  * Adds a channel to a given row with generic (zipped) pixel format and copies the information of the new channel from a one-channel image.
925  * The channel can be added as new first channel or as new last channel.
926  * @param sources The pointers to the multi-channel source frame and to the single-channel source frame, must be valid
927  * @param targets The pointer to the one target image, must be valid
928  * @param multipleRowIndex The index of the multiple-row to be handled, with range [0, height - 1]
929  * @param width The width of the frame in pixel, with range [1, infinity), must be even
930  * @param height The height of the frame in pixel, with range [1, infinity), must be even
931  * @param conversionFlag The conversion to be applied
932  * @param options The 1 options parameters: padding parameters of 1-channel source frame, must be valid
933  * @tparam T Data type of each channel pixel value, e.g., 'uint8_t' or 'float'
934  * @tparam tSourceChannels Number of channels of the source frame (without the new channel), with range [1, infinity)
935  * @tparam tAddToFront True, to add the channel to the front (as new first channel); False, to add the channel to the back (as new last channel).
936  */
937  template <typename T, unsigned int tSourceChannels, bool tAddToFront>
938  static void addChannelRow(const void** sources, void** targets, const unsigned int multipleRowIndex, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const void* options);
939 
940  /**
941  * Adds a channel to a given row with generic (zipped) pixel format and sets all values of the new channel to a specified value.
942  * The channel can be added as new first channel or as new last channel.
943  * @param source The pointer to the source pixels, must be valid
944  * @param target The pointer to the target pixels, receiving the additional channel, must be valid
945  * @param size The number of source (and target) pixels to convert, with range [1, infinity)
946  * @param channelValueParameter The pointer to the value of the channel to be set (with data type 'T'), must be valid
947  * @tparam T Data type of each channel pixel value, e.g., 'uint8_t' or 'float'
948  * @tparam tSourceChannels Number of channels of the source frame (without the new channel), with range [1, infinity)
949  * @tparam tAddToFront True, to add the channel to the front (as new first channel); False, to add the channel to the back (as new last channel).
950  */
951  template <typename T, unsigned int tSourceChannels, bool tAddToFront>
952  static void addChannelValueRow(const T* source, T* target, const size_t size, const void* channelValueParameter);
953 
954  /**
955  * Copies one channel from a source row to a target row with generic (zipped) pixel format.
956  * @param source The pointer to the source pixels providing the channel to be copied, must be valid
957  * @param target The pointer to the target pixels, receiving the copied channel, must be valid
958  * @param size The number of source (and target) pixels to handle, with range [1, infinity)
959  * @param unusedParameters Unused parameters, must be nullptr
960  * @tparam T Data type of each channel pixel value, e.g., 'uint8_t' or 'float'
961  * @tparam tSourceChannels Number of channels of the source frame, with range [1, infinity)
962  * @tparam tTargetChannels Number of channels of the target frame, with range [1, infinity)
963  * @tparam tSourceChannelIndex The index of the source channel to be copied, with range [0, tSourceChannels - 1]
964  * @tparam tTargetChannelIndex The index of the target channel receiving the copied channel, with range [0, tTargetChannels - 1]
965  */
966  template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tSourceChannelIndex, unsigned int tTargetChannelIndex>
967  static void copyChannelRow(const T* source, T* target, const size_t size, const void* unusedParameters = nullptr);
968 
969  protected:
970 
971  /**
972  * Separates a given frame with zipped pixel format, e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32, into individual frames with one channel only; the number of channels is provided at runtime.
973  * @param sourceFrame The frame to be separated, must be valid
974  * @param targetFrames The pointers to the resulting separated frames, each holding one channel of the source frame, with already allocated memory
975  * @param width The width of the source frame in pixel, with range [1, infinity)
976  * @param height The height of the source frame in pixel, with range [1, infinity)
977  * @param channels The number of channels the source frame has, with range [1, infinity)
978  * @param sourceFramePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
979  * @param targetFramesPaddingElements The array of padding elements at the end of each target row, one for each target frame, in elements, with range [0, infinity)
980  * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
981  * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
982  */
983  template <typename TSource, typename TTarget>
984  static void separateTo1ChannelRuntime(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements);
985 
986  /**
987  * Zips/interleaves 1-channel images into one image with n channels; the number of channels is provided at runtime.
988  * @param sourceFrames The pointers to the individual 1-channel frames, one for each image, must be valid
989  * @param targetFrame The pointer to the resulting zipped frame holding n channels, must be valid
990  * @param width The width of the source frames in pixel, with range [1, infinity)
991  * @param height The height of the source frames in pixel, with range [1, infinity)
992  * @param channels The number of provided source frames (and the number of channels the target frame will have), with range [1, infinity)
993  * @param sourceFramesPaddingElements The array of padding elements at the end of each source row, one for each source frame, in elements, with range [0, infinity)
994  * @param targetFramePaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
995  * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
996  * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
997  */
998  template <typename TSource, typename TTarget>
999  static void zipChannelsRuntime(const TSource* const* const sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements);
1000 
1001  /**
1002  * Sets one channel of a subset of the rows of a frame to one unique value.
1003  * @param frame The frame in which one channel of each pixel will be set, must be valid
1004  * @param width The width of the frame in pixel, with range [1, infinity)
1005  * @param value The value to be set
1006  * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
1007  * @param firstRow First row to be handled
1008  * @param numberRows Number of rows to be handled
1009  * @tparam T Data type of each channel pixel value
1010  * @tparam tChannel Index of the channel that will be set, with range [0, tChannels)
1011  * @tparam tChannels Number of data channels of the frames, with range [1, infinity)
1012  */
1013  template <typename T, unsigned int tChannel, unsigned int tChannels>
1014  static void setChannelSubset(T* frame, const unsigned int width, const T value, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1015 
1016  /**
1017  * Applies a specific modifier function on each pixel of a subset of the rows of a frame, source and target share the same data type and channel number.
1018  * @param source The source frame providing the pixel information, must be valid
1019  * @param target The target frame receiving the pixel information, must be valid
1020  * @param width The width of the source frame in pixel
1021  * @param height The height of the source frame in pixel
1022  * @param conversionFlag The conversion to be applied
1023  * @param firstRow First row to be handled
1024  * @param numberRows Number of rows to be handled
1025  * @tparam T Data type of each channel pixel value
1026  * @tparam tChannels Number of data channels, with range [1, infinity)
1027  * @tparam tPixelFunction Pixel modification function, receiving a pointer to the (constant) source pixel and a pointer to the target pixel
1028  */
1029  template <typename T, unsigned int tChannels, void (*tPixelFunction)(const T*, T*)>
1030  static void applyPixelModifierSubset(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows);
1031 
1032  /**
1033  * Applies a specific modifier function on each pixel of a subset of the rows of a frame, while source and target can have individual data types, channel numbers, and padding elements.
1034  * @param source The source frame providing the pixel information, must be valid
1035  * @param target The target frame receiving the pixel information, must be valid
1036  * @param width The width of the source frame in pixel, with range [1, infinity)
1037  * @param height The height of the source frame in pixel, with range [1, infinity)
1038  * @param sourcePaddingElements The number of padding elements at the end of each row of the source frame, in elements, with range [0, infinity)
1039  * @param targetPaddingElements The number of padding elements at the end of each row of the target frame, in elements, with range [0, infinity)
1040  * @param conversionFlag The conversion to be applied
1041  * @param firstRow First row to be handled
1042  * @param numberRows Number of rows to be handled
1043  * @tparam TSource Data type of each source channel pixel value
1044  * @tparam TTarget Data type of each target channel pixel value
1045  * @tparam tSourceChannels Number of source data channels, with range [1, infinity)
1046  * @tparam tTargetChannels Number of target data channels, with range [1, infinity)
1047  * @tparam tPixelFunction Pixel modification function, receiving a pointer to the (constant) source pixel and a pointer to the target pixel
1048  */
1049  template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tPixelFunction)(const TSource*, TTarget*)>
1050  static void applyAdvancedPixelModifierSubset(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows);
1051 
1052  /**
1053  * Applies a generic bivariate pixel operator (an operator with two sources and one target) to a subset of the rows of two source frames.
1054  * @param source0 First source frame
1055  * @param source1 Second source frame
1056  * @param target The target frame
1057  * @param width The width of the source frames in pixel, with range [1, infinity)
1058  * @param height The height of the source frames in pixel, with range [1, infinity)
1059  * @param source0PaddingElements The number of padding elements at the end of each row of the first source, in elements, with range [0, infinity)
1060  * @param source1PaddingElements The number of padding elements at the end of each row of the second source, in elements, with range [0, infinity)
1061  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1062  * @param conversionFlag The conversion to be applied
1063  * @param firstRow First row to be handled
1064  * @param numberRows Number of rows to be handled
1065  * @tparam TSource0 Type of the first data source
1066  * @tparam TSource1 Type of the second data source
1067  * @tparam TTarget Type of the target
1068  * @tparam TIntermediate Type for the computation of intermediate results, e.g., if TSource0 and TSource1 are different
1069  * @tparam tSourceChannels Number of channels of the two sources, range: [1, infinity)
1070  * @tparam tTargetChannels Number of channels of the target, range: [1, infinity)
1071  * @tparam tOperator The operation (function) that is applied on both sources to yield the value for the target (called per pixel), receiving pointers to both (constant) source pixels and a pointer to the target pixel
1072  */
1073  template <typename TSource0, typename TSource1, typename TTarget, typename TIntermediate, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
1074  static void applyBivariateOperatorSubset(const TSource0* source0, const TSource1* source1, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows);
1075 
1076  /**
1077  * Applies a row operator to a subset of all rows of a source image.
1078  * The row operator is given as function pointer and is intended to transform a source row to a target row.<br>
1079  * The function allows to implement e.g., frame filters with few lines of code, source and target frame must have the same size.
1080  * @param source The source frame to which the row operator is applied, must be valid
1081  * @param target The target frame receiving the result of the row operator, must be valid
1082  * @param width The width of the source frame and target frame in pixel, with range [1, infinity)
1083  * @param height The height of the source frame and target frame in pixel, with range [1, infinity)
1084  * @param sourceStrideElements The number of elements between the start points of two successive source rows, in elements, with range [width * tSourceChannels, infinity)
1085  * @param targetStrideElements The number of elements between the start points of two successive target rows, in elements, with range [width * tTargetChannels, infinity)
1086  * @param rowOperatorFunction The pointer to the row operator function, must be valid
1087  * @param firstRow The first row to be handled, with range [0, height - 1]
1088  * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1089  * @tparam TSource The data type of the source elements
1090  * @tparam TTarget The data type of the target elements
1091  * @tparam tSourceChannels The number of channels the source frame has, with range [1, infinity)
1092  * @tparam tTargetChannels The number of channels the target frame has, with range [1, infinity)
1093  */
1094  template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels>
1095  static void applyRowOperatorSubset(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels> rowOperatorFunction, const unsigned int firstRow, const unsigned int numberRows);
1096 
1097  /**
1098  * Transforms a subset of a frame with generic pixel format (with zipped pixel information) like RGB24 or YUV24, to a frame with the same pixel format and channel number.
1099  * @param source The source frame buffer, must be valid
1100  * @param target The target frame buffer, must be valid
1101  * @param width The width of the frame in pixel, with range [1, infinity)
1102  * @param height The height of the frame in pixel, with range [1, infinity)
1103  * @param conversionFlag The conversion to be applied
1104  * @param rowReversePixelOrderFunction The function able to reverse the pixel order, must be valid
1105  * @param bytesPerRow The actual number of bytes each row covers, not including optional padding bytes at the end of each row, with range [width, infinity)
1106  * @param sourceStrideBytes The number of bytes between the start points of two successive rows in the source frame, with range [0, infinity)
1107  * @param targetStrideBytes The number of bytes between the start points of two successive rows in the target frame, with range [0, infinity)
1108  * @param firstRow The first row to be handled, with range [0, height - 1]
1109  * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1110  */
1111  static void transformGenericSubset(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const RowReversePixelOrderFunction<void> rowReversePixelOrderFunction, const unsigned int bytesPerRow, const unsigned int sourceStrideBytes, const unsigned int targetStrideBytes, const unsigned int firstRow, const unsigned int numberRows);
1112 
1113  /**
1114  * Converts a subset of the rows of an image with premultiplied alpha to a straight image (without premultiplied alpha), the given image is modified directly.
1115  * @param frame The image to convert (8 bit per channel), must be valid
1116  * @param width The width of the image in pixel, with range [1, infinity)
1117  * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
1118  * @param firstRow The first row to be handled, with range [0, height - 1]
1119  * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1120  * @tparam tChannels The number of frame channels (including the alpha channel), with range [2, infinity)
1121  * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
1122  */
1123  template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
1124  static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(uint8_t* const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1125 
1126  /**
1127  * Converts a subset of the rows of a source image with premultiplied alpha to a straight target image (without premultiplied alpha).
1128  * @param source The source image to convert (8 bit per channel), must be valid
1129  * @param target The resulting converted target image (8 bit per channel), must be valid
1130  * @param width The width of the image in pixel, with range [1, infinity)
1131  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
1132  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1133  * @param firstRow The first row to be handled, with range [0, height - 1]
1134  * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1135  * @tparam tChannels The number of frame channels (including the alpha channel), with range [2, infinity)
1136  * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
1137  */
1138  template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
1139  static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1140 
1141  /**
1142  * Converts a subset of the rows of an image with straight alpha (without premultiplied alpha) to an image with premultiplied alpha, the given image is modified directly.
1143  * @param frame The image to convert (8 bit per channel), must be valid
1144  * @param width The width of the image in pixel, with range [1, infinity)
1145  * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
1146  * @param firstRow The first row to be handled, with range [0, height - 1]
1147  * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1148  * @tparam tChannels The number of frame channels (including the alpha channel), with range [2, infinity)
1149  * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
1150  */
1151  template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
1152  static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(uint8_t* const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1153 
1154  /**
1155  * Converts a subset of the rows of a source image with straight alpha (without premultiplied alpha) to a target image with premultiplied alpha.
1156  * @param source The source image to convert (8 bit per channel), must be valid
1157  * @param target The resulting converted target image (8 bit per channel), must be valid
1158  * @param width The width of the image in pixel, with range [1, infinity)
1159  * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
1160  * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1161  * @param firstRow The first row to be handled, with range [0, height - 1]
1162  * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1163  * @tparam tChannels The number of frame channels (including the alpha channel), with range [2, infinity)
1164  * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
1165  */
1166  template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
1167  static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1168 
1169 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1170 
1171  /**
1172  * Converts 16 pixels with 3 channels per pixel to 16 pixels with one channel per pixel by a linear combination of the three channels.
1173  * This function can be used to e.g., convert RGB24 to Y8, or BGR24 to Y8.
1174  * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
1175  * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
1176  * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1177  * @param target The pointer to the 16 target pixels (with 1 channel = 16 bytes) receiving the converted pixel data, must be valid
1178  * @param multiplicationFactors0_128_u_16x8 The multiplication factor for the first channel (8 identical 16 bit values), with ranges [0, 128], while the sum of all three factors must be 128
1179  * @param multiplicationFactors1_128_u_16x8 The multiplication factor for the second channel (8 identical 16 bit values), with ranges [0, 128], while the sum of all three factors must be 128
1180  * @param multiplicationFactors2_128_u_16x8 The multiplication factor for the third channel (8 identical 16 bit values), with ranges [0, 128], while the sum of all three factors must be 128
1181  */
1182  static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactors0_128_u_16x8, const __m128i& multiplicationFactors1_128_u_16x8, const __m128i& multiplicationFactors2_128_u_16x8);
1183 
1184  /**
1185  * Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1186  * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1187  * The linear combination is defined by three integer multiplication factors for each source channel with 128 as denominator, plus one bias (translation) parameter for each target channel (with 1 as denominator).<br>
1188  * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
1189  * The transformation is based on the following pattern:
1190  * <pre>
1191  * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1192  * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1193  * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1194  * </pre>
1195  * With t target, s source, f factor, and b bias.
1196  * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1197  * @param target The pointer to the 16 target pixels (with 3 channels = 48 bytes) receiving the converted pixel data, must be valid
1198  * @param factorChannel00_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1199  * @param factorChannel10_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1200  * @param factorChannel20_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1201  * @param factorChannel01_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1202  * @param factorChannel11_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1203  * @param factorChannel21_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1204  * @param factorChannel02_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1205  * @param factorChannel12_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1206  * @param factorChannel22_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1207  * @param biasChannel0_s_16x8 The bias (translation) value for the first target channel, with range [-127, 127]
1208  * @param biasChannel1_s_16x8 The bias (translation) value for the second target channel, with range [-127, 127]
1209  * @param biasChannel2_s_16x8 The bias (translation) value for the third target channel, with range [-127, 127]
1210  */
1211  static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& factorChannel00_128_s_16x8, const __m128i& factorChannel10_128_s_16x8, const __m128i& factorChannel20_128_s_16x8, const __m128i& factorChannel01_128_s_16x8, const __m128i& factorChannel11_128_s_16x8, const __m128i& factorChannel21_128_s_16x8, const __m128i& factorChannel02_128_s_16x8, const __m128i& factorChannel12_128_s_16x8, const __m128i& factorChannel22_128_s_16x8, const __m128i& biasChannel0_s_16x8, const __m128i& biasChannel1_s_16x8, const __m128i& biasChannel2_s_16x8);
1212 
1213  /**
1214  * Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1215  * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1216  * The linear combination is defined by three integer multiplication factors for each source channel with 1024 as denominator, plus one bias (translation) parameter for each target channel (with 1 as denominator).<br>
1217  * Beware: As this function applies integer multiplication factors (with 10 bits precision), the conversion result has an accuracy of +/- 1 color intensity.<br>
1218  * The transformation is based on the following pattern:
1219  * <pre>
1220  * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1221  * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1222  * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1223  * </pre>
1224  * With t target, s source, f factor, and b bias.
1225  * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1226  * @param target The pointer to the 16 target pixels (with 3 channels = 48 bytes) receiving the converted pixel data, must be valid
1227  * @param factorChannel00_1024_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-1024 * 16, 1024 * 16]
1228  * @param factorChannel10_1024_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-1024 * 16, 1024 * 16]
1229  * @param factorChannel20_1024_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-1024 * 16, 1024 * 16]
1230  * @param factorChannel01_1024_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-1024 * 16, 1024 * 16]
1231  * @param factorChannel11_1024_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-1024 * 16, 1024 * 16]
1232  * @param factorChannel21_1024_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-1024 * 16, 1024 * 16]
1233  * @param factorChannel02_1024_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-1024 * 16, 1024 * 16]
1234  * @param factorChannel12_1024_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-1024 * 16, 1024 * 16]
1235  * @param factorChannel22_1024_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-1024 * 16, 1024 * 16]
1236  * @param biasChannel0_1024_s_32x4 The bias (translation) value for the first target channel, with range [-1024 * 16, 1024 * 16]
1237  * @param biasChannel1_1024_s_32x4 The bias (translation) value for the second target channel, with range [-1024 * 16, 1024 * 16]
1238  * @param biasChannel2_1024_s_32x4 The bias (translation) value for the third target channel, with range [-1024 * 16, 1024 * 16]
1239  */
1240  static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& factorChannel00_1024_s_16x8, const __m128i& factorChannel10_1024_s_16x8, const __m128i& factorChannel20_1024_s_16x8, const __m128i& factorChannel01_1024_s_16x8, const __m128i& factorChannel11_1024_s_16x8, const __m128i& factorChannel21_1024_s_16x8, const __m128i& factorChannel02_1024_s_16x8, const __m128i& factorChannel12_1024_s_16x8, const __m128i& factorChannel22_1024_s_16x8, const __m128i& biasChannel0_1024_s_32x4, const __m128i& biasChannel1_1024_s_32x4, const __m128i& biasChannel2_1024_s_32x4);
1241 
1242  /**
1243  * Converts 16 pixels with 4 channels per pixel to 16 pixels with one channel per pixel by a linear combination of the four channels.
1244  * This function can be used to e.g., convert RGBA32 to Y8, or ARGB32 to Y8, or RGB32 to Y8.
1245  * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
1246  * Beware: As this function applies integer multiplication factors (with 7 bits precision), the conversion result has an accuracy of +/- 2 color intensities.
1247  * @param source The pointer to the 16 source pixels (with 4 channels = 64 bytes) to convert, must be valid
1248  * @param target The pointer to the 16 target pixels (with 1 channel = 16 bytes) receiving the converted pixel data, must be valid
1249  * @param multiplicationFactors0123_128_s_32x The four individual multiplication factors, one for each channel, with ranges [0, 127], while the sum of all four factors must be 128
1250  */
1251  static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactors0123_128_s_32x);
1252 
1253  /**
1254  * Converts 16 pixels with 4 channels per pixel to 16 pixels with two channels per pixel by a linear combination of the four channels.
1255  * This function can be used to e.g., convert RGBA32 to YA16, or ARGB32 to AY16.
1256  * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
1257  * Beware: As this function applies integer multiplication factors (with 7 bits precision), the conversion result has an accuracy of +/- 2 color intensities.
1258  * @param source The pointer to the 16 source pixels (with 4 channels = 64 bytes) to convert, must be valid
1259  * @param target The pointer to the 16 target pixels (with 2 channels = 32 bytes) receiving the converted pixel data, must be valid
1260  * @param multiplicationFactorsChannel0_0123_128_s_16x8 The four individual multiplication factors for the first target channel (two sets), one for each source channel, with ranges [0, 128], while the sum of all four factors must be 128
1261  * @param multiplicationFactorsChannel1_0123_128_s_16x8 The four individual multiplication factors for the second target channel (two sets), one for each source channel, with ranges [0, 128], while the sum of all four factors must be 128
1262  */
1263  static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactorsChannel0_0123_128_s_16x8, const __m128i& multiplicationFactorsChannel1_0123_128_s_16x8);
1264 
1265 #endif // OCEAN_HARDWARE_SSE_VERSION >= 41
1266 
1267 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1268 
1269  /**
1270  * Converts 8 pixels with 3 channels per pixel to 8 pixels with one channel per pixel by a linear combination of the three channels.
1271  * Thus, this function can be used to e.g., convert RGB24 to Y8, or BGR24 to Y8.
1272  * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
1273  * Beware: As this function applies integer multiplication factors (with 7 bits precision), the conversion result has an accuracy of +/- 2 color intensities.
1274  * @param source The pointer to the 8 source pixels (with 3 channels = 24 bytes) to convert, must be valid
1275  * @param target The pointer to the 8 target pixels (with 1 channel = 8 bytes) receiving the converted pixel data, must be valid
1276  * @param factorChannel0_128_u_8x8 The multiplication factor (8 identical factors) for the first channel, with range [0, 128]
1277  * @param factorChannel1_128_u_8x8 The multiplication factor (8 identical factors) for the second channel, with range [0, 128 - factorChannel0 - factorChannel2]
1278  * @param factorChannel2_128_u_8x8 The multiplication factor (8 identical factors) for the third channel, with range [0, 128 - factorChannel0 - factorChannel1]
1279  * @tparam tUseFactorChannel0 True, if the value(s) of factorChannel0 is not zero; False, if the value(s) of factorChannel0 is zero
1280  * @tparam tUseFactorChannel1 True, if the value(s) of factorChannel1 is not zero; False, if the value(s) of factorChannel1 is zero
1281  * @tparam tUseFactorChannel2 True, if the value(s) of factorChannel2 is not zero; False, if the value(s) of factorChannel2 is zero
1282  */
1283  template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2>
1284  static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel0_128_u_8x8, const uint8x8_t& factorChannel1_128_u_8x8, const uint8x8_t& factorChannel2_128_u_8x8);
1285 
1286  /**
1287  * Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear combination of the three channels plus an in advance bias (translation) parameter.
1288  * Thus, this function can be used to e.g., convert RGB24 to YUV24.
1289  * The linear combination is defined by three integer multiplication factors for each source channel with 64 as denominator, plus one bias (translation) parameter for each source channel (with 1 as denominator).<br>
1290  * Beware: As this function applies integer multiplication factors (with 6 bits precision), the conversion result has an accuracy of +/- 4 color intensities.<br>
1291  * The transformation is based on the following pattern:
1292  * <pre>
1293  * t0 = clamp(0, f00 * (s0 - b0) + f01 * (s1 - b1) + f02 * (s2 - b2), 255)
1294  * t1 = clamp(0, f10 * (s0 - b0) + f11 * (s1 - b1) + f12 * (s2 - b2), 255)
1295  * t2 = clamp(0, f20 * (s0 - b0) + f21 * (s1 - b1) + f22 * (s2 - b2), 255)
1296  * </pre>
1297  * With t target, s source, f factor, and b bias/translation.
1298  * @param source The pointer to the 8 source pixels (with 3 channels = 24 bytes) to convert, must be valid
1299  * @param target The pointer to the 8 target pixels (with 3 channels = 24 bytes) receiving the converted pixel data, must be valid
1300  * @param factorChannel00_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1301  * @param factorChannel10_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1302  * @param factorChannel20_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1303  * @param factorChannel01_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1304  * @param factorChannel11_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1305  * @param factorChannel21_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1306  * @param factorChannel02_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1307  * @param factorChannel12_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1308  * @param factorChannel22_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1309  * @param biasChannel0_u_8x8 The bias (translation) value for the first target channel, with range [0, 128]
1310  * @param biasChannel1_u_8x8 The bias (translation) value for the second target channel, with range [0, 128]
1311  * @param biasChannel2_u_8x8 The bias (translation) value for the third target channel, with range [0, 128]
1312  */
1313  static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8);
1314 
1315  /**
1316  * Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear combination of the three channels plus an in advance bias (translation) parameter.
1317  * Thus, this function can be used to e.g., convert RGB24 to YUV24.
1318  * The linear combination is defined by three integer multiplication factors for each source channel with 64 as denominator, plus one bias (translation) parameter for each source channel (with 1 as denominator).<br>
1319  * Beware: As this function applies integer multiplication factors (with 6 bits precision), the conversion result has an accuracy of +/- 4 color intensities.<br>
1320  * The transformation is based on the following pattern:
1321  * <pre>
1322  * t0 = clamp(0, f00 * (s0 - b0) + f01 * (s1 - b1) + f02 * (s2 - b2), 255)
1323  * t1 = clamp(0, f10 * (s0 - b0) + f11 * (s1 - b1) + f12 * (s2 - b2), 255)
1324  * t2 = clamp(0, f20 * (s0 - b0) + f21 * (s1 - b1) + f22 * (s2 - b2), 255)
1325  * </pre>
1326  * With t target, s source, f factor, and b bias/translation.
1327  * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1328  * @param target The pointer to the 16 target pixels (with 3 channels = 48 bytes) receiving the converted pixel data, must be valid
1329  * @param factorChannel00_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1330  * @param factorChannel10_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1331  * @param factorChannel20_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1332  * @param factorChannel01_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1333  * @param factorChannel11_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1334  * @param factorChannel21_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1335  * @param factorChannel02_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1336  * @param factorChannel12_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1337  * @param factorChannel22_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1338  * @param biasChannel0_u_8x8 The bias (translation) value for the first target channel, with range [0, 128]
1339  * @param biasChannel1_u_8x8 The bias (translation) value for the second target channel, with range [0, 128]
1340  * @param biasChannel2_u_8x8 The bias (translation) value for the third target channel, with range [0, 128]
1341  */
1342  static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8);
1343 
1344  /**
1345  * Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1346  * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1347  * The linear combination is defined by three integer multiplication factors for each source channel with 128 as denominator, plus one bias (translation) parameter for each target channel (also with 128 as denominator).<br>
1348  * Beware: As this function applies integer multiplication factors (with 7 bits precision), the conversion result has an accuracy of +/- 2 color intensities.<br>
1349  * The transformation is based on the following pattern:
1350  * <pre>
1351  * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1352  * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1353  * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1354  * </pre>
1355  * With t target, s source, f factor, and b bias.
1356  * @param source The pointer to the 8 source pixels (with 3 channels = 24 bytes) to convert, must be valid
1357  * @param target The pointer to the 8 target pixels (with 3 channels = 24 bytes) receiving the converted pixel data, must be valid
1358  * @param factorChannel00_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1359  * @param factorChannel10_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1360  * @param factorChannel20_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1361  * @param factorChannel01_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1362  * @param factorChannel11_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1363  * @param factorChannel21_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1364  * @param factorChannel02_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1365  * @param factorChannel12_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1366  * @param factorChannel22_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1367  * @param biasChannel0_128_s_16x8 The bias (translation) value for the first target channel, with range [-128 * 128, 128 * 128]
1368  * @param biasChannel1_128_s_16x8 The bias (translation) value for the second target channel, with range [-128 * 128, 128 * 128]
1369  * @param biasChannel2_128_s_16x8 The bias (translation) value for the third target channel, with range [-128 * 128, 128 * 128]
1370  */
1371  static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_128_s_16x8, const int16x8_t& factorChannel10_128_s_16x8, const int16x8_t& factorChannel20_128_s_16x8, const int16x8_t& factorChannel01_128_s_16x8, const int16x8_t& factorChannel11_128_s_16x8, const int16x8_t& factorChannel21_128_s_16x8, const int16x8_t& factorChannel02_128_s_16x8, const int16x8_t& factorChannel12_128_s_16x8, const int16x8_t& factorChannel22_128_s_16x8, const int16x8_t& biasChannel0_128_s_16x8, const int16x8_t& biasChannel1_128_s_16x8, const int16x8_t& biasChannel2_128_s_16x8);
1372 
1373  /**
1374  * Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1375  * Thus, this function can be used to e.g., convert YUV24 to RGB24, or YVU24 to BGR24.
1376  * The linear combination is defined by three integer multiplication factors for each source channel with 1024 as denominator, plus one bias (translation) parameter for each target channel (also with 1024 as denominator).<br>
1377  * Beware: As this function applies integer multiplication factors (with 10 bits precision), the conversion result has an accuracy of +/- 1 color intensity.<br>
1378  * The transformation is based on the following pattern:
1379  * <pre>
1380  * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1381  * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1382  * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1383  * </pre>
1384  * With t target, s source, f factor, and b bias.
1385  * @param source The pointer to the 8 source pixels (with 3 channels = 24 bytes) to convert, must be valid
1386  * @param target The pointer to the 8 target pixels (with 3 channels = 24 bytes) receiving the converted pixel data, must be valid
1387  * @param factorChannel00_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the first target channel, with range [-32767, 32767]
1388  * @param factorChannel10_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the second target channel, with range [-32767, 32767]
1389  * @param factorChannel20_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the third target channel, with range [-32767, 32767]
1390  * @param factorChannel01_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the first target channel, with range [-32767, 32767]
1391  * @param factorChannel11_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the second target channel, with range [-32767, 32767]
1392  * @param factorChannel21_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the third target channel, with range [-32767, 32767]
1393  * @param factorChannel02_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the first target channel, with range [-32767, 32767]
1394  * @param factorChannel12_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the second target channel, with range [-32767, 32767]
1395  * @param factorChannel22_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the third target channel, with range [-32767, 32767]
1396  * @param biasChannel0_1024_s_32x4 The bias (translation) value for the first target channel, with range [-32767, 32767]
1397  * @param biasChannel1_1024_s_32x4 The bias (translation) value for the second target channel, with range [-32767, 32767]
1398  * @param biasChannel2_1024_s_32x4 The bias (translation) value for the third target channel, with range [-32767, 32767]
1399  */
1400  static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x4_t& factorChannel00_1024_s_16x4, const int16x4_t& factorChannel10_1024_s_16x4, const int16x4_t& factorChannel20_1024_s_16x4, const int16x4_t& factorChannel01_1024_s_16x4, const int16x4_t& factorChannel11_1024_s_16x4, const int16x4_t& factorChannel21_1024_s_16x4, const int16x4_t& factorChannel02_1024_s_16x4, const int16x4_t& factorChannel12_1024_s_16x4, const int16x4_t& factorChannel22_1024_s_16x4, const int32x4_t& biasChannel0_1024_s_32x4, const int32x4_t& biasChannel1_1024_s_32x4, const int32x4_t& biasChannel2_1024_s_32x4);
1401 
1402  /**
1403  * Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1404  * Thus, this function can be used to e.g., convert YUV24 to RGB24, or YVU24 to BGR24.
1405  * The linear combination is defined by three integer multiplication factors for each source channel with 1024 as denominator, plus one bias (translation) parameter for each target channel (also with 1024 as denominator).<br>
1406  * Beware: As this function applies integer multiplication factors (with 10 bits precision), the conversion result has an accuracy of +/- 1 color intensity.<br>
1407  * The transformation is based on the following pattern:
1408  * <pre>
1409  * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1410  * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1411  * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1412  * </pre>
1413  * With t target, s source, f factor, and b bias.
1414  * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1415  * @param target The pointer to the 16 target pixels (with 3 channels = 48 bytes) receiving the converted pixel data, must be valid
1416  * @param factorChannel00_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the first target channel, with range [-32767, 32767]
1417  * @param factorChannel10_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the second target channel, with range [-32767, 32767]
1418  * @param factorChannel20_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the third target channel, with range [-32767, 32767]
1419  * @param factorChannel01_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the first target channel, with range [-32767, 32767]
1420  * @param factorChannel11_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the second target channel, with range [-32767, 32767]
1421  * @param factorChannel21_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the third target channel, with range [-32767, 32767]
1422  * @param factorChannel02_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the first target channel, with range [-32767, 32767]
1423  * @param factorChannel12_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the second target channel, with range [-32767, 32767]
1424  * @param factorChannel22_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the third target channel, with range [-32767, 32767]
1425  * @param biasChannel0_1024_s_32x4 The bias (translation) value for the first target channel, with range [-32767, 32767]
1426  * @param biasChannel1_1024_s_32x4 The bias (translation) value for the second target channel, with range [-32767, 32767]
1427  * @param biasChannel2_1024_s_32x4 The bias (translation) value for the third target channel, with range [-32767, 32767]
1428  */
1429  static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x4_t& factorChannel00_1024_s_16x4, const int16x4_t& factorChannel10_1024_s_16x4, const int16x4_t& factorChannel20_1024_s_16x4, const int16x4_t& factorChannel01_1024_s_16x4, const int16x4_t& factorChannel11_1024_s_16x4, const int16x4_t& factorChannel21_1024_s_16x4, const int16x4_t& factorChannel02_1024_s_16x4, const int16x4_t& factorChannel12_1024_s_16x4, const int16x4_t& factorChannel22_1024_s_16x4, const int32x4_t& biasChannel0_1024_s_32x4, const int32x4_t& biasChannel1_1024_s_32x4, const int32x4_t& biasChannel2_1024_s_32x4);
1430 
1431  /**
1432  * Converts 16 pixels with 3 channels per pixel to 16 pixels with 3 channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1433  * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1434  * The linear combination is defined by three integer multiplication factors for each source channel with 128 as denominator, plus one bias (translation) parameter for each target channel (with 128 as denominator).<br>
1435  * Beware: As this function applies integer multiplication factors (with 7 bits precision), the conversion result has an accuracy of +/- 2 color intensities.<br>
1436  * The transformation is based on the following pattern:
1437  * <pre>
1438  * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1439  * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1440  * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1441  * </pre>
1442  * With t target, s source, f factor, and b bias.
1443  * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1444  * @param target The pointer to the 16 target pixels (with 3 channels = 48 bytes) receiving the converted pixel data, must be valid
1445  * @param factorChannel00_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1446  * @param factorChannel10_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1447  * @param factorChannel20_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1448  * @param factorChannel01_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1449  * @param factorChannel11_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1450  * @param factorChannel21_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1451  * @param factorChannel02_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1452  * @param factorChannel12_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1453  * @param factorChannel22_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1454  * @param biasChannel0_128_s_16x8 The bias (translation) value for the first target channel, with range [-128 * 128, 128 * 128]
1455  * @param biasChannel1_128_s_16x8 The bias (translation) value for the second target channel, with range [-128 * 128, 128 * 128]
1456  * @param biasChannel2_128_s_16x8 The bias (translation) value for the third target channel, with range [-128 * 128, 128 * 128]
1457  */
1458  static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_128_s_16x8, const int16x8_t& factorChannel10_128_s_16x8, const int16x8_t& factorChannel20_128_s_16x8, const int16x8_t& factorChannel01_128_s_16x8, const int16x8_t& factorChannel11_128_s_16x8, const int16x8_t& factorChannel21_128_s_16x8, const int16x8_t& factorChannel02_128_s_16x8, const int16x8_t& factorChannel12_128_s_16x8, const int16x8_t& factorChannel22_128_s_16x8, const int16x8_t& biasChannel0_128_s_16x8, const int16x8_t& biasChannel1_128_s_16x8, const int16x8_t& biasChannel2_128_s_16x8);
1459 
1460  /**
1461  * Converts 16 pixels with 3 channels per pixel to 16 pixels with 4 channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1462  * The fourth channel is set to a constant value, e.g., for an alpha channel.<br>
1463  * Thus, this function can be used to e.g., convert YUV24 to RGBA32, or YVU24 to BGRA32.<br>
1464  * The linear combination is defined by three integer multiplication factors for each source channel with 64 as denominator, plus one bias (translation) parameter for each source channel (with 128 as denominator).<br>
1465  * Beware: As this function applies integer multiplication factors (with 6 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
1466  * The transformation is based on the following pattern:
1467  * <pre>
1468  * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1469  * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1470  * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1471  * t3 = valueChannel3
1472  * </pre>
1473  * With t target, s source, f factor, and b bias.
1474  * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1475  * @param target The pointer to the 16 target pixels (with 4 channels = 64 bytes) receiving the converted pixel data, must be valid
1476  * @param factorChannel00_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1477  * @param factorChannel10_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1478  * @param factorChannel20_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1479  * @param factorChannel01_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1480  * @param factorChannel11_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1481  * @param factorChannel21_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1482  * @param factorChannel02_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1483  * @param factorChannel12_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1484  * @param factorChannel22_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1485  * @param biasChannel0_u_8x8 The bias (translation) value for the first target channel, with range [0, 128]
1486  * @param biasChannel1_u_8x8 The bias (translation) value for the second target channel, with range [0, 128]
1487  * @param biasChannel2_u_8x8 The bias (translation) value for the third target channel, with range [0, 128]
1488  * @param channelValue3_u_8x16 The constant value for the fourth target channel, with range [0, 255]
1489  */
1490  static OCEAN_FORCE_INLINE void convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8, const uint8x16_t& channelValue3_u_8x16);
1491 
1492  /**
1493  * Converts 8 pixels with 4 channels per pixel to 8 pixels with one channel per pixel by a linear combination of the four channels.
1494  * Thus, this function can be used to convert e.g., RGBA32 to Y8, or ARGB32 to Y8, or RGB32 to Y8.<br>
1495  * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator; the ranges given below imply that the four factors sum up to at most 127.<br>
1496  * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
1497  * @param source The pointer to the 8 source pixels (with 4 channels = 32 bytes) to convert, must be valid
1498  * @param target The pointer to the 8 target pixels (with 1 channel = 8 bytes) receiving the converted pixel data, must be valid
1499  * @param factorChannel0_128_u_8x8 The multiplication factor (8 identical factors) for the first channel, with range [0, 127]
1500  * @param factorChannel1_128_u_8x8 The multiplication factor (8 identical factors) for the second channel, with range [0, 127 - factorChannel0 - factorChannel2 - factorChannel3]
1501  * @param factorChannel2_128_u_8x8 The multiplication factor (8 identical factors) for the third channel, with range [0, 127 - factorChannel0 - factorChannel1 - factorChannel3]
1502  * @param factorChannel3_128_u_8x8 The multiplication factor (8 identical factors) for the fourth channel, with range [0, 127 - factorChannel0 - factorChannel1 - factorChannel2]
1503  * @tparam tUseFactorChannel0 True, if the value(s) of factorChannel0 is not zero; False, if the value(s) of factorChannel0 is zero
1504  * @tparam tUseFactorChannel1 True, if the value(s) of factorChannel1 is not zero; False, if the value(s) of factorChannel1 is zero
1505  * @tparam tUseFactorChannel2 True, if the value(s) of factorChannel2 is not zero; False, if the value(s) of factorChannel2 is zero
1506  * @tparam tUseFactorChannel3 True, if the value(s) of factorChannel3 is not zero; False, if the value(s) of factorChannel3 is zero
1507  */
1508  template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2, bool tUseFactorChannel3>
1509  static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel0_128_u_8x8, const uint8x8_t& factorChannel1_128_u_8x8, const uint8x8_t& factorChannel2_128_u_8x8, const uint8x8_t& factorChannel3_128_u_8x8);
1510 
1511  /**
1512  * Converts 8 pixels with 4 channels per pixel to 8 pixels with two channels per pixel by a linear combination of the four channels.
1513  * Thus, this function can be used to convert e.g., RGBA32 to YA16, or ARGB32 to AY16.<br>
1514  * The linear combination is defined by one integer multiplication factor for each pair of target and source channel with 128 as denominator.<br>
1515  * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
1516  * @param source The pointer to the 8 source pixels (with 4 channels = 32 bytes) to convert, must be valid
1517  * @param target The pointer to the 8 target pixels (with 2 channels = 16 bytes) receiving the converted pixel data, must be valid
1518  * @param factorChannel00_128_u_8x8 The multiplication factor (8 identical factors) for the first target and first source channel, with range [0, 127]
1519  * @param factorChannel10_128_u_8x8 The multiplication factor (8 identical factors) for the second target and first source channel, with range [0, 127]
1520  * @param factorChannel01_128_u_8x8 The multiplication factor (8 identical factors) for the first target and second source channel, with range [0, 127 - factorChannel00 - factorChannel02 - factorChannel03]
1521  * @param factorChannel11_128_u_8x8 The multiplication factor (8 identical factors) for the second target and second source channel, with range [0, 127 - factorChannel10 - factorChannel12 - factorChannel13]
1522  * @param factorChannel02_128_u_8x8 The multiplication factor (8 identical factors) for the first target and third source channel, with range [0, 127 - factorChannel00 - factorChannel01 - factorChannel03]
1523  * @param factorChannel12_128_u_8x8 The multiplication factor (8 identical factors) for the second target and third source channel, with range [0, 127 - factorChannel10 - factorChannel11 - factorChannel13]
1524  * @param factorChannel03_128_u_8x8 The multiplication factor (8 identical factors) for the first target and fourth source channel, with range [0, 127 - factorChannel00 - factorChannel01 - factorChannel02]
1525  * @param factorChannel13_128_u_8x8 The multiplication factor (8 identical factors) for the second target and fourth source channel, with range [0, 127 - factorChannel10 - factorChannel11 - factorChannel12]
1526  */
1527  static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel00_128_u_8x8, const uint8x8_t& factorChannel10_128_u_8x8, const uint8x8_t& factorChannel01_128_u_8x8, const uint8x8_t& factorChannel11_128_u_8x8, const uint8x8_t& factorChannel02_128_u_8x8, const uint8x8_t& factorChannel12_128_u_8x8, const uint8x8_t& factorChannel03_128_u_8x8, const uint8x8_t& factorChannel13_128_u_8x8);
1528 
1529 #endif // OCEAN_HARDWARE_NEON_VERSION >= 10
1530 
1531 };
1532 
1533 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1534 
1535 template <>
1536 inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 2u>(const uint8_t* const sourceFrame, uint8_t* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
1537 {
1538  ocean_assert(sourceFrame != nullptr);
1539  ocean_assert(targetFrames != nullptr);
1540 
1541  ocean_assert(width != 0u && height != 0u);
1542  ocean_assert(channels == 2u);
1543 
1544  constexpr unsigned int tChannels = 2u;
1545 
1546  bool allTargetFramesContinuous = true;
1547 
1548  if (targetFramesPaddingElements != nullptr)
1549  {
1550  for (unsigned int n = 0u; n < tChannels; ++n)
1551  {
1552  if (targetFramesPaddingElements[n] != 0u)
1553  {
1554  allTargetFramesContinuous = false;
1555  break;
1556  }
1557  }
1558  }
1559 
1560  const uint8_t* source = sourceFrame;
1561  uint8_t* target0 = targetFrames[0];
1562  uint8_t* target1 = targetFrames[1];
1563 
1564  constexpr unsigned int tBlockSize = 16u;
1565 
1566  uint8x16x2_t source_8x16x2;
1567 
1568  if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1569  {
1570  const unsigned int pixels = width * height;
1571  const unsigned int blocks = pixels / tBlockSize;
1572  const unsigned int remaining = pixels % tBlockSize;
1573 
1574  for (unsigned int n = 0u; n < blocks; ++n)
1575  {
1576  source_8x16x2 = vld2q_u8(source);
1577 
1578  vst1q_u8(target0, source_8x16x2.val[0]);
1579  vst1q_u8(target1, source_8x16x2.val[1]);
1580 
1581  source += tBlockSize * tChannels;
1582 
1583  target0 += tBlockSize;
1584  target1 += tBlockSize;
1585  }
1586 
1587  for (unsigned int n = 0u; n < remaining; ++n)
1588  {
1589  target0[n] = source[n * tChannels + 0u];
1590  target1[n] = source[n * tChannels + 1u];
1591  }
1592  }
1593  else
1594  {
1595  const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[0];
1596  const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[1];
1597 
1598  const unsigned int blocks = width / tBlockSize;
1599  const unsigned int remaining = width % tBlockSize;
1600 
1601  for (unsigned int y = 0u; y < height; ++y)
1602  {
1603  for (unsigned int n = 0u; n < blocks; ++n)
1604  {
1605  source_8x16x2 = vld2q_u8(source);
1606 
1607  vst1q_u8(target0, source_8x16x2.val[0]);
1608  vst1q_u8(target1, source_8x16x2.val[1]);
1609 
1610  source += tBlockSize * tChannels;
1611 
1612  target0 += tBlockSize;
1613  target1 += tBlockSize;
1614  }
1615 
1616  for (unsigned int n = 0u; n < remaining; ++n)
1617  {
1618  target0[n] = source[n * tChannels + 0u];
1619  target1[n] = source[n * tChannels + 1u];
1620  }
1621 
1622  source += remaining * tChannels + sourceFramePaddingElements;
1623  target0 += remaining + targetFrame0PaddingElements;
1624  target1 += remaining + targetFrame1PaddingElements;
1625  }
1626  }
1627 }
1628 
1629 template <>
1630 inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 3u>(const uint8_t* const sourceFrame, uint8_t* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
1631 {
1632  ocean_assert(sourceFrame != nullptr);
1633  ocean_assert(targetFrames != nullptr);
1634 
1635  ocean_assert(width != 0u && height != 0u);
1636  ocean_assert(channels == 3u);
1637 
1638  constexpr unsigned int tChannels = 3u;
1639 
1640  bool allTargetFramesContinuous = true;
1641 
1642  if (targetFramesPaddingElements != nullptr)
1643  {
1644  for (unsigned int n = 0u; n < tChannels; ++n)
1645  {
1646  if (targetFramesPaddingElements[n] != 0u)
1647  {
1648  allTargetFramesContinuous = false;
1649  break;
1650  }
1651  }
1652  }
1653 
1654  const uint8_t* source = sourceFrame;
1655  uint8_t* target0 = targetFrames[0];
1656  uint8_t* target1 = targetFrames[1];
1657  uint8_t* target2 = targetFrames[2];
1658 
1659  constexpr unsigned int tBlockSize = 16u;
1660 
1661  uint8x16x3_t source_8x16x3;
1662 
1663  if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1664  {
1665  const unsigned int pixels = width * height;
1666  const unsigned int blocks = pixels / tBlockSize;
1667  const unsigned int remaining = pixels % tBlockSize;
1668 
1669  for (unsigned int n = 0u; n < blocks; ++n)
1670  {
1671  source_8x16x3 = vld3q_u8(source);
1672 
1673  vst1q_u8(target0, source_8x16x3.val[0]);
1674  vst1q_u8(target1, source_8x16x3.val[1]);
1675  vst1q_u8(target2, source_8x16x3.val[2]);
1676 
1677  source += tBlockSize * tChannels;
1678 
1679  target0 += tBlockSize;
1680  target1 += tBlockSize;
1681  target2 += tBlockSize;
1682  }
1683 
1684  for (unsigned int n = 0u; n < remaining; ++n)
1685  {
1686  target0[n] = source[n * tChannels + 0u];
1687  target1[n] = source[n * tChannels + 1u];
1688  target2[n] = source[n * tChannels + 2u];
1689  }
1690  }
1691  else
1692  {
1693  const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[0];
1694  const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[1];
1695  const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[2];
1696 
1697  const unsigned int blocks = width / tBlockSize;
1698  const unsigned int remaining = width % tBlockSize;
1699 
1700  for (unsigned int y = 0u; y < height; ++y)
1701  {
1702  for (unsigned int n = 0u; n < blocks; ++n)
1703  {
1704  source_8x16x3 = vld3q_u8(source);
1705 
1706  vst1q_u8(target0, source_8x16x3.val[0]);
1707  vst1q_u8(target1, source_8x16x3.val[1]);
1708  vst1q_u8(target2, source_8x16x3.val[2]);
1709 
1710  source += tBlockSize * tChannels;
1711 
1712  target0 += tBlockSize;
1713  target1 += tBlockSize;
1714  target2 += tBlockSize;
1715  }
1716 
1717  for (unsigned int n = 0u; n < remaining; ++n)
1718  {
1719  target0[n] = source[n * tChannels + 0u];
1720  target1[n] = source[n * tChannels + 1u];
1721  target2[n] = source[n * tChannels + 2u];
1722  }
1723 
1724  source += remaining * tChannels + sourceFramePaddingElements;
1725  target0 += remaining + targetFrame0PaddingElements;
1726  target1 += remaining + targetFrame1PaddingElements;
1727  target2 += remaining + targetFrame2PaddingElements;
1728  }
1729  }
1730 }
1731 
1732 template <>
1733 inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 4u>(const uint8_t* const sourceFrame, uint8_t* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
1734 {
1735  ocean_assert(sourceFrame != nullptr);
1736  ocean_assert(targetFrames != nullptr);
1737 
1738  ocean_assert(width != 0u && height != 0u);
1739  ocean_assert(channels == 4u);
1740 
1741  constexpr unsigned int tChannels = 4u;
1742 
1743  bool allTargetFramesContinuous = true;
1744 
1745  if (targetFramesPaddingElements != nullptr)
1746  {
1747  for (unsigned int n = 0u; n < tChannels; ++n)
1748  {
1749  if (targetFramesPaddingElements[n] != 0u)
1750  {
1751  allTargetFramesContinuous = false;
1752  break;
1753  }
1754  }
1755  }
1756 
1757  const uint8_t* source = sourceFrame;
1758  uint8_t* target0 = targetFrames[0];
1759  uint8_t* target1 = targetFrames[1];
1760  uint8_t* target2 = targetFrames[2];
1761  uint8_t* target3 = targetFrames[3];
1762 
1763  constexpr unsigned int tBlockSize = 16u;
1764 
1765  uint8x16x4_t source_8x16x4;
1766 
1767  if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1768  {
1769  const unsigned int pixels = width * height;
1770  const unsigned int blocks = pixels / tBlockSize;
1771  const unsigned int remaining = pixels % tBlockSize;
1772 
1773  for (unsigned int n = 0u; n < blocks; ++n)
1774  {
1775  source_8x16x4 = vld4q_u8(source);
1776 
1777  vst1q_u8(target0, source_8x16x4.val[0]);
1778  vst1q_u8(target1, source_8x16x4.val[1]);
1779  vst1q_u8(target2, source_8x16x4.val[2]);
1780  vst1q_u8(target3, source_8x16x4.val[3]);
1781 
1782  source += tBlockSize * tChannels;
1783 
1784  target0 += tBlockSize;
1785  target1 += tBlockSize;
1786  target2 += tBlockSize;
1787  target3 += tBlockSize;
1788  }
1789 
1790  for (unsigned int n = 0u; n < remaining; ++n)
1791  {
1792  target0[n] = source[n * tChannels + 0u];
1793  target1[n] = source[n * tChannels + 1u];
1794  target2[n] = source[n * tChannels + 2u];
1795  target3[n] = source[n * tChannels + 3u];
1796  }
1797  }
1798  else
1799  {
1800  const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[0];
1801  const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[1];
1802  const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[2];
1803  const unsigned int targetFrame3PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[3];
1804 
1805  const unsigned int blocks = width / tBlockSize;
1806  const unsigned int remaining = width % tBlockSize;
1807 
1808  for (unsigned int y = 0u; y < height; ++y)
1809  {
1810  for (unsigned int n = 0u; n < blocks; ++n)
1811  {
1812  source_8x16x4 = vld4q_u8(source);
1813 
1814  vst1q_u8(target0, source_8x16x4.val[0]);
1815  vst1q_u8(target1, source_8x16x4.val[1]);
1816  vst1q_u8(target2, source_8x16x4.val[2]);
1817  vst1q_u8(target3, source_8x16x4.val[3]);
1818 
1819  source += tBlockSize * tChannels;
1820 
1821  target0 += tBlockSize;
1822  target1 += tBlockSize;
1823  target2 += tBlockSize;
1824  target3 += tBlockSize;
1825  }
1826 
1827  for (unsigned int n = 0u; n < remaining; ++n)
1828  {
1829  target0[n] = source[n * tChannels + 0u];
1830  target1[n] = source[n * tChannels + 1u];
1831  target2[n] = source[n * tChannels + 2u];
1832  target3[n] = source[n * tChannels + 3u];
1833  }
1834 
1835  source += remaining * tChannels + sourceFramePaddingElements;
1836  target0 += remaining + targetFrame0PaddingElements;
1837  target1 += remaining + targetFrame1PaddingElements;
1838  target2 += remaining + targetFrame2PaddingElements;
1839  target3 += remaining + targetFrame3PaddingElements;
1840  }
1841  }
1842 }
1843 
1844 #endif // OCEAN_HARDWARE_NEON_VERSION
1845 
1846 template <typename TSource, typename TTarget, unsigned int tChannels>
1847 void FrameChannels::separateTo1Channel(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
1848 {
1849  ocean_assert(sourceFrame != nullptr);
1850  ocean_assert(targetFrames != nullptr);
1851 
1852  ocean_assert(width != 0u && height != 0u);
1853 
1854  ocean_assert(tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME || tChannels == channels);
1855 
1856  if constexpr (tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME)
1857  {
1858  separateTo1ChannelRuntime<TSource, TTarget>(sourceFrame, targetFrames, width, height, channels, sourceFramePaddingElements, targetFramesPaddingElements);
1859  return;
1860  }
1861 
1862 #ifdef OCEAN_DEBUG
1863  for (unsigned int c = 0u; c < tChannels; ++c)
1864  {
1865  ocean_assert(targetFrames[c] != nullptr);
1866  }
1867 #endif
1868 
1869  if (sourceFramePaddingElements == 0u && targetFramesPaddingElements == nullptr)
1870  {
1871  for (unsigned int n = 0u; n < width * height; ++n)
1872  {
1873  for (unsigned int c = 0u; c < tChannels; ++c)
1874  {
1875  targetFrames[c][n] = TTarget(sourceFrame[n * tChannels + c]);
1876  }
1877  }
1878  }
1879  else if (targetFramesPaddingElements == nullptr)
1880  {
1881  ocean_assert(sourceFramePaddingElements != 0u);
1882 
1883  const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
1884 
1885  for (unsigned int y = 0u; y < height; ++y)
1886  {
1887  const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
1888 
1889  const unsigned int targetRowOffset = y * width;
1890 
1891  for (unsigned int x = 0u; x < width; ++x)
1892  {
1893  for (unsigned int c = 0u; c < tChannels; ++c)
1894  {
1895  *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * tChannels + c));
1896  }
1897  }
1898  }
1899  }
1900  else
1901  {
1902  const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
1903 
1904  Indices32 targetFrameStrideElements(tChannels);
1905 
1906  for (unsigned int c = 0u; c < tChannels; ++c)
1907  {
1908  targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
1909  }
1910 
1911  for (unsigned int y = 0u; y < height; ++y)
1912  {
1913  const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
1914 
1915  for (unsigned int x = 0u; x < width; ++x)
1916  {
1917  for (unsigned int c = 0u; c < tChannels; ++c)
1918  {
1919  *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * tChannels + c));
1920  }
1921  }
1922  }
1923  }
1924 }
1925 
1926 template <typename TSource, typename TTarget>
1927 void FrameChannels::separateTo1Channel(const TSource* const sourceFrame, const std::initializer_list<TTarget*>& targetFrames, const unsigned int width, const unsigned int height, const unsigned int sourceFramePaddingElements, const std::initializer_list<const unsigned int>& targetFramesPaddingElements)
1928 {
1929  ocean_assert(targetFrames.size() >= 1);
1930  ocean_assert(targetFramesPaddingElements.size() == 0 || targetFrames.size() == targetFramesPaddingElements.size());
1931 
1932  if (targetFrames.size() == 2)
1933  {
1934  separateTo1Channel<TSource, TTarget, 2u>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
1935  }
1936  else if (targetFrames.size() == 3)
1937  {
1938  separateTo1Channel<TSource, TTarget, 3u>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
1939  }
1940  else if (targetFrames.size() == 4)
1941  {
1942  separateTo1Channel<TSource, TTarget, 4u>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
1943  }
1944  else
1945  {
1946  separateTo1Channel<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
1947  }
1948 }
1949 
1950 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1951 
1952 template <>
1953 inline void FrameChannels::zipChannels<uint8_t, uint8_t, 2u>(const uint8_t* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
1954 {
1955  ocean_assert(sourceFrames != nullptr);
1956  ocean_assert(targetFrame != nullptr);
1957 
1958  ocean_assert(width != 0u && height != 0u);
1959  ocean_assert(channels == 2u);
1960 
1961  constexpr unsigned int tChannels = 2u;
1962 
1963  bool allSourceFramesContinuous = true;
1964 
1965  if (sourceFramesPaddingElements != nullptr)
1966  {
1967  for (unsigned int n = 0u; n < tChannels; ++n)
1968  {
1969  if (sourceFramesPaddingElements[n] != 0u)
1970  {
1971  allSourceFramesContinuous = false;
1972  break;
1973  }
1974  }
1975  }
1976 
1977  const uint8_t* source0 = sourceFrames[0];
1978  const uint8_t* source1 = sourceFrames[1];
1979  uint8_t* target = targetFrame;
1980 
1981  constexpr unsigned int tBlockSize = 16u;
1982 
1983  uint8x16x2_t source_8x16x2;
1984 
1985  if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
1986  {
1987  const unsigned int pixels = width * height;
1988  const unsigned int blocks = pixels / tBlockSize;
1989  const unsigned int remaining = pixels % tBlockSize;
1990 
1991  for (unsigned int n = 0u; n < blocks; ++n)
1992  {
1993  source_8x16x2.val[0] = vld1q_u8(source0);
1994  source_8x16x2.val[1] = vld1q_u8(source1);
1995 
1996  vst2q_u8(target, source_8x16x2);
1997 
1998  source0 += tBlockSize;
1999  source1 += tBlockSize;
2000 
2001  target += tBlockSize * tChannels;
2002  }
2003 
2004  for (unsigned int n = 0u; n < remaining; ++n)
2005  {
2006  target[n * tChannels + 0u] = source0[n];
2007  target[n * tChannels + 1u] = source1[n];
2008  }
2009  }
2010  else
2011  {
2012  const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2013  const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2014 
2015  const unsigned int blocks = width / tBlockSize;
2016  const unsigned int remaining = width % tBlockSize;
2017 
2018  for (unsigned int y = 0u; y < height; ++y)
2019  {
2020  for (unsigned int n = 0u; n < blocks; ++n)
2021  {
2022  source_8x16x2.val[0] = vld1q_u8(source0);
2023  source_8x16x2.val[1] = vld1q_u8(source1);
2024 
2025  vst2q_u8(target, source_8x16x2);
2026 
2027  source0 += tBlockSize;
2028  source1 += tBlockSize;
2029 
2030  target += tBlockSize * tChannels;
2031  }
2032 
2033  for (unsigned int n = 0u; n < remaining; ++n)
2034  {
2035  target[n * tChannels + 0u] = source0[n];
2036  target[n * tChannels + 1u] = source1[n];
2037  }
2038 
2039  source0 += remaining + sourceFrame0PaddingElements;
2040  source1 += remaining + sourceFrame1PaddingElements;
2041  target += remaining * tChannels + targetFramePaddingElements;
2042  }
2043  }
2044 }
2045 
2046 template <>
2047 inline void FrameChannels::zipChannels<uint8_t, uint8_t, 3u>(const uint8_t* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2048 {
2049  ocean_assert(sourceFrames != nullptr);
2050  ocean_assert(targetFrame != nullptr);
2051 
2052  ocean_assert(width != 0u && height != 0u);
2053  ocean_assert(channels == 3u);
2054 
2055  constexpr unsigned int tChannels = 3u;
2056 
2057  bool allSourceFramesContinuous = true;
2058 
2059  if (sourceFramesPaddingElements != nullptr)
2060  {
2061  for (unsigned int n = 0u; n < tChannels; ++n)
2062  {
2063  if (sourceFramesPaddingElements[n] != 0u)
2064  {
2065  allSourceFramesContinuous = false;
2066  break;
2067  }
2068  }
2069  }
2070 
2071  const uint8_t* source0 = sourceFrames[0];
2072  const uint8_t* source1 = sourceFrames[1];
2073  const uint8_t* source2 = sourceFrames[2];
2074  uint8_t* target = targetFrame;
2075 
2076  constexpr unsigned int tBlockSize = 16u;
2077 
2078  uint8x16x3_t source_8x16x3;
2079 
2080  if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2081  {
2082  const unsigned int pixels = width * height;
2083  const unsigned int blocks = pixels / tBlockSize;
2084  const unsigned int remaining = pixels % tBlockSize;
2085 
2086  for (unsigned int n = 0u; n < blocks; ++n)
2087  {
2088  source_8x16x3.val[0] = vld1q_u8(source0);
2089  source_8x16x3.val[1] = vld1q_u8(source1);
2090  source_8x16x3.val[2] = vld1q_u8(source2);
2091 
2092  vst3q_u8(target, source_8x16x3);
2093 
2094  source0 += tBlockSize;
2095  source1 += tBlockSize;
2096  source2 += tBlockSize;
2097 
2098  target += tBlockSize * tChannels;
2099  }
2100 
2101  for (unsigned int n = 0u; n < remaining; ++n)
2102  {
2103  target[n * tChannels + 0u] = source0[n];
2104  target[n * tChannels + 1u] = source1[n];
2105  target[n * tChannels + 2u] = source2[n];
2106  }
2107  }
2108  else
2109  {
2110  const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2111  const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2112  const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[2];
2113 
2114  const unsigned int blocks = width / tBlockSize;
2115  const unsigned int remaining = width % tBlockSize;
2116 
2117  for (unsigned int y = 0u; y < height; ++y)
2118  {
2119  for (unsigned int n = 0u; n < blocks; ++n)
2120  {
2121  source_8x16x3.val[0] = vld1q_u8(source0);
2122  source_8x16x3.val[1] = vld1q_u8(source1);
2123  source_8x16x3.val[2] = vld1q_u8(source2);
2124 
2125  vst3q_u8(target, source_8x16x3);
2126 
2127  source0 += tBlockSize;
2128  source1 += tBlockSize;
2129  source2 += tBlockSize;
2130 
2131  target += tBlockSize * tChannels;
2132  }
2133 
2134  for (unsigned int n = 0u; n < remaining; ++n)
2135  {
2136  target[n * tChannels + 0u] = source0[n];
2137  target[n * tChannels + 1u] = source1[n];
2138  target[n * tChannels + 2u] = source2[n];
2139  }
2140 
2141  source0 += remaining + sourceFrame0PaddingElements;
2142  source1 += remaining + sourceFrame1PaddingElements;
2143  source2 += remaining + sourceFrame2PaddingElements;
2144  target += remaining * tChannels + targetFramePaddingElements;
2145  }
2146  }
2147 }
2148 
2149 template <>
2150 inline void FrameChannels::zipChannels<uint8_t, uint8_t, 4u>(const uint8_t* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2151 {
2152  ocean_assert(sourceFrames != nullptr);
2153  ocean_assert(targetFrame != nullptr);
2154 
2155  ocean_assert(width != 0u && height != 0u);
2156  ocean_assert(channels == 4u);
2157 
2158  constexpr unsigned int tChannels = 4u;
2159 
2160  bool allSourceFramesContinuous = true;
2161 
2162  if (sourceFramesPaddingElements != nullptr)
2163  {
2164  for (unsigned int n = 0u; n < tChannels; ++n)
2165  {
2166  if (sourceFramesPaddingElements[n] != 0u)
2167  {
2168  allSourceFramesContinuous = false;
2169  break;
2170  }
2171  }
2172  }
2173 
2174  const uint8_t* source0 = sourceFrames[0];
2175  const uint8_t* source1 = sourceFrames[1];
2176  const uint8_t* source2 = sourceFrames[2];
2177  const uint8_t* source3 = sourceFrames[3];
2178  uint8_t* target = targetFrame;
2179 
2180  constexpr unsigned int tBlockSize = 16u;
2181 
2182  uint8x16x4_t source_8x16x4;
2183 
2184  if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2185  {
2186  const unsigned int pixels = width * height;
2187  const unsigned int blocks = pixels / tBlockSize;
2188  const unsigned int remaining = pixels % tBlockSize;
2189 
2190  for (unsigned int n = 0u; n < blocks; ++n)
2191  {
2192  source_8x16x4.val[0] = vld1q_u8(source0);
2193  source_8x16x4.val[1] = vld1q_u8(source1);
2194  source_8x16x4.val[2] = vld1q_u8(source2);
2195  source_8x16x4.val[3] = vld1q_u8(source3);
2196 
2197  vst4q_u8(target, source_8x16x4);
2198 
2199  source0 += tBlockSize;
2200  source1 += tBlockSize;
2201  source2 += tBlockSize;
2202  source3 += tBlockSize;
2203 
2204  target += tBlockSize * tChannels;
2205  }
2206 
2207  for (unsigned int n = 0u; n < remaining; ++n)
2208  {
2209  target[n * tChannels + 0u] = source0[n];
2210  target[n * tChannels + 1u] = source1[n];
2211  target[n * tChannels + 2u] = source2[n];
2212  target[n * tChannels + 3u] = source3[n];
2213  }
2214  }
2215  else
2216  {
2217  const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2218  const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2219  const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[2];
2220  const unsigned int sourceFrame3PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[3];
2221 
2222  const unsigned int blocks = width / tBlockSize;
2223  const unsigned int remaining = width % tBlockSize;
2224 
2225  for (unsigned int y = 0u; y < height; ++y)
2226  {
2227  for (unsigned int n = 0u; n < blocks; ++n)
2228  {
2229  source_8x16x4.val[0] = vld1q_u8(source0);
2230  source_8x16x4.val[1] = vld1q_u8(source1);
2231  source_8x16x4.val[2] = vld1q_u8(source2);
2232  source_8x16x4.val[3] = vld1q_u8(source3);
2233 
2234  vst4q_u8(target, source_8x16x4);
2235 
2236  source0 += tBlockSize;
2237  source1 += tBlockSize;
2238  source2 += tBlockSize;
2239  source3 += tBlockSize;
2240 
2241  target += tBlockSize * tChannels;
2242  }
2243 
2244  for (unsigned int n = 0u; n < remaining; ++n)
2245  {
2246  target[n * tChannels + 0u] = source0[n];
2247  target[n * tChannels + 1u] = source1[n];
2248  target[n * tChannels + 2u] = source2[n];
2249  target[n * tChannels + 3u] = source3[n];
2250  }
2251 
2252  source0 += remaining + sourceFrame0PaddingElements;
2253  source1 += remaining + sourceFrame1PaddingElements;
2254  source2 += remaining + sourceFrame2PaddingElements;
2255  source3 += remaining + sourceFrame3PaddingElements;
2256  target += remaining * tChannels + targetFramePaddingElements;
2257  }
2258  }
2259 }
2260 
2261 template <>
2262 inline void FrameChannels::zipChannels<float, uint8_t, 2u>(const float* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2263 {
2264  ocean_assert(sourceFrames != nullptr);
2265  ocean_assert(targetFrame != nullptr);
2266 
2267  ocean_assert(width != 0u && height != 0u);
2268  ocean_assert(channels == 2u);
2269 
2270  constexpr unsigned int tChannels = 2u;
2271 
2272  bool allSourceFramesContinuous = true;
2273 
2274  if (sourceFramesPaddingElements != nullptr)
2275  {
2276  for (unsigned int n = 0u; n < tChannels; ++n)
2277  {
2278  if (sourceFramesPaddingElements[n] != 0u)
2279  {
2280  allSourceFramesContinuous = false;
2281  break;
2282  }
2283  }
2284  }
2285 
2286  const float* source0 = sourceFrames[0];
2287  const float* source1 = sourceFrames[1];
2288  uint8_t* target = targetFrame;
2289 
2290  constexpr unsigned int tBlockSize = 16u;
2291 
2292  uint8x16x2_t target_8x16x2;
2293 
2294  if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2295  {
2296  const unsigned int pixels = width * height;
2297  const unsigned int blocks = pixels / tBlockSize;
2298  const unsigned int remaining = pixels % tBlockSize;
2299 
2300  for (unsigned int n = 0u; n < blocks; ++n)
2301  {
2302  target_8x16x2.val[0] = NEON::cast16ElementsNEON(source0);
2303  target_8x16x2.val[1] = NEON::cast16ElementsNEON(source1);
2304 
2305  vst2q_u8(target, target_8x16x2);
2306 
2307  source0 += tBlockSize;
2308  source1 += tBlockSize;
2309 
2310  target += tBlockSize * tChannels;
2311  }
2312 
2313  for (unsigned int n = 0u; n < remaining; ++n)
2314  {
2315  ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2316  ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2317 
2318  target[n * tChannels + 0u] = uint8_t(source0[n]);
2319  target[n * tChannels + 1u] = uint8_t(source1[n]);
2320  }
2321  }
2322  else
2323  {
2324  const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2325  const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2326 
2327  const unsigned int blocks = width / tBlockSize;
2328  const unsigned int remaining = width % tBlockSize;
2329 
2330  for (unsigned int y = 0u; y < height; ++y)
2331  {
2332  for (unsigned int n = 0u; n < blocks; ++n)
2333  {
2334  target_8x16x2.val[0] = NEON::cast16ElementsNEON(source0);
2335  target_8x16x2.val[1] = NEON::cast16ElementsNEON(source1);
2336 
2337  vst2q_u8(target, target_8x16x2);
2338 
2339  source0 += tBlockSize;
2340  source1 += tBlockSize;
2341 
2342  target += tBlockSize * tChannels;
2343  }
2344 
2345  for (unsigned int n = 0u; n < remaining; ++n)
2346  {
2347  ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2348  ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2349 
2350  target[n * tChannels + 0u] = uint8_t(source0[n]);
2351  target[n * tChannels + 1u] = uint8_t(source1[n]);
2352  }
2353 
2354  source0 += remaining + sourceFrame0PaddingElements;
2355  source1 += remaining + sourceFrame1PaddingElements;
2356  target += remaining * tChannels + targetFramePaddingElements;
2357  }
2358  }
2359 }
2360 
2361 template <>
2362 inline void FrameChannels::zipChannels<float, uint8_t, 3u>(const float* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2363 {
2364  ocean_assert(sourceFrames != nullptr);
2365  ocean_assert(targetFrame != nullptr);
2366 
2367  ocean_assert(width != 0u && height != 0u);
2368  ocean_assert(channels == 3u);
2369 
2370  constexpr unsigned int tChannels = 3u;
2371 
2372  bool allSourceFramesContinuous = true;
2373 
2374  if (sourceFramesPaddingElements != nullptr)
2375  {
2376  for (unsigned int n = 0u; n < tChannels; ++n)
2377  {
2378  if (sourceFramesPaddingElements[n] != 0u)
2379  {
2380  allSourceFramesContinuous = false;
2381  break;
2382  }
2383  }
2384  }
2385 
2386  const float* source0 = sourceFrames[0];
2387  const float* source1 = sourceFrames[1];
2388  const float* source2 = sourceFrames[2];
2389  uint8_t* target = targetFrame;
2390 
2391  constexpr unsigned int tBlockSize = 16u;
2392 
2393  uint8x16x3_t target_8x16x3;
2394 
2395  if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2396  {
2397  const unsigned int pixels = width * height;
2398  const unsigned int blocks = pixels / tBlockSize;
2399  const unsigned int remaining = pixels % tBlockSize;
2400 
2401  for (unsigned int n = 0u; n < blocks; ++n)
2402  {
2403  target_8x16x3.val[0] = NEON::cast16ElementsNEON(source0);
2404  target_8x16x3.val[1] = NEON::cast16ElementsNEON(source1);
2405  target_8x16x3.val[2] = NEON::cast16ElementsNEON(source2);
2406 
2407  vst3q_u8(target, target_8x16x3);
2408 
2409  source0 += tBlockSize;
2410  source1 += tBlockSize;
2411  source2 += tBlockSize;
2412 
2413  target += tBlockSize * tChannels;
2414  }
2415 
2416  for (unsigned int n = 0u; n < remaining; ++n)
2417  {
2418  ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2419  ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2420  ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2421 
2422  target[n * tChannels + 0u] = uint8_t(source0[n]);
2423  target[n * tChannels + 1u] = uint8_t(source1[n]);
2424  target[n * tChannels + 2u] = uint8_t(source2[n]);
2425  }
2426  }
2427  else
2428  {
2429  const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2430  const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2431  const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[2];
2432 
2433  const unsigned int blocks = width / tBlockSize;
2434  const unsigned int remaining = width % tBlockSize;
2435 
2436  for (unsigned int y = 0u; y < height; ++y)
2437  {
2438  for (unsigned int n = 0u; n < blocks; ++n)
2439  {
2440  target_8x16x3.val[0] = NEON::cast16ElementsNEON(source0);
2441  target_8x16x3.val[1] = NEON::cast16ElementsNEON(source1);
2442  target_8x16x3.val[2] = NEON::cast16ElementsNEON(source2);
2443 
2444 
2445  vst3q_u8(target, target_8x16x3);
2446 
2447  source0 += tBlockSize;
2448  source1 += tBlockSize;
2449  source2 += tBlockSize;
2450 
2451  target += tBlockSize * tChannels;
2452  }
2453 
2454  for (unsigned int n = 0u; n < remaining; ++n)
2455  {
2456  ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2457  ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2458  ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2459 
2460  target[n * tChannels + 0u] = uint8_t(source0[n]);
2461  target[n * tChannels + 1u] = uint8_t(source1[n]);
2462  target[n * tChannels + 2u] = uint8_t(source2[n]);
2463  }
2464 
2465  source0 += remaining + sourceFrame0PaddingElements;
2466  source1 += remaining + sourceFrame1PaddingElements;
2467  source2 += remaining + sourceFrame2PaddingElements;
2468  target += remaining * tChannels + targetFramePaddingElements;
2469  }
2470  }
2471 }
2472 
2473 template <>
2474 inline void FrameChannels::zipChannels<float, uint8_t, 4u>(const float* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2475 {
2476  ocean_assert(sourceFrames != nullptr);
2477  ocean_assert(targetFrame != nullptr);
2478 
2479  ocean_assert(width != 0u && height != 0u);
2480  ocean_assert(channels == 4u);
2481 
2482  constexpr unsigned int tChannels = 4u;
2483 
2484  bool allSourceFramesContinuous = true;
2485 
2486  if (sourceFramesPaddingElements != nullptr)
2487  {
2488  for (unsigned int n = 0u; n < tChannels; ++n)
2489  {
2490  if (sourceFramesPaddingElements[n] != 0u)
2491  {
2492  allSourceFramesContinuous = false;
2493  break;
2494  }
2495  }
2496  }
2497 
2498  const float* source0 = sourceFrames[0];
2499  const float* source1 = sourceFrames[1];
2500  const float* source2 = sourceFrames[2];
2501  const float* source3 = sourceFrames[3];
2502  uint8_t* target = targetFrame;
2503 
2504  constexpr unsigned int tBlockSize = 16u;
2505 
2506  uint8x16x4_t target_8x16x4;
2507 
2508  if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2509  {
2510  const unsigned int pixels = width * height;
2511  const unsigned int blocks = pixels / tBlockSize;
2512  const unsigned int remaining = pixels % tBlockSize;
2513 
2514  for (unsigned int n = 0u; n < blocks; ++n)
2515  {
2516  target_8x16x4.val[0] = NEON::cast16ElementsNEON(source0);
2517  target_8x16x4.val[1] = NEON::cast16ElementsNEON(source1);
2518  target_8x16x4.val[2] = NEON::cast16ElementsNEON(source2);
2519  target_8x16x4.val[3] = NEON::cast16ElementsNEON(source3);
2520 
2521  vst4q_u8(target, target_8x16x4);
2522 
2523  source0 += tBlockSize;
2524  source1 += tBlockSize;
2525  source2 += tBlockSize;
2526  source3 += tBlockSize;
2527 
2528  target += tBlockSize * tChannels;
2529  }
2530 
2531  for (unsigned int n = 0u; n < remaining; ++n)
2532  {
2533  ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2534  ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2535  ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2536  ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
2537 
2538  target[n * tChannels + 0u] = uint8_t(source0[n]);
2539  target[n * tChannels + 1u] = uint8_t(source1[n]);
2540  target[n * tChannels + 2u] = uint8_t(source2[n]);
2541  target[n * tChannels + 3u] = uint8_t(source3[n]);
2542  }
2543  }
2544  else
2545  {
2546  const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2547  const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2548  const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[2];
2549  const unsigned int sourceFrame3PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[3];
2550 
2551  const unsigned int blocks = width / tBlockSize;
2552  const unsigned int remaining = width % tBlockSize;
2553 
2554  for (unsigned int y = 0u; y < height; ++y)
2555  {
2556  for (unsigned int n = 0u; n < blocks; ++n)
2557  {
2558  target_8x16x4.val[0] = NEON::cast16ElementsNEON(source0);
2559  target_8x16x4.val[1] = NEON::cast16ElementsNEON(source1);
2560  target_8x16x4.val[2] = NEON::cast16ElementsNEON(source2);
2561  target_8x16x4.val[3] = NEON::cast16ElementsNEON(source3);
2562 
2563  vst4q_u8(target, target_8x16x4);
2564 
2565  source0 += tBlockSize;
2566  source1 += tBlockSize;
2567  source2 += tBlockSize;
2568  source3 += tBlockSize;
2569 
2570  target += tBlockSize * tChannels;
2571  }
2572 
2573  for (unsigned int n = 0u; n < remaining; ++n)
2574  {
2575  ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2576  ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2577  ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2578  ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
2579 
2580  target[n * tChannels + 0u] = uint8_t(source0[n]);
2581  target[n * tChannels + 1u] = uint8_t(source1[n]);
2582  target[n * tChannels + 2u] = uint8_t(source2[n]);
2583  target[n * tChannels + 3u] = uint8_t(source3[n]);
2584  }
2585 
2586  source0 += remaining + sourceFrame0PaddingElements;
2587  source1 += remaining + sourceFrame1PaddingElements;
2588  source2 += remaining + sourceFrame2PaddingElements;
2589  source3 += remaining + sourceFrame3PaddingElements;
2590  target += remaining * tChannels + targetFramePaddingElements;
2591  }
2592  }
2593 }
2594 
2595 #endif // defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
2596 
2597 template <typename TSource, typename TTarget, unsigned int tChannels>
2598 void FrameChannels::zipChannels(const TSource* const* sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2599 {
2600  ocean_assert(sourceFrames != nullptr);
2601  ocean_assert(targetFrame != nullptr);
2602 
2603  ocean_assert(width != 0u && height != 0u);
2604 
2605  ocean_assert(tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME || tChannels == channels);
2606 
2607  if constexpr (tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME)
2608  {
2609  zipChannelsRuntime<TSource, TTarget>(sourceFrames, targetFrame, width, height, channels, sourceFramesPaddingElements, targetFramePaddingElements);
2610  return;
2611  }
2612 
2613  bool allSourceFramesContinuous = true;
2614 
2615  if (sourceFramesPaddingElements != nullptr)
2616  {
2617  for (unsigned int n = 0u; n < tChannels; ++n)
2618  {
2619  if (sourceFramesPaddingElements[n] != 0u)
2620  {
2621  allSourceFramesContinuous = false;
2622  break;
2623  }
2624  }
2625  }
2626 
2627  if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2628  {
2629  for (unsigned int n = 0u; n < width * height; ++n)
2630  {
2631  for (unsigned int c = 0u; c < tChannels; ++c)
2632  {
2633  targetFrame[n * tChannels + c] = TTarget(sourceFrames[c][n]);
2634  }
2635  }
2636  }
2637  else
2638  {
2639  const unsigned int targetFrameStrideElements = width * tChannels + targetFramePaddingElements;
2640 
2641  Indices32 sourceFrameStrideElements(tChannels);
2642 
2643  for (unsigned int c = 0u; c < tChannels; ++c)
2644  {
2645  if (sourceFramesPaddingElements == nullptr)
2646  {
2647  sourceFrameStrideElements[c] = width;
2648  }
2649  else
2650  {
2651  sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
2652  }
2653  }
2654 
2655  for (unsigned int y = 0u; y < height; ++y)
2656  {
2657  TTarget* const targetRow = targetFrame + y * targetFrameStrideElements;
2658 
2659  for (unsigned int x = 0u; x < width; ++x)
2660  {
2661  for (unsigned int c = 0u; c < tChannels; ++c)
2662  {
2663  *(targetRow + x * tChannels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x));
2664  }
2665  }
2666  }
2667  }
2668 }
2669 
2670 template <typename TSource, typename TTarget>
2671 void FrameChannels::zipChannels(const std::initializer_list<const TSource*>& sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const std::initializer_list<unsigned int>& sourceFramePaddingElements, const unsigned int targetFramePaddingElements)
2672 {
2673  ocean_assert(sourceFrames.size() >= 1);
2674  ocean_assert(sourceFramePaddingElements.size() == 0 || sourceFrames.size() == sourceFramePaddingElements.size());
2675 
2676  if (sourceFrames.size() == 2)
2677  {
2678  zipChannels<TSource, TTarget, 2u>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2679  }
2680  else if (sourceFrames.size() == 3)
2681  {
2682  zipChannels<TSource, TTarget, 3u>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2683  }
2684  else if (sourceFrames.size() == 4)
2685  {
2686  zipChannels<TSource, TTarget, 4u>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2687  }
2688  else
2689  {
2690  zipChannels<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2691  }
2692 }
2693 
2694 template <typename T, unsigned int tSourceChannels>
2695 inline void FrameChannels::addFirstChannel(const T* source, const T* sourceNewChannel, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2696 {
2697  static_assert(tSourceChannels != 0u, "Invalid channel number!");
2698 
2699  ocean_assert(source != nullptr && sourceNewChannel != nullptr && target != nullptr);
2700  ocean_assert(source != target);
2701  ocean_assert(width >= 1u && height >= 1u);
2702 
2703  const unsigned int options[3] = {sourcePaddingElements, sourceNewChannelPaddingElements, targetPaddingElements};
2704 
2705  const void* sources[2] = {source, sourceNewChannel};
2706 
2707  FrameConverter::convertArbitraryPixelFormat(sources, (void**)&target, width, height, conversionFlag, 1u, FrameChannels::addChannelRow<T, tSourceChannels, true>, options, worker);
2708 }
2709 
2710 template <typename T, unsigned int tSourceChannels>
2711 inline void FrameChannels::addFirstChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2712 {
2713  static_assert(tSourceChannels >= 1u, "Invalid channel number!");
2714 
2715  ocean_assert(source != nullptr && target != nullptr);
2716  ocean_assert(width >= 1u && height >= 1u);
2717 
2718  const unsigned int targetChannels = tSourceChannels + 1u;
2719 
2720  const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2721  const unsigned int targetStrideElements = width * targetChannels + targetPaddingElements;
2722 
2723  const void* channelValueParameter = (const void*)(&newChannelValue);
2724 
2725  const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
2726 
2727  FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::addChannelValueRow<T, tSourceChannels, true>, FrameChannels::reverseRowPixelOrderInPlace<T, targetChannels>, areContinuous, channelValueParameter, worker);
2728 }
2729 
2730 template <typename T, unsigned int tSourceChannels>
2731 inline void FrameChannels::addLastChannel(const T* source, const T* sourceNewChannel, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2732 {
2733  static_assert(tSourceChannels != 0u, "Invalid channel number!");
2734 
2735  ocean_assert(source != nullptr && sourceNewChannel != nullptr && target != nullptr);
2736  ocean_assert(source != target);
2737  ocean_assert(width >= 1u && height >= 1u);
2738 
2739  const unsigned int options[3] = {sourcePaddingElements, sourceNewChannelPaddingElements, targetPaddingElements};
2740 
2741  const void* sources[2] = {source, sourceNewChannel};
2742 
2743  FrameConverter::convertArbitraryPixelFormat(sources, (void**)&target, width, height, conversionFlag, 1u, FrameChannels::addChannelRow<T, tSourceChannels, false>, options, worker);
2744 }
2745 
2746 template <typename T, unsigned int tSourceChannels>
2747 inline void FrameChannels::addLastChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2748 {
2749  static_assert(tSourceChannels >= 1u, "Invalid channel number!");
2750 
2751  ocean_assert(source != nullptr && target != nullptr);
2752  ocean_assert(width >= 1u && height >= 1u);
2753 
2754  const unsigned int targetChannels = tSourceChannels + 1u;
2755 
2756  const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2757  const unsigned int targetStrideElements = width * targetChannels + targetPaddingElements;
2758 
2759  const void* channelValueParameter = (const void*)(&newChannelValue);
2760 
2761  const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
2762 
2763  FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::addChannelValueRow<T, tSourceChannels, false>, FrameChannels::reverseRowPixelOrderInPlace<T, targetChannels>, areContinuous, channelValueParameter, worker);
2764 }
2765 
2766 template <typename T, unsigned int tSourceChannels>
2767 inline void FrameChannels::removeFirstChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2768 {
2769  static_assert(tSourceChannels >= 2u && tSourceChannels <= 8u, "Invalid channel number!");
2770 
2771  ocean_assert(source != nullptr && target != nullptr);
2772  ocean_assert(width >= 1u && height >= 1u);
2773 
2774  const unsigned int shufflePatternMax = 0x07654321u;
2775  const unsigned int mask = 0xFFFFFFFFu >> ((8u - tSourceChannels + 1u) * 4u); // e.g., 0xFF for tChannels == 3u, 0xFFF for tChannels == 4u
2776 
2777  const unsigned int shufflePattern = shufflePatternMax & mask;
2778 
2779  FrameChannels::shuffleChannels<T, tSourceChannels, tSourceChannels - 1u, shufflePattern>(source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements, worker);
2780 }
2781 
2782 template <typename T, unsigned int tSourceChannels>
2783 inline void FrameChannels::removeLastChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2784 {
2785  static_assert(tSourceChannels >= 2u && tSourceChannels <= 8u, "Invalid channel number!");
2786 
2787  ocean_assert(source != nullptr && target != nullptr);
2788  ocean_assert(width >= 1u && height >= 1u);
2789 
2790  const unsigned int shufflePatternMax = 0x76543210u;
2791  const unsigned int mask = 0xFFFFFFFFu >> ((8u - tSourceChannels + 1u) * 4u); // e.g., 0xFF for tChannels == 3u, 0xFFF for tChannels == 4u
2792 
2793  const unsigned int shufflePattern = shufflePatternMax & mask;
2794 
2795  FrameChannels::shuffleChannels<T, tSourceChannels, tSourceChannels - 1u, shufflePattern>(source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements, worker);
2796 }
2797 
2798 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tSourceChannelIndex, unsigned int tTargetChannelIndex>
2799 inline void FrameChannels::copyChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2800 {
2801  static_assert(tSourceChannels >= 1u, "Invalid number of channels!");
2802  static_assert(tTargetChannels >= 1u, "Invalid number of channels!");
2803 
2804  static_assert(tSourceChannelIndex < tSourceChannels, "Invalid channel index!");
2805  static_assert(tTargetChannelIndex < tTargetChannels, "Invalid channel index!");
2806 
2807  ocean_assert(source != nullptr && target != nullptr);
2808  ocean_assert(width >= 1u && height >= 1u);
2809 
2810  const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2811  const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
2812 
2813  constexpr RowReversePixelOrderInPlaceFunction<T> reversePixelOrderRowInPlaceFunction = nullptr;
2814 
2815  const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
2816 
2817  FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, CONVERT_NORMAL, FrameChannels::copyChannelRow<T, tSourceChannels, tTargetChannels, tSourceChannelIndex, tTargetChannelIndex>, reversePixelOrderRowInPlaceFunction, areContinuous, nullptr, worker);
2818 }
2819 
2820 template <typename T, unsigned int tChannel, unsigned int tChannels>
2821 inline void FrameChannels::setChannel(T* frame, const unsigned int width, const unsigned int height, const T value, const unsigned int framePaddingElements, Worker* worker)
2822 {
2823  static_assert(tChannels >= 1u, "Invalid channel number!");
2824  static_assert(tChannel < tChannels, "Invalid channel index!");
2825 
2826  ocean_assert(frame != nullptr);
2827  ocean_assert(width >= 1u && height >= 1u);
2828 
2829  if (worker)
2830  {
2831  worker->executeFunction(Worker::Function::createStatic(&setChannelSubset<T, tChannel, tChannels>, frame, width, value, framePaddingElements, 0u, 0u), 0u, height);
2832  }
2833  else
2834  {
2835  setChannelSubset<T, tChannel, tChannels>(frame, width, value, framePaddingElements, 0u, height);
2836  }
2837 }
2838 
2839 template <typename T, unsigned int tChannels>
2840 inline void FrameChannels::reverseChannelOrder(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2841 {
2842  static_assert(tChannels >= 1u, "Invalid channel number!");
2843 
2844  ocean_assert(source != nullptr && target != nullptr);
2845  ocean_assert(width >= 1u && height >= 1u);
2846 
2847  const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
2848  const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
2849 
2850  constexpr bool areContinuous = false; // even if both images are continuous, we must reverse each line by another
2851 
2852  FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::reverseRowChannelOrder<T, tChannels>, FrameChannels::reverseRowPixelOrderInPlace<T, tChannels>, areContinuous, nullptr, worker);
2853 }
2854 
2855 template <typename T, unsigned int tChannels>
2856 void FrameChannels::reverseRowPixelOrder(const T* source, T* target, const size_t size)
2857 {
2858  static_assert(tChannels >= 1u, "Invalid channel number!");
2859 
2860  ocean_assert(source != nullptr && target != nullptr);
2861  ocean_assert(size >= 1);
2862 
2863 #ifdef OCEAN_DEBUG
2864  const T* const debugSourceStart = source;
2865  const T* const debugSourceEnd = debugSourceStart + size * tChannels;
2866 
2867  const T* const debugTargetStart = target;
2868  const T* const debugTargetEnd = debugTargetStart + size * tChannels;
2869 #endif
2870 
2871  // moving target to the end of the memory block
2872  target += size * tChannels;
2873 
2874  const T* const sourceEnd = source + size * tChannels;
2875 
2876 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
2877 
2878  if constexpr (std::is_same<typename TypeMapper<T>::Type, uint8_t>::value)
2879  {
2880  const size_t blocks16 = size / size_t(16);
2881 
2882  switch (tChannels)
2883  {
2884  case 1u:
2885  {
2886  for (size_t n = 0; n < blocks16; ++n)
2887  {
2888  target -= 16u * tChannels;
2889 
2890  ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2891  ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2892 
2893  const uint8x16_t source_u_8x16 = vld1q_u8((const uint8_t*)(source));
2894  uint8x16_t revSource_u_8x16 = vrev64q_u8(source_u_8x16);
2895  revSource_u_8x16 = vcombine_u8(vget_high_u8(revSource_u_8x16), vget_low_u8(revSource_u_8x16));
2896 
2897  vst1q_u8((uint8_t*)(target), revSource_u_8x16);
2898 
2899  source += 16u * tChannels;
2900  }
2901 
2902  break;
2903  }
2904 
2905  case 2u:
2906  {
2907  for (size_t n = 0; n < blocks16; ++n)
2908  {
2909  target -= 16u * tChannels;
2910 
2911  ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2912  ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2913 
2914  const uint8x16_t sourceA_u_8x16 = vld1q_u8((const uint8_t*)(source) + 0);
2915  const uint8x16_t sourceB_u_8x16 = vld1q_u8((const uint8_t*)(source) + 16);
2916 
2917  const uint8x16_t revSourceA_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u8(sourceA_u_8x16)));
2918  const uint8x16_t revSourceB_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u8(sourceB_u_8x16)));
2919 
2920  const uint8x16_t targetA_u_8x16 = vcombine_u8(vget_high_u8(revSourceA_u_8x16), vget_low_u8(revSourceA_u_8x16));
2921  const uint8x16_t targetB_u_8x16 = vcombine_u8(vget_high_u8(revSourceB_u_8x16), vget_low_u8(revSourceB_u_8x16));
2922 
2923  vst1q_u8((uint8_t*)(target) + 0, targetB_u_8x16);
2924  vst1q_u8((uint8_t*)(target) + 16, targetA_u_8x16);
2925 
2926  source += 16u * tChannels;
2927  }
2928 
2929  break;
2930  }
2931 
2932  case 3u:
2933  {
2934  for (size_t n = 0; n < blocks16; ++n)
2935  {
2936  target -= 16u * tChannels;
2937 
2938  ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2939  ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2940 
2941  const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)(source));
2942 
2943  uint8x16x3_t revSource_u_8x16x3;
2944  revSource_u_8x16x3.val[0] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[0])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[0])));
2945  revSource_u_8x16x3.val[1] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[1])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[1])));
2946  revSource_u_8x16x3.val[2] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[2])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[2])));
2947 
2948  vst3q_u8((uint8_t*)(target), revSource_u_8x16x3);
2949 
2950  source += 16u * tChannels;
2951  }
2952 
2953  break;
2954  }
2955 
2956  case 4u:
2957  {
2958  for (size_t n = 0; n < blocks16; ++n)
2959  {
2960  target -= 16u * tChannels;
2961 
2962  ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2963  ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2964 
2965  const uint8x16_t sourceA_u_8x16 = vld1q_u8((const uint8_t*)(source) + 0);
2966  const uint8x16_t sourceB_u_8x16 = vld1q_u8((const uint8_t*)(source) + 16);
2967  const uint8x16_t sourceC_u_8x16 = vld1q_u8((const uint8_t*)(source) + 32);
2968  const uint8x16_t sourceD_u_8x16 = vld1q_u8((const uint8_t*)(source) + 48);
2969 
2970  const uint8x16_t revSourceA_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceA_u_8x16)));
2971  const uint8x16_t revSourceB_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceB_u_8x16)));
2972  const uint8x16_t revSourceC_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceC_u_8x16)));
2973  const uint8x16_t revSourceD_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceD_u_8x16)));
2974 
2975  const uint8x16_t targetA_u_8x16 = vcombine_u8(vget_high_u8(revSourceA_u_8x16), vget_low_u8(revSourceA_u_8x16));
2976  const uint8x16_t targetB_u_8x16 = vcombine_u8(vget_high_u8(revSourceB_u_8x16), vget_low_u8(revSourceB_u_8x16));
2977  const uint8x16_t targetC_u_8x16 = vcombine_u8(vget_high_u8(revSourceC_u_8x16), vget_low_u8(revSourceC_u_8x16));
2978  const uint8x16_t targetD_u_8x16 = vcombine_u8(vget_high_u8(revSourceD_u_8x16), vget_low_u8(revSourceD_u_8x16));
2979 
2980  vst1q_u8((uint8_t*)(target) + 0, targetD_u_8x16);
2981  vst1q_u8((uint8_t*)(target) + 16, targetC_u_8x16);
2982  vst1q_u8((uint8_t*)(target) + 32, targetB_u_8x16);
2983  vst1q_u8((uint8_t*)(target) + 48, targetA_u_8x16);
2984 
2985  source += 16u * tChannels;
2986  }
2987 
2988  break;
2989  }
2990 
2991  default:
2992  break;
2993  }
2994  }
2995 
2996 #endif // OCEAN_HARDWARE_NEON_VERSION
2997 
2998  while (source != sourceEnd)
2999  {
3000  ocean_assert(source < sourceEnd);
3001 
3002  for (unsigned int n = 0u; n < tChannels; ++n)
3003  {
3004  ocean_assert(source + tChannels - n - 1u >= debugSourceStart);
3005  ocean_assert(source + tChannels - n - 1u < debugSourceEnd);
3006 
3007  ocean_assert(target > debugTargetStart && target <= debugTargetEnd);
3008 
3009  *--target = source[tChannels - n - 1u];
3010  }
3011 
3012  source += tChannels;
3013  }
3014 }
3015 
3016 template <typename T, unsigned int tChannels>
3017 void FrameChannels::reverseRowPixelOrderInPlace(T* data, const size_t size)
3018 {
3019  static_assert(tChannels >= 1u, "Invalid channel number!");
3020 
3021  ocean_assert(data != nullptr);
3022  ocean_assert(size >= 1);
3023 
3024  typedef typename DataType<T, tChannels>::Type PixelType;
3025 
3026  size_t n = 0;
3027 
3028 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3029 
3030  if constexpr (std::is_same<typename TypeMapper<T>::Type, uint8_t>::value)
3031  {
3032  if (size >= 32)
3033  {
3034  const size_t blocks32 = size / size_t(32);
3035 
3036  uint8_t* left = (uint8_t*)(data);
3037  uint8_t* right = (uint8_t*)(data) + (size - 16u) * tChannels;
3038 
3039  switch (tChannels)
3040  {
3041  case 1u:
3042  {
3043  for (size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3044  {
3045  const uint8x16_t left_u_8x16 = vld1q_u8(left);
3046  const uint8x16_t right_u_8x16 = vld1q_u8(right);
3047 
3048  uint8x16_t revLeft_u_8x16 = vrev64q_u8(left_u_8x16);
3049  revLeft_u_8x16 = vcombine_u8(vget_high_u8(revLeft_u_8x16), vget_low_u8(revLeft_u_8x16));
3050 
3051  uint8x16_t revRight_u_8x16 = vrev64q_u8(right_u_8x16);
3052  revRight_u_8x16 = vcombine_u8(vget_high_u8(revRight_u_8x16), vget_low_u8(revRight_u_8x16));
3053 
3054  vst1q_u8(left, revRight_u_8x16);
3055  vst1q_u8(right, revLeft_u_8x16);
3056 
3057  left += 16u * tChannels;
3058  right -= 16u * tChannels;
3059  }
3060 
3061  n += blocks32 * 16u;
3062 
3063  break;
3064  }
3065 
3066  case 2u:
3067  {
3068  for (size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3069  {
3070  const uint8x16x2_t left_u_8x16x2 = vld2q_u8(left);
3071  const uint8x16x2_t right_u_8x16x2 = vld2q_u8(right);
3072 
3073  uint8x16x2_t revLeft_u_8x16x2;
3074  revLeft_u_8x16x2.val[0] = vrev64q_u8(left_u_8x16x2.val[0]);
3075  revLeft_u_8x16x2.val[1] = vrev64q_u8(left_u_8x16x2.val[1]);
3076  revLeft_u_8x16x2.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x2.val[0]), vget_low_u8(revLeft_u_8x16x2.val[0]));
3077  revLeft_u_8x16x2.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x2.val[1]), vget_low_u8(revLeft_u_8x16x2.val[1]));
3078 
3079  uint8x16x2_t revRight_u_8x16x2;
3080  revRight_u_8x16x2.val[0] = vrev64q_u8(right_u_8x16x2.val[0]);
3081  revRight_u_8x16x2.val[1] = vrev64q_u8(right_u_8x16x2.val[1]);
3082  revRight_u_8x16x2.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x2.val[0]), vget_low_u8(revRight_u_8x16x2.val[0]));
3083  revRight_u_8x16x2.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x2.val[1]), vget_low_u8(revRight_u_8x16x2.val[1]));
3084 
3085  vst2q_u8(left, revRight_u_8x16x2);
3086  vst2q_u8(right, revLeft_u_8x16x2);
3087 
3088  left += 16u * tChannels;
3089  right -= 16u * tChannels;
3090  }
3091 
3092  n += blocks32 * 16u;
3093 
3094  break;
3095  }
3096 
3097  case 3u:
3098  {
3099  for (size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3100  {
3101  const uint8x16x3_t left_u_8x16x3 = vld3q_u8(left);
3102  const uint8x16x3_t right_u_8x16x3 = vld3q_u8(right);
3103 
3104  uint8x16x3_t revLeft_u_8x16x3;
3105  revLeft_u_8x16x3.val[0] = vrev64q_u8(left_u_8x16x3.val[0]);
3106  revLeft_u_8x16x3.val[1] = vrev64q_u8(left_u_8x16x3.val[1]);
3107  revLeft_u_8x16x3.val[2] = vrev64q_u8(left_u_8x16x3.val[2]);
3108  revLeft_u_8x16x3.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[0]), vget_low_u8(revLeft_u_8x16x3.val[0]));
3109  revLeft_u_8x16x3.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[1]), vget_low_u8(revLeft_u_8x16x3.val[1]));
3110  revLeft_u_8x16x3.val[2] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[2]), vget_low_u8(revLeft_u_8x16x3.val[2]));
3111 
3112  uint8x16x3_t revRight_u_8x16x3;
3113  revRight_u_8x16x3.val[0] = vrev64q_u8(right_u_8x16x3.val[0]);
3114  revRight_u_8x16x3.val[1] = vrev64q_u8(right_u_8x16x3.val[1]);
3115  revRight_u_8x16x3.val[2] = vrev64q_u8(right_u_8x16x3.val[2]);
3116  revRight_u_8x16x3.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[0]), vget_low_u8(revRight_u_8x16x3.val[0]));
3117  revRight_u_8x16x3.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[1]), vget_low_u8(revRight_u_8x16x3.val[1]));
3118  revRight_u_8x16x3.val[2] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[2]), vget_low_u8(revRight_u_8x16x3.val[2]));
3119 
3120  vst3q_u8(left, revRight_u_8x16x3);
3121  vst3q_u8(right, revLeft_u_8x16x3);
3122 
3123  left += 16u * tChannels;
3124  right -= 16u * tChannels;
3125  }
3126 
3127  n += blocks32 * 16u;
3128 
3129  break;
3130  }
3131 
3132  case 4u:
3133  {
3134  for (size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3135  {
3136  const uint8x16x4_t left_u_8x16x4 = vld4q_u8(left);
3137  const uint8x16x4_t right_u_8x16x4 = vld4q_u8(right);
3138 
3139  uint8x16x4_t revLeft_u_8x16x4;
3140  revLeft_u_8x16x4.val[0] = vrev64q_u8(left_u_8x16x4.val[0]);
3141  revLeft_u_8x16x4.val[1] = vrev64q_u8(left_u_8x16x4.val[1]);
3142  revLeft_u_8x16x4.val[2] = vrev64q_u8(left_u_8x16x4.val[2]);
3143  revLeft_u_8x16x4.val[3] = vrev64q_u8(left_u_8x16x4.val[3]);
3144  revLeft_u_8x16x4.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[0]), vget_low_u8(revLeft_u_8x16x4.val[0]));
3145  revLeft_u_8x16x4.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[1]), vget_low_u8(revLeft_u_8x16x4.val[1]));
3146  revLeft_u_8x16x4.val[2] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[2]), vget_low_u8(revLeft_u_8x16x4.val[2]));
3147  revLeft_u_8x16x4.val[3] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[3]), vget_low_u8(revLeft_u_8x16x4.val[3]));
3148 
3149  uint8x16x4_t revRight_u_8x16x4;
3150  revRight_u_8x16x4.val[0] = vrev64q_u8(right_u_8x16x4.val[0]);
3151  revRight_u_8x16x4.val[1] = vrev64q_u8(right_u_8x16x4.val[1]);
3152  revRight_u_8x16x4.val[2] = vrev64q_u8(right_u_8x16x4.val[2]);
3153  revRight_u_8x16x4.val[3] = vrev64q_u8(right_u_8x16x4.val[3]);
3154  revRight_u_8x16x4.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[0]), vget_low_u8(revRight_u_8x16x4.val[0]));
3155  revRight_u_8x16x4.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[1]), vget_low_u8(revRight_u_8x16x4.val[1]));
3156  revRight_u_8x16x4.val[2] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[2]), vget_low_u8(revRight_u_8x16x4.val[2]));
3157  revRight_u_8x16x4.val[3] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[3]), vget_low_u8(revRight_u_8x16x4.val[3]));
3158 
3159  vst4q_u8(left, revRight_u_8x16x4);
3160  vst4q_u8(right, revLeft_u_8x16x4);
3161 
3162  left += 16u * tChannels;
3163  right -= 16u * tChannels;
3164  }
3165 
3166  n += blocks32 * 16u;
3167 
3168  break;
3169  }
3170 
3171  default:
3172  break;
3173  }
3174  }
3175  }
3176 
3177 #endif
3178 
3179  PixelType intermediate;
3180 
3181  PixelType* const pixels = (PixelType*)(data);
3182 
3183  while (n < size / 2)
3184  {
3185  intermediate = pixels[n];
3186 
3187  pixels[n] = pixels[size - n - 1];
3188  pixels[size - n - 1] = intermediate;
3189 
3190  ++n;
3191  }
3192 }
3193 
3194 template <typename T, unsigned int tChannels>
3195 void FrameChannels::reverseRowChannelOrder(const T* source, T* target, const size_t size, const void* /*options*/)
3196 {
3197  ocean_assert(source != nullptr && target != nullptr);
3198  ocean_assert(source != target);
3199  ocean_assert(size >= 1);
3200 
3201 #ifdef OCEAN_DEBUG
3202  const T* const debugSourceStart = source;
3203  const T* const debugSourceEnd = debugSourceStart + size * tChannels;
3204 
3205  const T* const debugTargetStart = target;
3206  const T* const debugTargetEnd = debugTargetStart + size * tChannels;
3207 #endif
3208 
3209  if constexpr (tChannels == 1)
3210  {
3211  // we actually copy the one channel
3212 
3213  memcpy(target, source, sizeof(T) * size);
3214  return;
3215  }
3216 
3217  const T* const sourceEnd = source + size * tChannels;
3218 
3219 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
3220 
3221  if ((std::is_same<typename TypeMapper<T>::Type, uint8_t>::value))
3222  {
3223  const size_t blocks16 = size / size_t(16);
3224 
3225  switch (tChannels)
3226  {
3227  case 1u:
3228  ocean_assert(false && "This should have been handled above!");
3229  break;
3230 
3231  case 2u:
3232  {
3233  for (size_t n = 0; n < blocks16; ++n)
3234  {
3235  SSE::reverseChannelOrder2Channel8Bit32Elements((const uint8_t*)source, (uint8_t*)target);
3236 
3237  source += 16u * tChannels;
3238  target += 16u * tChannels;
3239  }
3240 
3241  break;
3242  }
3243 
3244  case 3u:
3245  {
3246  for (size_t n = 0; n < blocks16; ++n)
3247  {
3248  SSE::reverseChannelOrder3Channel8Bit48Elements((const uint8_t*)source, (uint8_t*)target);
3249 
3250  source += 16u * tChannels;
3251  target += 16u * tChannels;
3252  }
3253 
3254  break;
3255  }
3256 
3257  case 4u:
3258  {
3259  for (size_t n = 0; n < blocks16; ++n)
3260  {
3261  SSE::reverseChannelOrder4Channel8Bit64Elements((const uint8_t*)source, (uint8_t*)target);
3262 
3263  source += 16u * tChannels;
3264  target += 16u * tChannels;
3265  }
3266 
3267  break;
3268  }
3269 
3270  default:
3271  break;
3272  }
3273  }
3274 
3275 #elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3276 
3277  if ((std::is_same<typename TypeMapper<T>::Type, uint8_t>::value))
3278  {
3279  const size_t blocks16 = size / size_t(16);
3280 
3281  switch (tChannels)
3282  {
3283  case 1u:
3284  ocean_assert(false && "This should have been handled above!");
3285  break;
3286 
3287  case 2u:
3288  {
3289  for (size_t n = 0; n < blocks16; ++n)
3290  {
3291  ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3292  ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3293 
3294  const uint8x16_t sourceA_u_8x16 = vld1q_u8((const uint8_t*)source + 0);
3295  const uint8x16_t sourceB_u_8x16 = vld1q_u8((const uint8_t*)source + 16);
3296 
3297  const uint8x16_t revSourceA_u_8x16 = vrev16q_u8(sourceA_u_8x16);
3298  const uint8x16_t revSourceB_u_8x16 = vrev16q_u8(sourceB_u_8x16);
3299 
3300  vst1q_u8((uint8_t*)target + 0, revSourceA_u_8x16);
3301  vst1q_u8((uint8_t*)target + 16, revSourceB_u_8x16);
3302 
3303  source += 16u * tChannels;
3304  target += 16u * tChannels;
3305  }
3306 
3307  break;
3308  }
3309 
3310  case 3u:
3311  {
3312  for (size_t n = 0; n < blocks16; ++n)
3313  {
3314  ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3315  ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3316 
3317  const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)source);
3318 
3319  uint8x16x3_t revSource_u_8x16x3;
3320  revSource_u_8x16x3.val[0] = source_u_8x16x3.val[2];
3321  revSource_u_8x16x3.val[1] = source_u_8x16x3.val[1];
3322  revSource_u_8x16x3.val[2] = source_u_8x16x3.val[0];
3323 
3324  vst3q_u8((uint8_t*)target, revSource_u_8x16x3);
3325 
3326  source += 16u * tChannels;
3327  target += 16u * tChannels;
3328  }
3329 
3330  break;
3331  }
3332 
3333  case 4u:
3334  {
3335  for (size_t n = 0; n < blocks16; ++n)
3336  {
3337  ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3338  ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3339 
3340  const uint8x16_t sourceA_u_8x16 = vld1q_u8((const uint8_t*)source + 0);
3341  const uint8x16_t sourceB_u_8x16 = vld1q_u8((const uint8_t*)source + 16);
3342  const uint8x16_t sourceC_u_8x16 = vld1q_u8((const uint8_t*)source + 32);
3343  const uint8x16_t sourceD_u_8x16 = vld1q_u8((const uint8_t*)source + 48);
3344 
3345  const uint8x16_t revSourceA_u_8x16 = vrev32q_u8(sourceA_u_8x16);
3346  const uint8x16_t revSourceB_u_8x16 = vrev32q_u8(sourceB_u_8x16);
3347  const uint8x16_t revSourceC_u_8x16 = vrev32q_u8(sourceC_u_8x16);
3348  const uint8x16_t revSourceD_u_8x16 = vrev32q_u8(sourceD_u_8x16);
3349 
3350  vst1q_u8((uint8_t*)target + 0, revSourceA_u_8x16);
3351  vst1q_u8((uint8_t*)target + 16, revSourceB_u_8x16);
3352  vst1q_u8((uint8_t*)target + 32, revSourceC_u_8x16);
3353  vst1q_u8((uint8_t*)target + 48, revSourceD_u_8x16);
3354 
3355  source += 16u * tChannels;
3356  target += 16u * tChannels;
3357  }
3358 
3359  break;
3360  }
3361 
3362  default:
3363  break;
3364  }
3365  }
3366 
3367 #endif // OCEAN_HARDWARE_NEON_VERSION
3368 
3369  while (source != sourceEnd)
3370  {
3371  ocean_assert(source < sourceEnd);
3372 
3373  ocean_assert(source >= debugSourceStart && source + tChannels <= debugSourceEnd);
3374  ocean_assert(target >= debugTargetStart && target + tChannels <= debugTargetEnd);
3375 
3376  for (unsigned int n = 0u; n < tChannels; ++n)
3377  {
3378  target[n] = source[tChannels - n - 1u];
3379  }
3380 
3381  source += tChannels;
3382  target += tChannels;
3383  }
3384 }
3385 
3386 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
3387 inline void FrameChannels::shuffleRowChannels(const T* source, T* target, const size_t size, const void* /*unusedOptions*/)
3388 {
3389  static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u, "Invalid channel number!");
3390  static_assert(tTargetChannels >= 1u && tTargetChannels <= 8u, "Invalid channel number!");
3391 
3392  static_assert(tSourceChannels != 1u || tTargetChannels != 1u, "Invalid channel number!");
3393 
3394  static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels, "Invalid shuffle pattern!");
3395  static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels, "Invalid shuffle pattern!");
3396  static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels, "Invalid shuffle pattern!");
3397  static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels, "Invalid shuffle pattern!");
3398  static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels, "Invalid shuffle pattern!");
3399  static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels, "Invalid shuffle pattern!");
3400  static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels, "Invalid shuffle pattern!");
3401  static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels, "Invalid shuffle pattern!");
3402 
3403  ocean_assert(source != nullptr && target != nullptr);
3404  ocean_assert(size != 0);
3405 
3406  const T* const sourceEnd = source + size * tSourceChannels;
3407 
3408 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
3409 
3410  if ((std::is_same<typename TypeMapper<T>::Type, uint8_t>::value))
3411  {
3412  const size_t blocks16 = size / size_t(16);
3413 
3414  switch (tSourceChannels | ((tTargetChannels) << 4u))
3415  {
3416  // 4 -> 4
3417  case (4u | (4u << 4u)):
3418  {
3419  // the following shuffle patterns are known during compile time
3420 
3421  constexpr unsigned int offset1 = 0x04040404u;
3422  constexpr unsigned int offset2 = 0x08080808u;
3423  constexpr unsigned int offset3 = 0x0C0C0C0Cu;
3424 
3425  // converting shufflePattern16 to shufflePattern16
3426  const unsigned int shufflePattern0 = ((tShufflePattern & 0xF000u) << 12u) | ((tShufflePattern & 0x0F00u) << 8u) | ((tShufflePattern & 0x00F0u) << 4u) | ((tShufflePattern & 0x000Fu) << 0u);
3427 
3428  const unsigned int shufflePattern1 = shufflePattern0 + offset1;
3429  const unsigned int shufflePattern2 = shufflePattern0 + offset2;
3430  const unsigned int shufflePattern3 = shufflePattern0 + offset3;
3431 
3432  const __m128i shufflePattern128 = SSE::set128i((((unsigned long long)shufflePattern3) << 32ull) | (unsigned long long)shufflePattern2, (((unsigned long long)shufflePattern1) << 32ull) | (unsigned long long)shufflePattern0);
3433 
3434  for (size_t n = 0; n < blocks16; ++n)
3435  {
3436  SSE::store128i(_mm_shuffle_epi8(SSE::load128i((const uint8_t*)source + 0), shufflePattern128), (uint8_t*)target + 0);
3437  SSE::store128i(_mm_shuffle_epi8(SSE::load128i((const uint8_t*)source + 16), shufflePattern128), (uint8_t*)target + 16);
3438  SSE::store128i(_mm_shuffle_epi8(SSE::load128i((const uint8_t*)source + 32), shufflePattern128), (uint8_t*)target + 32);
3439  SSE::store128i(_mm_shuffle_epi8(SSE::load128i((const uint8_t*)source + 48), shufflePattern128), (uint8_t*)target + 48);
3440 
3441  source += 16u * tSourceChannels;
3442  target += 16u * tTargetChannels;
3443  }
3444 
3445  break;
3446  }
3447 
3448  default:
3449  // we do not have a NEON-based optimization
3450  break;
3451  }
3452  }
3453 
3454 #elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3455 
3456  if ((std::is_same<typename TypeMapper<T>::Type, uint8_t>::value))
3457  {
3458  const size_t blocks16 = size / size_t(16);
3459 
3460  switch (tSourceChannels | ((tTargetChannels) << 4u))
3461  {
3462  // 1 -> 3
3463  case (1u | (3u << 4u)):
3464  {
3465  static_assert(tSourceChannels != 1u || tShufflePattern == 0u, "Invalid shuffle patter!");
3466 
3467  for (size_t n = 0; n < blocks16; ++n)
3468  {
3469  const uint8x16_t source_u_8x16 = vld1q_u8((const uint8_t*)source);
3470 
3471  uint8x16x3_t target_u_8x16x3;
3472 
3473  for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3474  {
3475  target_u_8x16x3.val[nT] = source_u_8x16;
3476  }
3477 
3478  vst3q_u8((uint8_t*)target, target_u_8x16x3);
3479 
3480  source += 16u * tSourceChannels;
3481  target += 16u * tTargetChannels;
3482  }
3483 
3484  break;
3485  }
3486 
3487  // 2 -> 1
3488  case (2u | (1u << 4u)):
3489  {
3490  for (size_t n = 0; n < blocks16; ++n)
3491  {
3492  const uint8x16x2_t source_u_8x16x2 = vld2q_u8((const uint8_t*)source);
3493 
3494  constexpr unsigned int sourceChannel = tShufflePattern & 0x00000001u; // possible index values {0, 1}
3495  static_assert(sourceChannel <= 1u, "Invalid shuffle pattern!");
3496  ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3497 
3498  const uint8x16_t target_u_8x16 = source_u_8x16x2.val[sourceChannel];
3499 
3500  vst1q_u8((uint8_t*)target, target_u_8x16);
3501 
3502  source += 16u * tSourceChannels;
3503  target += 16u * tTargetChannels;
3504  }
3505 
3506  break;
3507  }
3508 
3509  // 2 -> 3
3510  case (2u | (3u << 4u)):
3511  {
3512  for (size_t n = 0; n < blocks16; ++n)
3513  {
3514  const uint8x16x2_t source_u_8x16x2 = vld2q_u8((const uint8_t*)source);
3515 
3516  uint8x16x3_t target_u_8x16x3;
3517 
3518  for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3519  {
3520  ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000001u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3521 
3522  target_u_8x16x3.val[nT] = source_u_8x16x2.val[(tShufflePattern >> (nT * 4u)) & 0x00000001u]; // possible index values {0, 1}
3523  }
3524 
3525  vst3q_u8((uint8_t*)target, target_u_8x16x3);
3526 
3527  source += 16u * tSourceChannels;
3528  target += 16u * tTargetChannels;
3529  }
3530 
3531  break;
3532  }
3533 
3534  // 2 -> 4
3535  case (2u | (4u << 4u)):
3536  {
3537  for (size_t n = 0; n < blocks16; ++n)
3538  {
3539  const uint8x16x2_t source_u_8x16x2 = vld2q_u8((const uint8_t*)source);
3540 
3541  uint8x16x4_t target_u_8x16x4;
3542 
3543  for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3544  {
3545  ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000001u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3546 
3547  target_u_8x16x4.val[nT] = source_u_8x16x2.val[(tShufflePattern >> (nT * 4u)) & 0x00000001u]; // possible index values {0, 1}
3548  }
3549 
3550  vst4q_u8((uint8_t*)target, target_u_8x16x4);
3551 
3552  source += 16u * tSourceChannels;
3553  target += 16u * tTargetChannels;
3554  }
3555 
3556  break;
3557  }
3558 
3559  // 3 -> 1
3560  case (3u | (1u << 4u)):
3561  {
3562  constexpr unsigned int sourceChannel = (tShufflePattern & 0x0000000Fu) <= 2u ? (tShufflePattern & 0x0000000Fu) : 2u; // possible index values {0, 1, 2}
3563  ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3564 
3565  for (size_t n = 0; n < blocks16; ++n)
3566  {
3567  const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)source);
3568 
3569  const uint8x16_t target_u_8x16 = source_u_8x16x3.val[sourceChannel];
3570 
3571  vst1q_u8((uint8_t*)target, target_u_8x16);
3572 
3573  source += 16u * tSourceChannels;
3574  target += 16u * tTargetChannels;
3575  }
3576 
3577  break;
3578  }
3579 
3580  // 3 -> 2
3581  case (3u | (2u << 4u)):
3582  {
3583  for (size_t n = 0; n < blocks16; ++n)
3584  {
3585  const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)source);
3586 
3587  uint8x16x2_t target_u_8x16x2;
3588 
3589  for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3590  {
3591  target_u_8x16x2.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)]; // possible index values {0, 1, 2}
3592  }
3593 
3594  vst2q_u8((uint8_t*)target, target_u_8x16x2);
3595 
3596  source += 16u * tSourceChannels;
3597  target += 16u * tTargetChannels;
3598  }
3599 
3600  break;
3601  }
3602 
3603  // 3 -> 3
3604  case (3u | (3u << 4u)):
3605  {
3606  for (size_t n = 0; n < blocks16; ++n)
3607  {
3608  const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)source);
3609 
3610  uint8x16x3_t target_u_8x16x3;
3611 
3612  for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3613  {
3614  target_u_8x16x3.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)]; // possible index values {0, 1, 2}
3615  }
3616 
3617  vst3q_u8((uint8_t*)target, target_u_8x16x3);
3618 
3619  source += 16u * tSourceChannels;
3620  target += 16u * tTargetChannels;
3621  }
3622 
3623  break;
3624  }
3625 
3626  // 4 -> 1
3627  case (4u | (1u << 4u)):
3628  {
3629  for (size_t n = 0; n < blocks16; ++n)
3630  {
3631  const uint8x16x4_t source_u_8x16x4 = vld4q_u8((const uint8_t*)source);
3632 
3633  constexpr unsigned int sourceChannel = tShufflePattern & 0x00000003u; // possible index values {0, 1, 2, 3}
3634  static_assert(sourceChannel <= 3u, "Invalid shuffle pattern!");
3635 
3636  ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3637 
3638  const uint8x16_t target_u_8x16 = source_u_8x16x4.val[sourceChannel];
3639 
3640  vst1q_u8((uint8_t*)target, target_u_8x16);
3641 
3642  source += 16u * tSourceChannels;
3643  target += 16u * tTargetChannels;
3644  }
3645 
3646  break;
3647  }
3648 
3649  // 4 -> 2
3650  case (4u | (2u << 4u)):
3651  {
3652  for (size_t n = 0; n < blocks16; ++n)
3653  {
3654  const uint8x16x4_t source_u_8x16x4 = vld4q_u8((const uint8_t*)source);
3655 
3656  uint8x16x2_t target_u_8x16x2;
3657 
3658  for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3659  {
3660  ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3661 
3662  target_u_8x16x2.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u]; // possible index values {0, 1, 2, 3}
3663  }
3664 
3665  vst2q_u8((uint8_t*)target, target_u_8x16x2);
3666 
3667  source += 16u * tSourceChannels;
3668  target += 16u * tTargetChannels;
3669  }
3670 
3671  break;
3672  }
3673 
3674  // 4 -> 3
3675  case (4u | (3u << 4u)):
3676  {
3677  for (size_t n = 0; n < blocks16; ++n)
3678  {
3679  const uint8x16x4_t source_u_8x16x4 = vld4q_u8((const uint8_t*)source);
3680 
3681  uint8x16x3_t target_u_8x16x3;
3682 
3683  for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3684  {
3685  ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3686 
3687  target_u_8x16x3.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u]; // possible index values {0, 1, 2, 3}
3688  }
3689 
3690  vst3q_u8((uint8_t*)target, target_u_8x16x3);
3691 
3692  source += 16u * tSourceChannels;
3693  target += 16u * tTargetChannels;
3694  }
3695 
3696  break;
3697  }
3698 
3699  // 4 -> 4
3700  case (4u | (4u << 4u)):
3701  {
3702  for (size_t n = 0; n < blocks16; ++n)
3703  {
3704  const uint8x16x4_t source_u_8x16x4 = vld4q_u8((const uint8_t*)source);
3705 
3706  uint8x16x4_t target_u_8x16x4;
3707 
3708  for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3709  {
3710  ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3711 
3712  target_u_8x16x4.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u]; // possible index values {0, 1, 2, 3}
3713  }
3714 
3715  vst4q_u8((uint8_t*)target, target_u_8x16x4);
3716 
3717  source += 16u * tSourceChannels;
3718  target += 16u * tTargetChannels;
3719  }
3720 
3721  break;
3722  }
3723 
3724  default:
3725  // we do not have a NEON-based optimization
3726  break;
3727  }
3728  }
3729 
3730 #endif
3731 
3732  while (source != sourceEnd)
3733  {
3734  ocean_assert(source < sourceEnd);
3735 
3736  for (unsigned int n = 0u; n < tTargetChannels; ++n)
3737  {
3738  target[n] = source[(tShufflePattern >> (n * 4u)) & 0x0000000Fu];
3739  }
3740 
3741  source += tSourceChannels;
3742  target += tTargetChannels;
3743  }
3744 }
3745 
3746 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
3747 inline void FrameChannels::shuffleRowChannelsAndSetLastChannelValue(const T* source, T* target, const size_t size, const void* options)
3748 {
3749  static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u, "Invalid channel number!");
3750  static_assert(tTargetChannels >= 2u && tTargetChannels <= 8u, "Invalid channel number!");
3751 
3752  static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels, "Invalid shuffle pattern!");
3753  static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels, "Invalid shuffle pattern!");
3754  static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels, "Invalid shuffle pattern!");
3755  static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels, "Invalid shuffle pattern!");
3756  static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels, "Invalid shuffle pattern!");
3757  static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels, "Invalid shuffle pattern!");
3758  static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels, "Invalid shuffle pattern!");
3759  static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels, "Invalid shuffle pattern!");
3760 
3761  ocean_assert(source != nullptr && target != nullptr);
3762  ocean_assert(size != 0);
3763 
3764  ocean_assert(options != nullptr);
3765 
3766  const T lastChannelValue = *(const T*)(options);
3767 
3768  const T* const sourceEnd = source + size * tSourceChannels;
3769 
3770 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3771 
3772  if ((std::is_same<typename TypeMapper<T>::Type, uint8_t>::value))
3773  {
3774  const size_t blocks16 = size / size_t(16);
3775 
3776  switch (tSourceChannels | ((tTargetChannels) << 4u))
3777  {
3778  // 1 -> 4
3779  case (1u | (4u << 4u)):
3780  {
3781  ocean_assert(tShufflePattern == 0u);
3782 
3783  const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3784 
3785  uint8x16x4_t target_u_8x16x4;
3786  target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3787 
3788  for (size_t n = 0; n < blocks16; ++n)
3789  {
3790  const uint8x16_t source_u_8x16 = vld1q_u8((const uint8_t*)source);
3791 
3792  for (unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
3793  {
3794  target_u_8x16x4.val[nT] = source_u_8x16;
3795  }
3796 
3797  vst4q_u8((uint8_t*)target, target_u_8x16x4);
3798 
3799  source += 16u * tSourceChannels;
3800  target += 16u * tTargetChannels;
3801  }
3802 
3803  break;
3804  }
3805 
3806  // 3 -> 4
3807  case (3u | (4u << 4u)):
3808  {
3809  const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3810 
3811  uint8x16x4_t target_u_8x16x4;
3812  target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3813 
3814  for (size_t n = 0; n < blocks16; ++n)
3815  {
3816  const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)source);
3817 
3818  for (unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
3819  {
3820  target_u_8x16x4.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)]; // possible index values {0, 1, 2}
3821  }
3822 
3823  vst4q_u8((uint8_t*)target, target_u_8x16x4);
3824 
3825  source += 16u * tSourceChannels;
3826  target += 16u * tTargetChannels;
3827  }
3828 
3829  break;
3830  }
3831 
3832  // 4 -> 4
3833  case (4u | (4u << 4u)):
3834  {
3835  const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3836 
3837  uint8x16x4_t target_u_8x16x4;
3838  target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3839 
3840  for (size_t n = 0; n < blocks16; ++n)
3841  {
3842  const uint8x16x4_t source_u_8x16x4 = vld4q_u8((const uint8_t*)source);
3843 
3844  for (unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
3845  {
3846  target_u_8x16x4.val[nT] = source_u_8x16x4.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 3u)]; // possible index values {0, 1, 2, 3}
3847  }
3848 
3849  vst4q_u8((uint8_t*)target, target_u_8x16x4);
3850 
3851  source += 16u * tSourceChannels;
3852  target += 16u * tTargetChannels;
3853  }
3854 
3855  break;
3856  }
3857 
3858  default:
3859  // we do not have a NEON-based optimization
3860  break;
3861  }
3862  }
3863 
3864 #endif
3865 
3866  while (source != sourceEnd)
3867  {
3868  ocean_assert(source < sourceEnd);
3869 
3870  for (unsigned int n = 0u; n < tTargetChannels - 1u; ++n)
3871  {
3872  target[n] = source[(tShufflePattern >> (n * 4u)) & 0x0000000Fu];
3873  target[tTargetChannels - 1u] = lastChannelValue;
3874  }
3875 
3876  source += tSourceChannels;
3877  target += tTargetChannels;
3878  }
3879 }
3880 
3881 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
3882 inline void FrameChannels::shuffleChannels(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
3883 {
3884  static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u, "Invalid channel number!");
3885  static_assert(tTargetChannels >= 1u && tTargetChannels <= 8u, "Invalid channel number!");
3886 
3887  static_assert(tSourceChannels != 1u || tTargetChannels != 1u, "Invalid channel number!");
3888 
3889  static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels, "Invalid shuffle pattern!");
3890  static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels, "Invalid shuffle pattern!");
3891  static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels, "Invalid shuffle pattern!");
3892  static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels, "Invalid shuffle pattern!");
3893  static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels, "Invalid shuffle pattern!");
3894  static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels, "Invalid shuffle pattern!");
3895  static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels, "Invalid shuffle pattern!");
3896  static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels, "Invalid shuffle pattern!");
3897 
3898  ocean_assert(source != nullptr && target != nullptr);
3899  ocean_assert(width >= 1u && height >= 1u);
3900 
3901  const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
3902  const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
3903 
3904  const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
3905 
3906  FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::shuffleRowChannels<T, tSourceChannels, tTargetChannels, tShufflePattern>, FrameChannels::reverseRowPixelOrderInPlace<T, tTargetChannels>, areContinuous, nullptr, worker);
3907 }
3908 
3909 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
3910 inline void FrameChannels::shuffleChannelsAndSetLastChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
3911 {
3912  static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u, "Invalid channel number!");
3913  static_assert(tTargetChannels >= 2u && tTargetChannels <= 8u, "Invalid channel number!");
3914 
3915  static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels, "Invalid shuffle pattern!");
3916  static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels, "Invalid shuffle pattern!");
3917  static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels, "Invalid shuffle pattern!");
3918  static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels, "Invalid shuffle pattern!");
3919  static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels, "Invalid shuffle pattern!");
3920  static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels, "Invalid shuffle pattern!");
3921  static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels, "Invalid shuffle pattern!");
3922  static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels, "Invalid shuffle pattern!");
3923 
3924  ocean_assert(source != nullptr && target != nullptr);
3925  ocean_assert(width >= 1u && height >= 1u);
3926 
3927  const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
3928  const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
3929 
3930  const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
3931 
3932  const T options = newChannelValue;
3933 
3934  FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::shuffleRowChannelsAndSetLastChannelValue<T, tSourceChannels, tTargetChannels, tShufflePattern>, FrameChannels::reverseRowPixelOrderInPlace<T, tTargetChannels>, areContinuous, &options, worker);
3935 }
3936 
3937 template <unsigned int tChannels>
3938 inline void FrameChannels::narrow16BitPerChannelTo8BitPerChannel(const uint16_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
3939 {
3940  static_assert(tChannels >= 1u, "Invalid channel number!");
3941 
3942  ocean_assert(source != nullptr && target != nullptr);
3943  ocean_assert(width >= 1u && height >= 1u);
3944 
3945  const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
3946  const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
3947 
3948  const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
3949 
3950  FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::narrowRow16BitPerChannelTo8BitPerChannel<tChannels>, FrameChannels::reverseRowPixelOrderInPlace<uint8_t, tChannels>, areContinuous, nullptr, worker);
3951 }
3952 
3953 template <typename T, unsigned int tChannels, void (*tPixelFunction)(const T*, T*)>
3954 void FrameChannels::applyPixelModifier(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, Worker* worker)
3955 {
3956  static_assert(tChannels > 0u, "Invalid channel number!");
3957 
3958  ocean_assert(source && target);
3959  ocean_assert(width != 0u && height != 0u);
3960 
3961  if (worker)
3962  worker->executeFunction(Worker::Function::createStatic(&FrameChannels::applyPixelModifierSubset<T, tChannels, tPixelFunction>, source, target, width, height, conversionFlag, 0u, 0u), 0u, height);
3963  else
3964  applyPixelModifierSubset<T, tChannels, tPixelFunction>(source, target, width, height, conversionFlag, 0u, height);
3965 }
3966 
3967 template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tPixelFunction)(const TSource*, TTarget*)>
3968 void FrameChannels::applyAdvancedPixelModifier(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker* worker)
3969 {
3970  static_assert(tSourceChannels > 0u, "Invalid source channel number!");
3971  static_assert(tTargetChannels > 0u, "Invalid target channel number!");
3972 
3973  ocean_assert(source && target);
3974  ocean_assert(width != 0u && height != 0u);
3975 
3976  if (worker)
3977  {
3978  worker->executeFunction(Worker::Function::createStatic(&FrameChannels::applyAdvancedPixelModifierSubset<TSource, TTarget, tSourceChannels, tTargetChannels, tPixelFunction>, source, target, width, height, sourcePaddingElements, targetPaddingElements, conversionFlag, 0u, 0u), 0u, height);
3979  }
3980  else
3981  {
3982  applyAdvancedPixelModifierSubset<TSource, TTarget, tSourceChannels, tTargetChannels, tPixelFunction>(source, target, width, height, sourcePaddingElements, targetPaddingElements, conversionFlag, 0u, height);
3983  }
3984 }
3985 
3986 template <typename TSource0, typename TSource1, typename TTarget, typename TIntermediate, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
3987 void FrameChannels::applyBivariateOperator(const TSource0* source0, const TSource1* source1, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker* worker)
3988 {
3989  static_assert(tSourceChannels > 0u, "Invalid source channel number!");
3990  static_assert(tTargetChannels > 0u, "Invalid target channel number!");
3991 
3992  ocean_assert(source0 && source1 && target);
3993  ocean_assert(width != 0u && height != 0u);
3994 
3995  if (worker)
3996  {
3997  worker->executeFunction(Worker::Function::createStatic(&FrameChannels::applyBivariateOperatorSubset<TSource0, TSource1, TTarget, TIntermediate, tSourceChannels, tTargetChannels, tOperator>, source0, source1, target, width, height, source0PaddingElements, source1PaddingElements, targetPaddingElements, conversionFlag, 0u, 0u), 0u, height);
3998  }
3999  else
4000  {
4001  FrameChannels::applyBivariateOperatorSubset<TSource0, TSource1, TTarget, TIntermediate, tSourceChannels, tTargetChannels, tOperator>(source0, source1, target, width, height, source0PaddingElements, source1PaddingElements, targetPaddingElements, conversionFlag, 0u, height);
4002  }
4003 }
4004 
4005 template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels>
4006 void FrameChannels::applyRowOperator(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels>& rowOperatorFunction, Worker* worker)
4007 {
4008  static_assert(tSourceChannels > 0u, "Invalid source channel number!");
4009  static_assert(tTargetChannels > 0u, "Invalid target channel number!");
4010 
4011  ocean_assert(source != nullptr && target != nullptr);
4012  ocean_assert(width != 0u && height != 0u);
4013 
4014  const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
4015  const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
4016 
4017  if (worker)
4018  {
4019  worker->executeFunction(Worker::Function::createStatic(&FrameChannels::applyRowOperatorSubset<TSource, TTarget, tSourceChannels, tTargetChannels>, source, target, width, height, sourceStrideElements, targetStrideElements, rowOperatorFunction, 0u, 0u), 0u, height);
4020  }
4021  else
4022  {
4023  applyRowOperatorSubset<TSource, TTarget, tSourceChannels, tTargetChannels>(source, target, width, height, sourceStrideElements, targetStrideElements, rowOperatorFunction, 0u, height);
4024  }
4025 }
4026 
4027 template <typename T, unsigned int tChannels>
4028 inline void FrameChannels::transformGeneric(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
4029 {
4030  ocean_assert(source != nullptr && target != nullptr);
4031  ocean_assert(width >= 1u && height >= 1u);
4032 
4033  const unsigned int bytesPerRow = width * sizeof(T) * tChannels;
4034 
4035  const unsigned int sourceStrideBytes = width * sizeof(T) * tChannels + sizeof(T) * sourcePaddingElements;
4036  const unsigned int targetStrideBytes = width * sizeof(T) * tChannels + sizeof(T) * targetPaddingElements;
4037 
4038  typedef typename TypeMapper<T>::Type MappedType;
4039 
4040  const RowReversePixelOrderFunction<void> rowReversePixelOrderFunction = (const RowReversePixelOrderFunction<void>)(FrameChannels::reverseRowPixelOrder<MappedType, tChannels>);
4041 
4042  if (worker && height > 200u)
4043  {
4044  worker->executeFunction(Worker::Function::createStatic(&FrameChannels::transformGenericSubset, (const uint8_t*)(source), (uint8_t*)(target), width, height, conversionFlag, rowReversePixelOrderFunction, bytesPerRow, sourceStrideBytes, targetStrideBytes, 0u, 0u), 0u, height, 9u, 10u, 20u);
4045  }
4046  else
4047  {
4048  transformGenericSubset((const uint8_t*)(source), (uint8_t*)(target), width, height, conversionFlag, rowReversePixelOrderFunction, bytesPerRow, sourceStrideBytes, targetStrideBytes, 0u, height);
4049  }
4050 }
4051 
4052 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
4053 void FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannel(uint8_t* const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker* worker)
4054 {
4055  static_assert(tChannels >= 2u, "Invalid channel number!");
4056  static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
4057 
4058  ocean_assert(frame != nullptr);
4059  ocean_assert(width >= 1u && height >= 1u);
4060 
4061  if (worker && height > 200u)
4062  {
4063  worker->executeFunction(Worker::Function::createStatic(&FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, frame, width, framePaddingElements, 0u, 0u), 0u, height, 3u, 4u, 20u);
4064  }
4065  else
4066  {
4067  premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(frame, width, framePaddingElements, 0u, height);
4068  }
4069 }
4070 
4071 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
4072 void FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
4073 {
4074  static_assert(tChannels >= 2u, "Invalid channel number!");
4075  static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
4076 
4077  ocean_assert(source != nullptr && target != nullptr);
4078  ocean_assert(width >= 1u && height >= 1u);
4079 
4080  if (worker && height > 200u)
4081  {
4082  worker->executeFunction(Worker::Function::createStatic(&FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, source, target, width, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height, 5u, 6u, 20u);
4083  }
4084  else
4085  {
4086  premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(source, target, width, sourcePaddingElements, targetPaddingElements, 0u, height);
4087  }
4088 }
4089 
4090 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
4091 void FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannel(uint8_t* const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker* worker)
4092 {
4093  static_assert(tChannels >= 2u, "Invalid channel number!");
4094  static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
4095 
4096  ocean_assert(frame != nullptr);
4097  ocean_assert(width >= 1u && height >= 1u);
4098 
4099  if (worker && height > 200u)
4100  {
4101  worker->executeFunction(Worker::Function::createStatic(&FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, frame, width, framePaddingElements, 0u, 0u), 0u, height, 3u, 4u, 20u);
4102  }
4103  else
4104  {
4105  straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(frame, width, framePaddingElements, 0u, height);
4106  }
4107 }
4108 
4109 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
4110 void FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
4111 {
4112  static_assert(tChannels >= 2u, "Invalid channel number!");
4113  static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
4114 
4115  ocean_assert(source != nullptr && target != nullptr);
4116  ocean_assert(width >= 1u && height >= 1u);
4117 
4118  if (worker && height > 200u)
4119  {
4120  worker->executeFunction(Worker::Function::createStatic(&FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, source, target, width, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height, 5u, 6u, 20u);
4121  }
4122  else
4123  {
4124  straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(source, target, width, sourcePaddingElements, targetPaddingElements, 0u, height);
4125  }
4126 }
4127 
4128 template <unsigned int tChannels>
4129 void FrameChannels::narrowRow16BitPerChannelTo8BitPerChannel(const uint16_t* source, uint8_t* target, const size_t size, const void* /* unusedParameters */)
4130 {
4131  static_assert(tChannels >= 1u, "Invalid channel number!");
4132 
4133  ocean_assert(source != nullptr && target != nullptr);
4134  ocean_assert(size > 0);
4135 
4136  const uint16_t* const sourceEnd = source + size * tChannels;
4137 
4138 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
4139 
4140  const size_t blocks8 = size / size_t(8);
4141 
4142  switch (tChannels)
4143  {
4144  case 4u:
4145  {
4146  for (size_t n = 0; n < blocks8; ++n)
4147  {
4148  const uint16x8_t sourceA_u_16x8 = vld1q_u16(source + 0);
4149  const uint16x8_t sourceB_u_16x8 = vld1q_u16(source + 8);
4150  const uint16x8_t sourceC_u_16x8 = vld1q_u16(source + 16);
4151  const uint16x8_t sourceD_u_16x8 = vld1q_u16(source + 24);
4152 
4153  const uint8x16_t targetAB_u_8x16 = vcombine_u8(vqrshrn_n_u16(sourceA_u_16x8, 8), vqrshrn_n_u16(sourceB_u_16x8, 8)); // narrowing rounded right shift: target = (source + 128) / 256
4154  const uint8x16_t targetCD_u_8x16 = vcombine_u8(vqrshrn_n_u16(sourceC_u_16x8, 8), vqrshrn_n_u16(sourceD_u_16x8, 8));
4155 
4156  vst1q_u8(target + 0, targetAB_u_8x16);
4157  vst1q_u8(target + 16, targetCD_u_8x16);
4158 
4159  source += 8u * tChannels;
4160  target += 8u * tChannels;
4161  }
4162 
4163  break;
4164  }
4165 
4166  default:
4167  break;
4168  }
4169 
4170 #endif
4171 
4172  while (source != sourceEnd)
4173  {
4174  ocean_assert(source < sourceEnd);
4175 
4176  for (unsigned int n = 0u; n < tChannels; ++n)
4177  {
4178  ocean_assert((uint16_t)(source[n] >> 8u) <= 255u);
4179  target[n] = (uint8_t)(source[n] >> 8u);
4180  }
4181 
4182  source += tChannels;
4183  target += tChannels;
4184  }
4185 }
4186 
4187 template <typename T, unsigned int tSourceChannels, bool tAddToFront>
4188 void FrameChannels::addChannelRow(const void** sources, void** targets, const unsigned int multipleRowIndex, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const void* options)
4189 {
4190  static_assert(tSourceChannels != 0u, "Invalid channel number!");
4191  static_assert(sizeof(size_t) == sizeof(const T*), "Invalid pointer size!");
4192 
4193  ocean_assert(sources != nullptr && targets != nullptr);
4194  ocean_assert(width != 0u && height != 0u);
4195  ocean_assert(multipleRowIndex < height);
4196  ocean_assert(options != nullptr);
4197 
4198  const T* source = (const T*)(sources[0]);
4199  const T* sourceOneChannel = (const T*)(sources[1]);
4200  ocean_assert(source != nullptr && sourceOneChannel != nullptr);
4201 
4202  T* target = (T*)(targets[0]);
4203  ocean_assert(target != nullptr);
4204 
4205  const unsigned int* uintOptions = (const unsigned int*)options;
4206  ocean_assert(uintOptions != nullptr);
4207 
4208  const unsigned int sourcePaddingElements = uintOptions[0];
4209  const unsigned int sourceOneChannelPaddingElements = uintOptions[1];
4210  const unsigned int targetPaddingElements = uintOptions[2];
4211 
4212  const unsigned int targetChannels = tSourceChannels + 1u;
4213 
4214  const unsigned int sourceStrideElements = tSourceChannels * width + sourcePaddingElements;
4215  const unsigned int sourceOneChannelStrideElements = width + sourceOneChannelPaddingElements;
4216  const unsigned int targetStrideElements = targetChannels * width + targetPaddingElements;
4217 
4218  const bool flipTarget = conversionFlag == CONVERT_FLIPPED || conversionFlag == CONVERT_FLIPPED_AND_MIRRORED;
4219  const bool mirrorTarget = conversionFlag == CONVERT_MIRRORED || conversionFlag == CONVERT_FLIPPED_AND_MIRRORED;
4220 
4221  const T* sourceRow = source + sourceStrideElements * multipleRowIndex;
4222  const T* sourceOneChannelRow = sourceOneChannel + sourceOneChannelStrideElements * multipleRowIndex;
4223  T* targetRow = flipTarget ? target + targetStrideElements * (height - multipleRowIndex - 1u) : target + targetStrideElements * multipleRowIndex;
4224 
4225  if (mirrorTarget == false)
4226  {
4227  for (unsigned int n = 0u; n < width; ++n)
4228  {
4229  if constexpr (tAddToFront)
4230  {
4231  targetRow[0] = sourceOneChannelRow[0];
4232 
4233  for (unsigned int c = 0u; c < tSourceChannels; ++c)
4234  {
4235  targetRow[c + 1u] = sourceRow[c];
4236  }
4237  }
4238  else
4239  {
4240  for (unsigned int c = 0u; c < tSourceChannels; ++c)
4241  {
4242  targetRow[c] = sourceRow[c];
4243  }
4244 
4245  targetRow[tSourceChannels] = sourceOneChannelRow[0];
4246  }
4247 
4248  sourceRow += tSourceChannels;
4249  sourceOneChannelRow++;
4250 
4251  targetRow += targetChannels;
4252  }
4253  }
4254  else
4255  {
4256  targetRow += targetChannels * (width - 1u);
4257 
4258  for (unsigned int n = 0u; n < width; ++n)
4259  {
4260  if constexpr (tAddToFront)
4261  {
4262  targetRow[0] = sourceOneChannelRow[0];
4263 
4264  for (unsigned int c = 0u; c < tSourceChannels; ++c)
4265  {
4266  targetRow[c + 1u] = sourceRow[c];
4267  }
4268  }
4269  else
4270  {
4271  for (unsigned int c = 0u; c < tSourceChannels; ++c)
4272  {
4273  targetRow[c] = sourceRow[c];
4274  }
4275 
4276  targetRow[tSourceChannels] = sourceOneChannelRow[0];
4277  }
4278 
4279  sourceRow += tSourceChannels;
4280  sourceOneChannelRow++;
4281 
4282  targetRow -= targetChannels;
4283  }
4284  }
4285 }
4286 
4287 template <typename T, unsigned int tSourceChannels, bool tAddToFront>
4288 void FrameChannels::addChannelValueRow(const T* source, T* target, const size_t size, const void* channelValueParameter)
4289 {
4290  static_assert(tSourceChannels != 0u, "Invalid channel number!");
4291 
4292  ocean_assert(source != nullptr && target != nullptr);
4293  ocean_assert(size > 0);
4294  ocean_assert(channelValueParameter != nullptr);
4295 
4296  const T& channelValue = *((const T*)channelValueParameter);
4297 
4298  const unsigned int targetChannels = tSourceChannels + 1u;
4299 
4300  for (size_t n = 0; n < size; ++n)
4301  {
4302  if constexpr (tAddToFront)
4303  {
4304  target[0] = channelValue;
4305 
4306  for (unsigned int c = 0u; c < tSourceChannels; ++c)
4307  {
4308  target[c + 1u] = source[c];
4309  }
4310  }
4311  else
4312  {
4313  for (unsigned int c = 0u; c < tSourceChannels; ++c)
4314  {
4315  target[c] = source[c];
4316  }
4317 
4318  target[tSourceChannels] = channelValue;
4319  }
4320 
4321  source += tSourceChannels;
4322  target += targetChannels;
4323  }
4324 }
4325 
4326 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tSourceChannelIndex, unsigned int tTargetChannelIndex>
4327 void FrameChannels::copyChannelRow(const T* source, T* target, const size_t size, const void* /*unusedParameters*/)
4328 {
4329  static_assert(tSourceChannels != 0u, "Invalid channel number!");
4330  static_assert(tTargetChannels != 0u, "Invalid channel number!");
4331 
4332  static_assert(tSourceChannelIndex < tSourceChannels, "Invalid channel number!");
4333  static_assert(tTargetChannelIndex < tTargetChannels, "Invalid channel number!");
4334 
4335  ocean_assert(source != nullptr && target != nullptr);
4336  ocean_assert(size > 0);
4337 
4338  for (size_t n = 0; n < size; ++n)
4339  {
4340  target[tTargetChannelIndex] = source[tSourceChannelIndex];
4341 
4342  source += tSourceChannels;
4343  target += tTargetChannels;
4344  }
4345 }
4346 
4347 template <typename TSource, typename TTarget>
4348 void FrameChannels::separateTo1ChannelRuntime(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
4349 {
4350  ocean_assert(sourceFrame != nullptr);
4351  ocean_assert(targetFrames != nullptr);
4352 
4353  ocean_assert(width != 0u && height != 0u);
4354  ocean_assert(channels != 0u);
4355 
4356 #ifdef OCEAN_DEBUG
4357  for (unsigned int c = 0u; c < channels; ++c)
4358  {
4359  ocean_assert(targetFrames[c] != nullptr);
4360  }
4361 #endif
4362 
4363  if (sourceFramePaddingElements == 0u && targetFramesPaddingElements == nullptr)
4364  {
4365  for (unsigned int n = 0u; n < width * height; ++n)
4366  {
4367  for (unsigned int c = 0u; c < channels; ++c)
4368  {
4369  targetFrames[c][n] = TTarget(sourceFrame[n * channels + c]);
4370  }
4371  }
4372  }
4373  else if (targetFramesPaddingElements == nullptr)
4374  {
4375  ocean_assert(sourceFramePaddingElements != 0u);
4376 
4377  const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
4378 
4379  for (unsigned int y = 0u; y < height; ++y)
4380  {
4381  const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
4382 
4383  const unsigned int targetRowOffset = y * width;
4384 
4385  for (unsigned int x = 0u; x < width; ++x)
4386  {
4387  for (unsigned int c = 0u; c < channels; ++c)
4388  {
4389  *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * channels + c));
4390  }
4391  }
4392  }
4393  }
4394  else
4395  {
4396  const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
4397 
4398  Indices32 targetFrameStrideElements(channels);
4399 
4400  for (unsigned int c = 0u; c < channels; ++c)
4401  {
4402  targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
4403  }
4404 
4405  for (unsigned int y = 0u; y < height; ++y)
4406  {
4407  const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
4408 
4409  for (unsigned int x = 0u; x < width; ++x)
4410  {
4411  for (unsigned int c = 0u; c < channels; ++c)
4412  {
4413  *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * channels + c));
4414  }
4415  }
4416  }
4417  }
4418 }
4419 
4420 template <typename TSource, typename TTarget>
4421 void FrameChannels::zipChannelsRuntime(const TSource* const* sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
4422 {
4423  ocean_assert(sourceFrames != nullptr);
4424  ocean_assert(targetFrame != nullptr);
4425 
4426  ocean_assert(width != 0u && height != 0u);
4427  ocean_assert(channels != 0u);
4428 
4429  bool allSourceFramesContinuous = true;
4430 
4431  if (sourceFramesPaddingElements != nullptr)
4432  {
4433  for (unsigned int n = 0u; n < channels; ++n)
4434  {
4435  if (sourceFramesPaddingElements[n] != 0u)
4436  {
4437  allSourceFramesContinuous = false;
4438  break;
4439  }
4440  }
4441  }
4442 
4443  if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
4444  {
4445  for (unsigned int n = 0u; n < width * height; ++n)
4446  {
4447  for (unsigned int c = 0u; c < channels; ++c)
4448  {
4449  targetFrame[n * channels + c] = TTarget(sourceFrames[c][n]);
4450  }
4451  }
4452  }
4453  else
4454  {
4455  const unsigned int targetFrameStrideElements = width * channels + targetFramePaddingElements;
4456 
4457  Indices32 sourceFrameStrideElements(channels);
4458 
4459  for (unsigned int c = 0u; c < channels; ++c)
4460  {
4461  if (sourceFramesPaddingElements == nullptr)
4462  {
4463  sourceFrameStrideElements[c] = width;
4464  }
4465  else
4466  {
4467  sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
4468  }
4469  }
4470 
4471  for (unsigned int y = 0u; y < height; ++y)
4472  {
4473  TTarget* const targetRow = targetFrame + y * targetFrameStrideElements;
4474 
4475  for (unsigned int x = 0u; x < width; ++x)
4476  {
4477  for (unsigned int c = 0u; c < channels; ++c)
4478  {
4479  *(targetRow + x * channels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x));
4480  }
4481  }
4482  }
4483  }
4484 }
4485 
4486 template <typename T, unsigned int tChannel, unsigned int tChannels>
4487 void FrameChannels::setChannelSubset(T* frame, const unsigned int width, const T value, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4488 {
4489  static_assert(tChannels >= 1u, "Invalid channel number!");
4490  static_assert(tChannel < tChannels, "Invalid channel index!");
4491 
4492  ocean_assert(frame != nullptr);
4493 
4494  const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
4495 
4496  frame += firstRow * frameStrideElements + tChannel;
4497 
4498  for (unsigned int n = 0u; n < numberRows; ++n)
4499  {
4500  for (unsigned int x = 0u; x < width; ++x)
4501  {
4502  frame[x * tChannels] = value;
4503  }
4504 
4505  frame += frameStrideElements;
4506  }
4507 }
4508 
4509 template <typename T, unsigned int tChannels, void (*tPixelFunction)(const T*, T*)>
4510 void FrameChannels::applyPixelModifierSubset(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
4511 {
4512  static_assert(tChannels >= 1u, "Invalid channel number");
4513 
4514  ocean_assert(source && target);
4515  ocean_assert(source != target);
4516 
4517  ocean_assert(numberRows > 0u);
4518  ocean_assert(firstRow + numberRows <= height);
4519 
4520  const unsigned int widthElements = width * tChannels;
4521  const unsigned int targetBlockSize = widthElements * numberRows;
4522 
4523  switch (conversionFlag)
4524  {
4525  case CONVERT_NORMAL:
4526  {
4527  source += firstRow * widthElements;
4528  target += firstRow * widthElements;
4529 
4530  const T* const targetEnd = target + targetBlockSize;
4531 
4532  while (target != targetEnd)
4533  {
4534  tPixelFunction(source, target);
4535 
4536  source += tChannels;
4537  target += tChannels;
4538  }
4539 
4540  break;
4541  }
4542 
4543  case CONVERT_FLIPPED:
4544  {
4545  source += firstRow * widthElements;
4546  target += width * height * tChannels - (firstRow + 1u) * widthElements;
4547 
4548  const T* const targetEnd = target - targetBlockSize;
4549 
4550  while (target != targetEnd)
4551  {
4552  const T* const targetRowEnd = target + widthElements;
4553 
4554  while (target != targetRowEnd)
4555  {
4556  tPixelFunction(source, target);
4557 
4558  source += tChannels;
4559  target += tChannels;
4560  }
4561 
4562  target -= (widthElements << 1); // width * tChannels * 2
4563  }
4564 
4565  break;
4566  }
4567 
4568  case CONVERT_MIRRORED:
4569  {
4570  source += firstRow * widthElements;
4571  target += (firstRow + 1u) * widthElements;
4572 
4573  const T* const targetEnd = target + targetBlockSize;
4574 
4575  while (target != targetEnd)
4576  {
4577  const T* const targetRowEnd = target - widthElements;
4578 
4579  while (target != targetRowEnd)
4580  {
4581  tPixelFunction(source, target -= tChannels);
4582 
4583  source += tChannels;
4584  }
4585 
4586  target += widthElements << 1; // width * tChannels * 2;
4587  }
4588 
4589  break;
4590  }
4591 
4593  {
4594  source += firstRow * widthElements;
4595  target += width * height * tChannels - firstRow * widthElements;
4596 
4597  const T* const targetEnd = target - targetBlockSize;
4598 
4599  while (target != targetEnd)
4600  {
4601  tPixelFunction(source, target -= tChannels);
4602 
4603  source += tChannels;
4604  }
4605 
4606  break;
4607  }
4608 
4609  // default: this case is not handled
4610  }
4611 }
4612 
4613 template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tPixelFunction)(const TSource*, TTarget*)>
4614 void FrameChannels::applyAdvancedPixelModifierSubset(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
4615 {
4616  static_assert(tSourceChannels >= 1u, "Invalid source channel number");
4617  static_assert(tTargetChannels >= 1u, "Invalid target channel number");
4618 
4619  ocean_assert(source && target);
4620  ocean_assert((void*)source != (void*)target);
4621 
4622  ocean_assert(numberRows != 0u);
4623  ocean_assert(firstRow + numberRows <= height);
4624 
4625  const unsigned int sourceWidthElements = width * tSourceChannels;
4626  const unsigned int targetWidthElements = width * tTargetChannels;
4627 
4628  const unsigned int sourceStrideElements = sourceWidthElements + sourcePaddingElements;
4629  const unsigned int targetStrideElements = targetWidthElements + targetPaddingElements;
4630 
4631  switch (conversionFlag)
4632  {
4633  case CONVERT_NORMAL:
4634  {
4635  for (unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4636  {
4637  const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4638  TTarget* targetPixel = target + rowIndex * targetStrideElements;
4639 
4640  for (unsigned int x = 0u; x < width; ++x)
4641  {
4642  tPixelFunction(sourcePixel, targetPixel);
4643 
4644  sourcePixel += tSourceChannels;
4645  targetPixel += tTargetChannels;
4646  }
4647  }
4648 
4649  break;
4650  }
4651 
4652  case CONVERT_FLIPPED:
4653  {
4654  for (unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4655  {
4656  const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4657  TTarget* targetPixel = target + (height - rowIndex - 1u) * targetStrideElements;
4658 
4659  for (unsigned int x = 0u; x < width; ++x)
4660  {
4661  tPixelFunction(sourcePixel, targetPixel);
4662 
4663  sourcePixel += tSourceChannels;
4664  targetPixel += tTargetChannels;
4665  }
4666  }
4667 
4668  break;
4669  }
4670 
4671  case CONVERT_MIRRORED:
4672  {
4673  for (unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4674  {
4675  const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4676 
4677  TTarget* const targetRowBegin = target + rowIndex * targetStrideElements;
4678  TTarget* targetPixel = targetRowBegin + targetWidthElements - tTargetChannels;
4679 
4680  for (unsigned int x = 0u; x < width; ++x)
4681  {
4682  ocean_assert(targetPixel >= targetRowBegin);
4683  tPixelFunction(sourcePixel, targetPixel);
4684 
4685  sourcePixel += tSourceChannels;
4686  targetPixel -= tTargetChannels;
4687  }
4688  }
4689 
4690  break;
4691  }
4692 
4694  {
4695  for (unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4696  {
4697  const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4698 
4699  TTarget* const targetRowBegin = target + (height - rowIndex - 1u) * targetStrideElements;
4700  TTarget* targetPixel = targetRowBegin + targetWidthElements - tTargetChannels;
4701 
4702  for (unsigned int x = 0u; x < width; ++x)
4703  {
4704  ocean_assert(targetPixel >= targetRowBegin);
4705  tPixelFunction(sourcePixel, targetPixel);
4706 
4707  sourcePixel += tSourceChannels;
4708  targetPixel -= tTargetChannels;
4709  }
4710  }
4711 
4712  break;
4713  }
4714 
4715  // default: this case is not handled
4716  }
4717 }
4718 
4719 template <typename TSource0, typename TSource1, typename TTarget, typename TIntermediate, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
4720 void FrameChannels::applyBivariateOperatorSubset(const TSource0* source0, const TSource1* source1, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
4721 {
4722  static_assert(tSourceChannels >= 1u, "Invalid source channel number");
4723  static_assert(tTargetChannels >= 1u, "Invalid target channel number");
4724  static_assert(tOperator, "Invalid operator function");
4725 
4726  ocean_assert(source0 != nullptr && source1 != nullptr && target != nullptr);
4727  ocean_assert((const void*)(source0) != (const void*)(target));
4728  ocean_assert((const void*)(source1) != (const void*)(target));
4729 
4730  ocean_assert(numberRows != 0u);
4731  ocean_assert(firstRow + numberRows <= height);
4732 
4733  const unsigned int source0StrideElements = width * tSourceChannels + source0PaddingElements;
4734  const unsigned int source1StrideElements = width * tSourceChannels + source1PaddingElements;
4735 
4736  const unsigned int targetWidthElements = width * tTargetChannels;
4737 
4738  const unsigned int targetStrideElements = targetWidthElements + targetPaddingElements;
4739 
4740  switch (conversionFlag)
4741  {
4742  case CONVERT_NORMAL:
4743  {
4744  for (unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4745  {
4746  const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4747  const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4748 
4749  TTarget* rowTarget = target + rowIndex * targetStrideElements;
4750  const TTarget* const rowTargetEnd = rowTarget + targetWidthElements;
4751 
4752  while (rowTarget != rowTargetEnd)
4753  {
4754  ocean_assert(rowTarget < rowTargetEnd);
4755 
4756  tOperator(rowSource0, rowSource1, rowTarget);
4757 
4758  rowSource0 += tSourceChannels;
4759  rowSource1 += tSourceChannels;
4760 
4761  rowTarget += tTargetChannels;
4762  }
4763  }
4764 
4765  return;
4766  }
4767 
4768  case CONVERT_FLIPPED:
4769  {
4770  for (unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4771  {
4772  const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4773  const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4774 
4775  TTarget* rowTarget = target + (height - rowIndex - 1u) * targetStrideElements;
4776  const TTarget* const rowTargetEnd = rowTarget + targetWidthElements;
4777 
4778  while (rowTarget != rowTargetEnd)
4779  {
4780  ocean_assert(rowTarget < rowTargetEnd);
4781 
4782  tOperator(rowSource0, rowSource1, rowTarget);
4783 
4784  rowSource0 += tSourceChannels;
4785  rowSource1 += tSourceChannels;
4786 
4787  rowTarget += tTargetChannels;
4788  }
4789  }
4790 
4791  return;
4792  }
4793 
4794  case CONVERT_MIRRORED:
4795  {
4796  for (unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4797  {
4798  const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4799  const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4800 
4801  TTarget* rowTarget = target + rowIndex * targetStrideElements + targetWidthElements - tTargetChannels;
4802  const TTarget* const rowTargetEnd = rowTarget - targetWidthElements;
4803 
4804  while (rowTarget != rowTargetEnd)
4805  {
4806  ocean_assert(rowTarget > rowTargetEnd);
4807 
4808  tOperator(rowSource0, rowSource1, rowTarget);
4809 
4810  rowSource0 += tSourceChannels;
4811  rowSource1 += tSourceChannels;
4812 
4813  rowTarget -= tTargetChannels;
4814  }
4815  }
4816 
4817  return;
4818  }
4819 
4821  {
4822  for (unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4823  {
4824  const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4825  const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4826 
4827  TTarget* rowTarget = target + (height - rowIndex - 1u) * targetStrideElements + targetWidthElements - tTargetChannels;
4828  const TTarget* const rowTargetEnd = rowTarget - targetWidthElements;
4829 
4830  while (rowTarget != rowTargetEnd)
4831  {
4832  ocean_assert(rowTarget > rowTargetEnd);
4833 
4834  tOperator(rowSource0, rowSource1, rowTarget);
4835 
4836  rowSource0 += tSourceChannels;
4837  rowSource1 += tSourceChannels;
4838 
4839  rowTarget -= tTargetChannels;
4840  }
4841  }
4842 
4843  return;
4844  }
4845 
4846  default:
4847  ocean_assert(false && "This should never happen!");
4848  break;
4849  }
4850 }
4851 
4852 template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels>
4853 void FrameChannels::applyRowOperatorSubset(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels> rowOperatorFunction, const unsigned int firstRow, const unsigned int numberRows)
4854 {
4855  static_assert(tSourceChannels >= 1u, "Invalid source channel number");
4856  static_assert(tTargetChannels >= 1u, "Invalid target channel number");
4857 
4858  ocean_assert(source != nullptr && target != nullptr);
4859  ocean_assert((const void*)source != (const void*)target);
4860 
4861  ocean_assert(width * tSourceChannels <= sourceStrideElements);
4862  ocean_assert(width * tTargetChannels <= targetStrideElements);
4863 
4864  ocean_assert(rowOperatorFunction != nullptr);
4865 
4866  ocean_assert(numberRows != 0u);
4867  ocean_assert(firstRow + numberRows <= height);
4868 
4869  for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4870  {
4871  rowOperatorFunction(source + y * sourceStrideElements, target + y * targetStrideElements, width, height, y, sourceStrideElements, targetStrideElements);
4872  }
4873 }
4874 
4875 template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2>
4876 void FrameChannels::convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* channelMultiplicationFactors_128)
4877 {
4878  static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2, "Invalid channel factors!");
4879 
4880  ocean_assert(channelMultiplicationFactors_128 != nullptr);
4881  const unsigned int* channelFactors_128 = reinterpret_cast<const unsigned int*>(channelMultiplicationFactors_128);
4882  ocean_assert(channelFactors_128 != nullptr);
4883 
4884  const unsigned int factorChannel0_128 = channelFactors_128[0];
4885  const unsigned int factorChannel1_128 = channelFactors_128[1];
4886  const unsigned int factorChannel2_128 = channelFactors_128[2];
4887 
4888  ocean_assert(factorChannel0_128 <= 128u && factorChannel1_128 <= 128u && factorChannel2_128 <= 128u);
4889  ocean_assert(factorChannel0_128 + factorChannel1_128 + factorChannel2_128 == 128u);
4890 
4891  ocean_assert(tUseFactorChannel0 == (factorChannel0_128 != 0u));
4892  ocean_assert(tUseFactorChannel1 == (factorChannel1_128 != 0u));
4893  ocean_assert(tUseFactorChannel2 == (factorChannel2_128 != 0u));
4894 
4895  ocean_assert(source != nullptr && target != nullptr && size >= 1);
4896 
4897  const uint8_t* const targetEnd = target + size;
4898 
4899 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
4900 
4901  constexpr size_t blockSize = 16;
4902  const size_t blocks = size / blockSize;
4903 
4904  const __m128i multiplicationFactors0_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel0_128));
4905  const __m128i multiplicationFactors1_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel1_128));
4906  const __m128i multiplicationFactors2_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel2_128));
4907 
4908  for (size_t n = 0; n < blocks; ++n)
4909  {
4910  convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(source, target, multiplicationFactors0_128_u_16x8, multiplicationFactors1_128_u_16x8, multiplicationFactors2_128_u_16x8);
4911 
4912  source += blockSize * size_t(3);
4913  target += blockSize;
4914  }
4915 
4916 #elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
4917 
4918  constexpr size_t blockSize = 8;
4919  const size_t blocks = size / blockSize;
4920 
4921  const uint8x8_t factorChannel0_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel0_128);
4922  const uint8x8_t factorChannel1_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel1_128);
4923  const uint8x8_t factorChannel2_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel2_128);
4924 
4925  for (size_t n = 0; n < blocks; ++n)
4926  {
4927  convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON<tUseFactorChannel0, tUseFactorChannel1, tUseFactorChannel2>(source, target, factorChannel0_128_u_8x8, factorChannel1_128_u_8x8, factorChannel2_128_u_8x8);
4928 
4929  source += blockSize * size_t(3);
4930  target += blockSize;
4931  }
4932 
4933 #endif
4934 
4935  while (target != targetEnd)
4936  {
4937  ocean_assert(target < targetEnd);
4938 
4939  const unsigned int channel0 = tUseFactorChannel0 ? (source[0] * factorChannel0_128) : 0u;
4940  const unsigned int channel1 = tUseFactorChannel1 ? (source[1] * factorChannel1_128) : 0u;
4941  const unsigned int channel2 = tUseFactorChannel2 ? (source[2] * factorChannel2_128) : 0u;
4942 
4943  *target++ = (uint8_t)((channel0 + channel1 + channel2 + 64u) >> 7u);
4944  source += 3;
4945  }
4946 }
4947 
4948 template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2, bool tUseFactorChannel3>
4949 void FrameChannels::convertRow4ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* channelMultiplicationFactors_128)
4950 {
4951  static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2 || tUseFactorChannel3, "Invalid channel factors!");
4952 
4953  ocean_assert(channelMultiplicationFactors_128 != nullptr);
4954  const unsigned int* channelFactors_128 = reinterpret_cast<const unsigned int*>(channelMultiplicationFactors_128);
4955  ocean_assert(channelFactors_128 != nullptr);
4956 
4957  const unsigned int factorChannel0_128 = channelFactors_128[0];
4958  const unsigned int factorChannel1_128 = channelFactors_128[1];
4959  const unsigned int factorChannel2_128 = channelFactors_128[2];
4960  const unsigned int factorChannel3_128 = channelFactors_128[3];
4961 
4962  ocean_assert(factorChannel0_128 <= 127u && factorChannel1_128 <= 127u && factorChannel2_128 <= 127u && factorChannel3_128 <= 127u);
4963  ocean_assert(factorChannel0_128 + factorChannel1_128 + factorChannel2_128 + factorChannel3_128 == 128u);
4964 
4965  ocean_assert(tUseFactorChannel0 == (factorChannel0_128 != 0u));
4966  ocean_assert(tUseFactorChannel1 == (factorChannel1_128 != 0u));
4967  ocean_assert(tUseFactorChannel2 == (factorChannel2_128 != 0u));
4968  ocean_assert(tUseFactorChannel3 == (factorChannel3_128 != 0u));
4969 
4970  ocean_assert(source != nullptr && target != nullptr && size >= 1);
4971 
4972  const uint8_t* const targetEnd = target + size;
4973 
4974 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
4975 
4976  constexpr size_t blockSize = 16;
4977  const size_t blocks = size / blockSize;
4978 
4979  const __m128i m128_multiplicationFactors = _mm_set1_epi32(int(factorChannel0_128 | (factorChannel1_128 << 8u) | (factorChannel2_128 << 16u) | (factorChannel3_128 << 24u)));
4980 
4981  for (size_t n = 0; n < blocks; ++n)
4982  {
4983  convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(source, target, m128_multiplicationFactors);
4984 
4985  source += blockSize * size_t(4);
4986  target += blockSize;
4987  }
4988 
4989 #elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
4990 
4991  constexpr size_t blockSize = 8;
4992  const size_t blocks = size / blockSize;
4993 
4994  const uint8x8_t factorChannel0_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel0_128);
4995  const uint8x8_t factorChannel1_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel1_128);
4996  const uint8x8_t factorChannel2_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel2_128);
4997  const uint8x8_t factorChannel3_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel3_128);
4998 
4999  for (size_t n = 0; n < blocks; ++n)
5000  {
5001  convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON<tUseFactorChannel0, tUseFactorChannel1, tUseFactorChannel2, tUseFactorChannel3>(source, target, factorChannel0_128_u_8x8, factorChannel1_128_u_8x8, factorChannel2_128_u_8x8, factorChannel3_128_u_8x8);
5002 
5003  source += blockSize * size_t(4);
5004  target += blockSize;
5005  }
5006 
5007 #endif
5008 
5009  while (target != targetEnd)
5010  {
5011  ocean_assert(target < targetEnd);
5012 
5013  const unsigned int channel0 = tUseFactorChannel0 ? (source[0] * factorChannel0_128) : 0u;
5014  const unsigned int channel1 = tUseFactorChannel1 ? (source[1] * factorChannel1_128) : 0u;
5015  const unsigned int channel2 = tUseFactorChannel2 ? (source[2] * factorChannel2_128) : 0u;
5016  const unsigned int channel3 = tUseFactorChannel3 ? (source[3] * factorChannel3_128) : 0u;
5017 
5018  *target++ = (uint8_t)((channel0 + channel1 + channel2 + channel3 + 64u) >> 7u);
5019  source += 4;
5020  }
5021 }
5022 
5023 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
5024 void FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset(uint8_t* const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5025 {
5026  static_assert(tChannels >= 2u, "Invalid channel number!");
5027  static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
5028 
5029  ocean_assert(frame != nullptr);
5030  ocean_assert(width >= 1u);
5031 
5032  const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
5033 
5034  uint8_t* frameRow = frame + frameStrideElements * firstRow;
5035 
5036  for (unsigned int y = 0u; y < numberRows; ++y)
5037  {
5038  for (unsigned int x = 0u; x < width; ++x)
5039  {
5040  if (frameRow[tAlphaChannelIndex])
5041  {
5042  const uint8_t alpha_2 = frameRow[tAlphaChannelIndex] / 2u;
5043 
5044  for (unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5045  {
5046  if (channelIndex != tAlphaChannelIndex)
5047  {
5048  frameRow[channelIndex] = uint8_t(std::min((frameRow[channelIndex] * 255u + alpha_2) / frameRow[tAlphaChannelIndex], 255u));
5049  }
5050  }
5051  }
5052 
5053  frameRow += tChannels;
5054  }
5055 
5056  frameRow += framePaddingElements;
5057  }
5058 }
5059 
5060 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
5061 void FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset(const uint8_t* const source, uint8_t* target, const unsigned int width, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5062 {
5063  static_assert(tChannels >= 2u, "Invalid channel number!");
5064  static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
5065 
5066  ocean_assert(source != nullptr && target != nullptr);
5067  ocean_assert(width >= 1u);
5068 
5069  const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
5070  const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
5071 
5072  const uint8_t* sourceRow = source + sourceStrideElements * firstRow;
5073  uint8_t* targetRow = target + targetStrideElements * firstRow;
5074 
5075  for (unsigned int y = 0u; y < numberRows; ++y)
5076  {
5077  for (unsigned int x = 0u; x < width; ++x)
5078  {
5079  if (sourceRow[tAlphaChannelIndex])
5080  {
5081  const uint8_t alpha_2 = sourceRow[tAlphaChannelIndex] / 2u;
5082 
5083  for (unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5084  {
5085  if (channelIndex != tAlphaChannelIndex)
5086  {
5087  targetRow[channelIndex] = uint8_t(std::max((sourceRow[channelIndex] * 255u + alpha_2) / sourceRow[tAlphaChannelIndex], 255u));
5088  }
5089  else
5090  {
5091  targetRow[channelIndex] = sourceRow[channelIndex];
5092  }
5093  }
5094  }
5095  else
5096  {
5097  for (unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5098  {
5099  targetRow[channelIndex] = sourceRow[channelIndex];
5100  }
5101  }
5102 
5103  sourceRow += tChannels;
5104  targetRow += tChannels;
5105  }
5106 
5107  sourceRow += sourcePaddingElements;
5108  targetRow += targetPaddingElements;
5109  }
5110 }
5111 
5112 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
5113 void FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset(uint8_t* const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5114 {
5115  static_assert(tChannels >= 2u, "Invalid channel number!");
5116  static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
5117 
5118  ocean_assert(frame != nullptr);
5119  ocean_assert(width >= 1u);
5120 
5121  const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
5122 
5123  uint8_t* frameRow = frame + frameStrideElements * firstRow;
5124 
5125  for (unsigned int y = 0u; y < numberRows; ++y)
5126  {
5127  for (unsigned int x = 0u; x < width; ++x)
5128  {
5129  for (unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5130  {
5131  if (channelIndex != tAlphaChannelIndex)
5132  {
5133  frameRow[channelIndex] = (frameRow[channelIndex] * frameRow[tAlphaChannelIndex] + 127u) / 255u;
5134  }
5135  }
5136 
5137  frameRow += tChannels;
5138  }
5139 
5140  frameRow += framePaddingElements;
5141  }
5142 }
5143 
5144 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
5145 void FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5146 {
5147  static_assert(tChannels >= 2u, "Invalid channel number!");
5148  static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
5149 
5150  ocean_assert(source != nullptr && target != nullptr);
5151  ocean_assert(width >= 1u);
5152 
5153  const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
5154  const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
5155 
5156  const uint8_t* sourceRow = source + sourceStrideElements * firstRow;
5157  uint8_t* targetRow = target + targetStrideElements * firstRow;
5158 
5159  for (unsigned int y = 0u; y < numberRows; ++y)
5160  {
5161  for (unsigned int x = 0u; x < width; ++x)
5162  {
5163  for (unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5164  {
5165  if (channelIndex != tAlphaChannelIndex)
5166  {
5167  targetRow[channelIndex] = (sourceRow[channelIndex] * sourceRow[tAlphaChannelIndex] + 127u) / 255u;
5168  }
5169  else
5170  {
5171  targetRow[channelIndex] = sourceRow[channelIndex];
5172  }
5173  }
5174 
5175  sourceRow += tChannels;
5176  targetRow += tChannels;
5177  }
5178 
5179  sourceRow += sourcePaddingElements;
5180  targetRow += targetPaddingElements;
5181  }
5182 }
5183 
5184 #if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
5185 
5186 OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactors0_128_u_16x8, const __m128i& multiplicationFactors1_128_u_16x8, const __m128i& multiplicationFactors2_128_u_16x8)
5187 {
5188  ocean_assert(source != nullptr && target != nullptr);
5189 
5190  // the documentation of this function is designed for RGB24 to Y8 conversion
5191  // however, in general this function can be used to apply a linear combination on the four source channels
5192  // to create one output channel
5193 
5194  // precise color space conversion:
5195  // Y = 0.299 * R + 0.587 * G + 0.114 * B
5196 
5197  // approximation:
5198  // Y = (38 * R + 75 * G + 15 * B) / 128
5199 
5200  // we expect the following input pattern (for here RGB24):
5201  // FEDC BA98 7654 3210 FEDC BA98 7654 3210 FEDC BA98 7654 3210
5202  // BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR
5203 
5204  // we store eight 16 bit values holding 64 for rounding purpose:
5205  const __m128i constant64_u_16x8 = _mm_set1_epi32(0x00400040);
5206 
5207  const __m128i sourceA_u_8x16 = _mm_loadu_si128((const __m128i*)source + 0);
5208  const __m128i sourceB_u_8x16 = _mm_loadu_si128((const __m128i*)source + 1);
5209  const __m128i sourceC_u_8x16 = _mm_loadu_si128((const __m128i*)source + 2);
5210 
5211  __m128i channel0_u_8x16;
5212  __m128i channel1_u_8x16;
5213  __m128i channel2_u_8x16;
5214  SSE::deInterleave3Channel8Bit48Elements(sourceA_u_8x16, sourceB_u_8x16, sourceC_u_8x16, channel0_u_8x16, channel1_u_8x16, channel2_u_8x16);
5215 
5216  // now we need 16 bit values instead of 8 bit values
5217 
5218  const __m128i channel0_low_u_8x16 = SSE::removeHighBits16_8(channel0_u_8x16);
5219  const __m128i channel1_low_u_8x16 = SSE::removeHighBits16_8(channel1_u_8x16);
5220  const __m128i channel2_low_u_8x16 = SSE::removeHighBits16_8(channel2_u_8x16);
5221 
5222  const __m128i channel0_high_u_8x16 = _mm_srli_epi16(channel0_u_8x16, 8);
5223  const __m128i channel1_high_u_8x16 = _mm_srli_epi16(channel1_u_8x16, 8);
5224  const __m128i channel2_high_u_8x16 = _mm_srli_epi16(channel2_u_8x16, 8);
5225 
5226  // we multiply each channel with the corresponding multiplication factors
5227 
5228  const __m128i result0_low_u_8x16 = _mm_mullo_epi16(channel0_low_u_8x16, multiplicationFactors0_128_u_16x8);
5229  const __m128i result0_high_u_8x16 = _mm_mullo_epi16(channel0_high_u_8x16, multiplicationFactors0_128_u_16x8);
5230 
5231  const __m128i result1_low_u_8x16 = _mm_mullo_epi16(channel1_low_u_8x16, multiplicationFactors1_128_u_16x8);
5232  const __m128i result1_high_u_8x16 = _mm_mullo_epi16(channel1_high_u_8x16, multiplicationFactors1_128_u_16x8);
5233 
5234  const __m128i result2_low_u_8x16 = _mm_mullo_epi16(channel2_low_u_8x16, multiplicationFactors2_128_u_16x8);
5235  const __m128i result2_high_u_8x16 = _mm_mullo_epi16(channel2_high_u_8x16, multiplicationFactors2_128_u_16x8);
5236 
5237  // we sum up all results and add 64 for rounding purpose
5238  const __m128i result128_low_u_8x16 = _mm_adds_epu16(_mm_adds_epu16(result0_low_u_8x16, result1_low_u_8x16), _mm_adds_epu16(result2_low_u_8x16, constant64_u_16x8));
5239  const __m128i result128_high_u_8x16 = _mm_adds_epu16(_mm_adds_epu16(result0_high_u_8x16, result1_high_u_8x16), _mm_adds_epu16(result2_high_u_8x16, constant64_u_16x8));
5240 
5241  // we shift the multiplication results by 7 bits (= 128)
5242  const __m128i result_low_u_8x16 = _mm_srli_epi16(result128_low_u_8x16, 7);
5243  const __m128i result_high_u_8x16 = _mm_srli_epi16(result128_high_u_8x16, 7);
5244 
5245  // finally, we have to get rid of the upper zero bits by combining two 128 bit registers to one:
5246  const __m128i result_u_8x16 = _mm_or_si128(result_low_u_8x16, _mm_slli_epi16(result_high_u_8x16, 8));
5247 
5248  // and we can store the result
5249  _mm_storeu_si128((__m128i*)target, result_u_8x16);
5250 }
5251 
5252 OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& factorChannel00_128_s_16x8, const __m128i& factorChannel10_128_s_16x8, const __m128i& factorChannel20_128_s_16x8, const __m128i& factorChannel01_128_s_16x8, const __m128i& factorChannel11_128_s_16x8, const __m128i& factorChannel21_128_s_16x8, const __m128i& factorChannel02_128_s_16x8, const __m128i& factorChannel12_128_s_16x8, const __m128i& factorChannel22_128_s_16x8, const __m128i& biasChannel0_s_16x8, const __m128i& biasChannel1_s_16x8, const __m128i& biasChannel2_s_16x8)
5253 {
5254  ocean_assert(source != nullptr && target != nullptr);
5255 
      // Converts 16 pixels with 3 interleaved 8 bit channels (48 bytes are read from 'source', 48 bytes are written to 'target').
      // Each target channel is a linear combination of the three source channels, normalized by 128, plus a bias, clamped to [0, 255]:
      //   target[t] = clamp((source[0] * factorChannel<t>0 + source[1] * factorChannel<t>1 + source[2] * factorChannel<t>2) / 128 + biasChannel<t>)
      // In 'factorChannel<t><s>' the first index is the target channel and the second index is the source channel;
      // note that the parameters are passed column by column (00, 10, 20, 01, 11, 21, 02, 12, 22).
5256  // the documentation of this function is designed for RGB24 to YUV24 conversion
5257 
5258  // precise color space conversion:
5259  // | Y |   |  0.2578125   0.5039063   0.09765625   16.0 |   | R |
5260  // | U | = | -0.1484375  -0.2890625   0.4375      128.0 | * | G |
5261  // | V |   |  0.4375     -0.3671875  -0.0703125   128.0 |   | B |
5262  //                                                         | 1 |
5263 
5264  // approximation (factors multiplied by 128):
5265  // Y = ( 33 * R + 64 * G + 13 * B) / 128 + 16
5266  // U = (-19 * R - 37 * G + 56 * B) / 128 + 128
5267  // V = ( 56 * R - 47 * G - 9 * B) / 128 + 128
5268 
5269  // we expect the following input pattern (for here RGB24), byte index on top, three 128 bit registers, lowest address to the right:
5270  // FEDC BA98 7654 3210 FEDC BA98 7654 3210 FEDC BA98 7654 3210
5271  // BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR
5272 
5273  const __m128i sourceA_u_8x16 = _mm_loadu_si128((const __m128i*)source + 0);
5274  const __m128i sourceB_u_8x16 = _mm_loadu_si128((const __m128i*)source + 1);
5275  const __m128i sourceC_u_8x16 = _mm_loadu_si128((const __m128i*)source + 2);
5276 
      // we separate the interleaved data into one register per channel (helper defined in SSE.h)
5277  __m128i channel0_u_8x16;
5278  __m128i channel1_u_8x16;
5279  __m128i channel2_u_8x16;
5280  SSE::deInterleave3Channel8Bit48Elements(sourceA_u_8x16, sourceB_u_8x16, sourceC_u_8x16, channel0_u_8x16, channel1_u_8x16, channel2_u_8x16);
5281 
5282  // now we need 16 bit values instead of 8 bit values
      // each channel register is split into two registers with eight 16 bit lanes:
      // 'low' presumably keeps the lower byte of each 16 bit lane (see SSE::removeHighBits16_8), 'high' is the upper byte moved down by 8 bits
      // NOTE: despite the '_u_8x16' suffix, the low/high/result variables below hold eight 16 bit lanes
5283 
5284  const __m128i channel0_low_u_8x16 = SSE::removeHighBits16_8(channel0_u_8x16);
5285  const __m128i channel1_low_u_8x16 = SSE::removeHighBits16_8(channel1_u_8x16);
5286  const __m128i channel2_low_u_8x16 = SSE::removeHighBits16_8(channel2_u_8x16);
5287 
5288  const __m128i channel0_high_u_8x16 = _mm_srli_epi16(channel0_u_8x16, 8);
5289  const __m128i channel1_high_u_8x16 = _mm_srli_epi16(channel1_u_8x16, 8);
5290  const __m128i channel2_high_u_8x16 = _mm_srli_epi16(channel2_u_8x16, 8);
5291 
5292  // we multiply each channel with the corresponding multiplication factors
      // _mm_mullo_epi16 keeps the lower 16 bits of each product only, so the factors are assumed to keep every sum within the signed 16 bit range - TODO confirm against callers
5293 
5294  __m128i result0_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel00_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel01_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel02_128_s_16x8));
5295  __m128i result1_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel10_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel11_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel12_128_s_16x8));
5296  __m128i result2_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel20_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel21_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel22_128_s_16x8));
5297 
5298  __m128i result0_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel00_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel01_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel02_128_s_16x8));
5299  __m128i result1_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel10_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel11_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel12_128_s_16x8));
5300  __m128i result2_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel20_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel21_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel22_128_s_16x8));
5301 
5302  // we normalize the result by 128 (signed division via right shift by 7 bits, helper defined in SSE.h) and afterwards add the bias
5303 
5304  result0_low_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result0_low_u_8x16, 7), biasChannel0_s_16x8);
5305  result1_low_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result1_low_u_8x16, 7), biasChannel1_s_16x8);
5306  result2_low_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result2_low_u_8x16, 7), biasChannel2_s_16x8);
5307 
5308  result0_high_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result0_high_u_8x16, 7), biasChannel0_s_16x8);
5309  result1_high_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result1_high_u_8x16, 7), biasChannel1_s_16x8);
5310  result2_high_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result2_high_u_8x16, 7), biasChannel2_s_16x8);
5311 
5312  // from here, we need values within the range [0, 255], so that we clamp the (signed 16 bit) results
5313 
5314  const __m128i constant255_s_16x8 = _mm_set1_epi16(255);
5315 
5316  result0_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result0_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5317  result1_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result1_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5318  result2_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result2_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5319 
5320  result0_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result0_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5321  result1_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result1_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5322  result2_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result2_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5323 
5324  // finally, we have to get rid of the upper zero bits by combining two 128 bit registers to one (the clamped 'high' results are moved back into the upper byte of each 16 bit lane):
5325  const __m128i result0_u_8x16 = _mm_or_si128(result0_low_u_8x16, _mm_slli_epi16(result0_high_u_8x16, 8));
5326  const __m128i result1_u_8x16 = _mm_or_si128(result1_low_u_8x16, _mm_slli_epi16(result1_high_u_8x16, 8));
5327  const __m128i result2_u_8x16 = _mm_or_si128(result2_low_u_8x16, _mm_slli_epi16(result2_high_u_8x16, 8));
5328 
      // we interleave the three channel registers again (helper defined in SSE.h)
5329  __m128i resultA_u_8x16;
5330  __m128i resultB_u_8x16;
5331  __m128i resultC_u_8x16;
5332  SSE::interleave3Channel8Bit48Elements(result0_u_8x16, result1_u_8x16, result2_u_8x16, resultA_u_8x16, resultB_u_8x16, resultC_u_8x16);
5333 
5334  // and we can store the result (unaligned stores, 3 * 16 bytes)
5335  _mm_storeu_si128((__m128i*)target + 0, resultA_u_8x16);
5336  _mm_storeu_si128((__m128i*)target + 1, resultB_u_8x16);
5337  _mm_storeu_si128((__m128i*)target + 2, resultC_u_8x16);
5338 }
5339 
5340 OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& factorChannel00_1024_s_16x8, const __m128i& factorChannel10_1024_s_16x8, const __m128i& factorChannel20_1024_s_16x8, const __m128i& factorChannel01_1024_s_16x8, const __m128i& factorChannel11_1024_s_16x8, const __m128i& factorChannel21_1024_s_16x8, const __m128i& factorChannel02_1024_s_16x8, const __m128i& factorChannel12_1024_s_16x8, const __m128i& factorChannel22_1024_s_16x8, const __m128i& biasChannel0_1024_s_32x4, const __m128i& biasChannel1_1024_s_32x4, const __m128i& biasChannel2_1024_s_32x4)
5341 {
5342  ocean_assert(source != nullptr && target != nullptr);
5343 
      // Converts 16 pixels with 3 interleaved 8 bit channels (48 bytes are read from 'source', 48 bytes are written to 'target').
      // Each target channel is computed with 32 bit intermediate precision:
      //   target[t] = saturate_u8((source[0] * factorChannel<t>0 + source[1] * factorChannel<t>1 + source[2] * factorChannel<t>2 + biasChannel<t>) / 1024)
      // In 'factorChannel<t><s>' the first index is the target channel and the second index is the source channel;
      // note that the parameters are passed column by column (00, 10, 20, 01, 11, 21, 02, 12, 22).
      // The bias is added before the division, so the bias parameters are expected in the 1024-scaled domain (as their '_1024_' names indicate).
5344  // the documentation of this function is designed for YUV24 to RGB24 conversion
5345 
5346  // precise color space conversion:
5347  // | R |   | 1.1639404296875   0.0               1.595947265625   -222.904296875 |   | Y |
5348  // | G | = | 1.1639404296875  -0.3909912109375  -0.81298828125     135.486328125 | * | U |
5349  // | B |   | 1.1639404296875   2.0179443359375   0.0              -276.919921875 |   | V |
5350  //                                                                                   | 1 |
5351 
5352  // approximation (the 3x3 factors are multiplied by 1024, the bias column is listed unscaled):
5353  // | R |   | 1192     0   1634   -223 |   | Y |
5354  // | G | = | 1192  -400   -833    135 | * | U |
5355  // | B |   | 1192  2066      0   -277 |   | V |
5356  //                                        | 1 |
5357 
5358  // we expect the following interleaved input pattern, byte index on top, three 128 bit registers, lowest address to the right
5359  // FEDC BA98 7654 3210 FEDC BA98 7654 3210 FEDC BA98 7654 3210
5360  // BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR   (R/G/B denote source channel 0/1/2, i.e., Y/U/V for the conversion documented above)
5361 
5362  const __m128i sourceA_u_8x16 = _mm_loadu_si128((const __m128i*)source + 0);
5363  const __m128i sourceB_u_8x16 = _mm_loadu_si128((const __m128i*)source + 1);
5364  const __m128i sourceC_u_8x16 = _mm_loadu_si128((const __m128i*)source + 2);
5365 
      // we separate the interleaved data into one register per channel (helper defined in SSE.h)
5366  __m128i channel0_u_8x16;
5367  __m128i channel1_u_8x16;
5368  __m128i channel2_u_8x16;
5369  SSE::deInterleave3Channel8Bit48Elements(sourceA_u_8x16, sourceB_u_8x16, sourceC_u_8x16, channel0_u_8x16, channel1_u_8x16, channel2_u_8x16);
5370 
5371 
5372  // now we need 16 bit values instead of 8 bit values
      // 'low' presumably keeps the lower byte of each 16 bit lane (see SSE::removeHighBits16_8), 'high' is the upper byte moved down by 8 bits
5373 
5374  const __m128i channel0_low_u_16x8 = SSE::removeHighBits16_8(channel0_u_8x16);
5375  const __m128i channel1_low_u_16x8 = SSE::removeHighBits16_8(channel1_u_8x16);
5376  const __m128i channel2_low_u_16x8 = SSE::removeHighBits16_8(channel2_u_8x16);
5377 
5378  const __m128i channel0_high_u_16x8 = _mm_srli_epi16(channel0_u_8x16, 8);
5379  const __m128i channel1_high_u_16x8 = _mm_srli_epi16(channel1_u_8x16, 8);
5380  const __m128i channel2_high_u_16x8 = _mm_srli_epi16(channel2_u_8x16, 8);
5381 
5382 
5383  // we multiply each channel with the corresponding multiplication factors (int16_t * int16_t = int32_t), we add the bias, and we normalize the result by 1024
      // every group of eight 16 bit lanes yields two registers (A and B) with four 32 bit results each; the multiplication helpers are defined in SSE.h
5384 
      // first target channel
5385  __m128i result0_low_A_s_32x4;
5386  __m128i result0_low_B_s_32x4;
5387  __m128i result0_high_A_s_32x4;
5388  __m128i result0_high_B_s_32x4;
5389 
5390  SSE::multiplyInt8x16ToInt32x8(channel0_low_u_16x8, factorChannel00_1024_s_16x8, result0_low_A_s_32x4, result0_low_B_s_32x4);
5391  SSE::multiplyInt8x16ToInt32x8(channel0_high_u_16x8, factorChannel00_1024_s_16x8, result0_high_A_s_32x4, result0_high_B_s_32x4);
5392 
5393  SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_low_u_16x8, factorChannel01_1024_s_16x8, result0_low_A_s_32x4, result0_low_B_s_32x4);
5394  SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_high_u_16x8, factorChannel01_1024_s_16x8, result0_high_A_s_32x4, result0_high_B_s_32x4);
5395 
5396  SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_low_u_16x8, factorChannel02_1024_s_16x8, result0_low_A_s_32x4, result0_low_B_s_32x4);
5397  SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_high_u_16x8, factorChannel02_1024_s_16x8, result0_high_A_s_32x4, result0_high_B_s_32x4);
5398 
5399  result0_low_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result0_low_A_s_32x4, biasChannel0_1024_s_32x4), 10);
5400  result0_low_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result0_low_B_s_32x4, biasChannel0_1024_s_32x4), 10);
5401  result0_high_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result0_high_A_s_32x4, biasChannel0_1024_s_32x4), 10);
5402  result0_high_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result0_high_B_s_32x4, biasChannel0_1024_s_32x4), 10);
5403 
5404 
      // second target channel
5405  __m128i result1_low_A_s_32x4;
5406  __m128i result1_low_B_s_32x4;
5407  __m128i result1_high_A_s_32x4;
5408  __m128i result1_high_B_s_32x4;
5409 
5410  SSE::multiplyInt8x16ToInt32x8(channel0_low_u_16x8, factorChannel10_1024_s_16x8, result1_low_A_s_32x4, result1_low_B_s_32x4);
5411  SSE::multiplyInt8x16ToInt32x8(channel0_high_u_16x8, factorChannel10_1024_s_16x8, result1_high_A_s_32x4, result1_high_B_s_32x4);
5412 
5413  SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_low_u_16x8, factorChannel11_1024_s_16x8, result1_low_A_s_32x4, result1_low_B_s_32x4);
5414  SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_high_u_16x8, factorChannel11_1024_s_16x8, result1_high_A_s_32x4, result1_high_B_s_32x4);
5415 
5416  SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_low_u_16x8, factorChannel12_1024_s_16x8, result1_low_A_s_32x4, result1_low_B_s_32x4);
5417  SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_high_u_16x8, factorChannel12_1024_s_16x8, result1_high_A_s_32x4, result1_high_B_s_32x4);
5418 
5419  result1_low_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result1_low_A_s_32x4, biasChannel1_1024_s_32x4), 10);
5420  result1_low_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result1_low_B_s_32x4, biasChannel1_1024_s_32x4), 10);
5421  result1_high_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result1_high_A_s_32x4, biasChannel1_1024_s_32x4), 10);
5422  result1_high_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result1_high_B_s_32x4, biasChannel1_1024_s_32x4), 10);
5423 
5424 
      // third target channel
5425  __m128i result2_low_A_s_32x4;
5426  __m128i result2_low_B_s_32x4;
5427  __m128i result2_high_A_s_32x4;
5428  __m128i result2_high_B_s_32x4;
5429 
5430  SSE::multiplyInt8x16ToInt32x8(channel0_low_u_16x8, factorChannel20_1024_s_16x8, result2_low_A_s_32x4, result2_low_B_s_32x4);
5431  SSE::multiplyInt8x16ToInt32x8(channel0_high_u_16x8, factorChannel20_1024_s_16x8, result2_high_A_s_32x4, result2_high_B_s_32x4);
5432 
5433  SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_low_u_16x8, factorChannel21_1024_s_16x8, result2_low_A_s_32x4, result2_low_B_s_32x4);
5434  SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_high_u_16x8, factorChannel21_1024_s_16x8, result2_high_A_s_32x4, result2_high_B_s_32x4);
5435 
5436  SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_low_u_16x8, factorChannel22_1024_s_16x8, result2_low_A_s_32x4, result2_low_B_s_32x4);
5437  SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_high_u_16x8, factorChannel22_1024_s_16x8, result2_high_A_s_32x4, result2_high_B_s_32x4);
5438 
5439  result2_low_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result2_low_A_s_32x4, biasChannel2_1024_s_32x4), 10);
5440  result2_low_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result2_low_B_s_32x4, biasChannel2_1024_s_32x4), 10);
5441  result2_high_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result2_high_A_s_32x4, biasChannel2_1024_s_32x4), 10);
5442  result2_high_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result2_high_B_s_32x4, biasChannel2_1024_s_32x4), 10);
5443 
5444 
5445  // now we have int32_t values with 0x0000 or 0xFFFF in the high 16 bits
5446  // thus we can merge 8 int32_t values to 8 int16_t values:
      // the 'low' result keeps its lower 16 bits, the 'high' result is moved into the upper 16 bits of the same 32 bit lane
5447 
5448  const __m128i mask_0000FFFF_32x4 = _mm_set1_epi32(0x0000FFFF);
5449 
5450  __m128i result0_A_s_16x8 = _mm_or_si128(_mm_and_si128(result0_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result0_high_A_s_32x4, 16));
5451  __m128i result0_B_s_16x8 = _mm_or_si128(_mm_and_si128(result0_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result0_high_B_s_32x4, 16));
5452 
5453  __m128i result1_A_s_16x8 = _mm_or_si128(_mm_and_si128(result1_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result1_high_A_s_32x4, 16));
5454  __m128i result1_B_s_16x8 = _mm_or_si128(_mm_and_si128(result1_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result1_high_B_s_32x4, 16));
5455 
5456  __m128i result2_A_s_16x8 = _mm_or_si128(_mm_and_si128(result2_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result2_high_A_s_32x4, 16));
5457  __m128i result2_B_s_16x8 = _mm_or_si128(_mm_and_si128(result2_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result2_high_B_s_32x4, 16));
5458 
5459 
5460  // we combine 16 int16_t values to 16 uint8_t values (saturated to [0, 255], so no explicit clamping is necessary)
5461 
5462  const __m128i result0_u_8x16 = _mm_packus_epi16(result0_A_s_16x8, result0_B_s_16x8);
5463  const __m128i result1_u_8x16 = _mm_packus_epi16(result1_A_s_16x8, result1_B_s_16x8);
5464  const __m128i result2_u_8x16 = _mm_packus_epi16(result2_A_s_16x8, result2_B_s_16x8);
5465 
      // we interleave the three channel registers again (helper defined in SSE.h)
5466  __m128i resultA_u_8x16;
5467  __m128i resultB_u_8x16;
5468  __m128i resultC_u_8x16;
5469  SSE::interleave3Channel8Bit48Elements(result0_u_8x16, result1_u_8x16, result2_u_8x16, resultA_u_8x16, resultB_u_8x16, resultC_u_8x16);
5470 
5471  // and we can store the result (unaligned stores, 3 * 16 bytes)
5472  _mm_storeu_si128((__m128i*)target + 0, resultA_u_8x16);
5473  _mm_storeu_si128((__m128i*)target + 1, resultB_u_8x16);
5474  _mm_storeu_si128((__m128i*)target + 2, resultC_u_8x16);
5475 }
5476 
5477 OCEAN_FORCE_INLINE void FrameChannels::convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactors0123_128_s_32x4)
5478 {
5479  ocean_assert(source != nullptr && target != nullptr);
5480 
      // Converts 16 pixels with 4 interleaved 8 bit channels to 16 pixels with 1 channel (64 bytes are read from 'source', 16 bytes are written to 'target'):
      //   target = (source[0] * f0 + source[1] * f1 + source[2] * f2 + source[3] * f3 + 64) / 128
      // The factors are consumed by _mm_maddubs_epi16 as signed 8 bit values, one per source byte;
      // presumably each 32 bit lane of 'multiplicationFactors0123_128_s_32x4' holds the four factors f0..f3 - TODO confirm against callers.
5481  // the documentation of this function is designed for RGBA32 to Y8 conversion
5482  // however, in general this function can be used to apply a linear combination on the four source channels
5483  // to create one output channel
5484 
5485  // precise color space conversion:
5486  // Y = 0.299 * R + 0.587 * G + 0.114 * B
5487 
5488  // approximation:
5489  // Y = (38 * R + 75 * G + 15 * B) / 128
5490 
5491  // we expect the following input pattern (for here RGBA32), byte index on top, lowest address to the right:
5492  // FEDC BA98 7654 3210
5493  // ABGR ABGR ABGR ABGR
5494 
5495  // we calculate:
5496  // (int16_t)((uint8_t)R * (signed char)38) + (int16_t)((uint8_t)G * (signed char)75) for the first 16 bits
5497  // (int16_t)((uint8_t)B * (signed char)15) + (int16_t)((uint8_t)A * (signed char)0) for the second 16 bits
5498 
5499  // we store eight 16 bit values holding 64 for rounding purpose (despite the '_u_8x16' suffix, the constant is used as eight 16 bit lanes):
5500  // FE DC BA 98 76 54 32 10
5501  // 64 64 64 64 64 64 64 64
5502  const __m128i constant64_u_8x16 = _mm_set1_epi32(0x00400040);
5503 
5504  const __m128i pixelsA_u_8x16 = _mm_loadu_si128((const __m128i*)source + 0);
5505  const __m128i pixelsB_u_8x16 = _mm_loadu_si128((const __m128i*)source + 1);
5506  const __m128i pixelsC_u_8x16 = _mm_loadu_si128((const __m128i*)source + 2);
5507  const __m128i pixelsD_u_8x16 = _mm_loadu_si128((const __m128i*)source + 3);
5508 
5509  // we multiply unsigned 8 bit pixels with signed 8 bit factors and sum adjacent products (with signed 16 bit saturation), we get the following pattern
5510  // FE DC BA 98 76 54 32 10
5511  // 0b gr 0b gr 0b gr 0b gr
5512  const __m128i intermediateResults0_u_16x8 = _mm_maddubs_epi16(pixelsA_u_8x16, multiplicationFactors0123_128_s_32x4);
5513  const __m128i intermediateResults1_u_16x8 = _mm_maddubs_epi16(pixelsB_u_8x16, multiplicationFactors0123_128_s_32x4);
5514  const __m128i intermediateResults2_u_16x8 = _mm_maddubs_epi16(pixelsC_u_8x16, multiplicationFactors0123_128_s_32x4);
5515  const __m128i intermediateResults3_u_16x8 = _mm_maddubs_epi16(pixelsD_u_8x16, multiplicationFactors0123_128_s_32x4);
5516 
5517  // now we sum the pairs of neighboring 16 bit intermediate results, so that each 16 bit lane holds the sum for one pixel (grayA: pixels 0-7, grayB: pixels 8-15)
5518  __m128i grayA_u_16x8 = _mm_hadd_epi16(intermediateResults0_u_16x8, intermediateResults1_u_16x8);
5519  __m128i grayB_u_16x8 = _mm_hadd_epi16(intermediateResults2_u_16x8, intermediateResults3_u_16x8);
5520 
5521  // we add 64 for rounding purpose
5522  grayA_u_16x8 = _mm_add_epi16(grayA_u_16x8, constant64_u_8x16);
5523  grayB_u_16x8 = _mm_add_epi16(grayB_u_16x8, constant64_u_8x16);
5524 
5525  // we shift the multiplication results by 7 bits (= 128)
      // the shift is a logical (unsigned) shift, so the linear combination is assumed to be non-negative - TODO confirm against callers
5526  grayA_u_16x8 = _mm_srli_epi16(grayA_u_16x8, 7);
5527  grayB_u_16x8 = _mm_srli_epi16(grayB_u_16x8, 7);
5528 
5529  // now we have the following pattern (in two 128 bit registers):
5530  // FEDCBA9876543210
5531  // 0Y0Y0Y0Y0Y0Y0Y0Y
5532 
5533  // finally, we have to get rid of the upper zero bits by combining two 128 bit registers to one (16 bit to 8 bit with unsigned saturation):
5534  const __m128i gray_u_8x16 = _mm_packus_epi16(grayA_u_16x8, grayB_u_16x8);
5535 
5536  // and we can store the result (one unaligned store, 16 bytes)
5537  _mm_storeu_si128((__m128i*)target, gray_u_8x16);
5538 }
5539 
      // NOTE(review): unlike the other SSE converters defined in this header, this definition carries no OCEAN_FORCE_INLINE - confirm that the in-class declaration makes it inline
5540 void FrameChannels::convert4ChannelsTo2Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactorsChannel0_0123_128_s_16x8, const __m128i& multiplicationFactorsChannel1_0123_128_s_16x8)
5541 {
5542  ocean_assert(source != nullptr && target != nullptr);
5543 
      // Converts 16 pixels with 4 interleaved 8 bit channels to 16 pixels with 2 interleaved channels (64 bytes are read from 'source', 32 bytes are written to 'target'):
      //   target[t] = (source[0] * f<t>0 + source[1] * f<t>1 + source[2] * f<t>2 + source[3] * f<t>3 + 64) / 128, for t = 0, 1
      // The factors of each target channel are consumed by _mm_madd_epi16 as signed 16 bit values, one per (zero-extended) source byte.
5544  // the documentation of this function is designed for RGBA32 to YA16 conversion
5545  // however, in general this function can be used to apply a linear combination on the four source channels
5546  // to create two output channels
5547 
5548  // precise color space conversion:
5549  // Y = 0.299 * R + 0.587 * G + 0.114 * B + 0.0 * A
5550  // A = 0.0 * R + 0.0 * G + 0.0 * B + 1.0 * A
5551 
5552  // approximation:
5553  // Y = (38 * R + 75 * G + 15 * B + 0 * A) / 128
5554  // A = (128 * A) / 128
5555 
5556  // we expect the following input pattern (for here RGBA32), byte index on top, lowest address to the right:
5557  // FEDC BA98 7654 3210
5558  // ABGR ABGR ABGR ABGR
5559 
5560  // we store eight 16 bit values holding 64 for rounding purpose (despite the '_u_8x16' suffix, the constant is used as eight 16 bit lanes):
5561  // FE DC BA 98 76 54 32 10
5562  // 64 64 64 64 64 64 64 64
5563  const __m128i constant64_u_8x16 = _mm_set1_epi32(0x00400040);
5564 
5565  const __m128i pixelsA_u_8x16 = _mm_loadu_si128((const __m128i*)source + 0);
5566  const __m128i pixelsB_u_8x16 = _mm_loadu_si128((const __m128i*)source + 1);
5567  const __m128i pixelsC_u_8x16 = _mm_loadu_si128((const __m128i*)source + 2);
5568  const __m128i pixelsD_u_8x16 = _mm_loadu_si128((const __m128i*)source + 3);
5569 
5570  // we convert the 8 bit values to 16 bit values (zero extension by interleaving with zero bytes), each resulting register holds two pixels
5571 
5572  const __m128i pixelsA_u_16x8 = _mm_unpacklo_epi8(pixelsA_u_8x16, _mm_setzero_si128());
5573  const __m128i pixelsB_u_16x8 = _mm_unpackhi_epi8(pixelsA_u_8x16, _mm_setzero_si128());
5574 
5575  const __m128i pixelsC_u_16x8 = _mm_unpacklo_epi8(pixelsB_u_8x16, _mm_setzero_si128());
5576  const __m128i pixelsD_u_16x8 = _mm_unpackhi_epi8(pixelsB_u_8x16, _mm_setzero_si128());
5577 
5578  const __m128i pixelsE_u_16x8 = _mm_unpacklo_epi8(pixelsC_u_8x16, _mm_setzero_si128());
5579  const __m128i pixelsF_u_16x8 = _mm_unpackhi_epi8(pixelsC_u_8x16, _mm_setzero_si128());
5580 
5581  const __m128i pixelsG_u_16x8 = _mm_unpacklo_epi8(pixelsD_u_8x16, _mm_setzero_si128());
5582  const __m128i pixelsH_u_16x8 = _mm_unpackhi_epi8(pixelsD_u_8x16, _mm_setzero_si128());
5583 
5584  // now we have the following pattern
5585  // FE DC BA 98 76 54 32 10
5586  // 0a 0b 0g 0r 0a 0b 0g 0r
5587 
      // first target channel: pairwise multiply-add to 32 bit, followed by a horizontal add so that each 32 bit lane holds the sum for one pixel
5588  const __m128i intermediateResultsChannel0_0_u_32x4 = _mm_madd_epi16(pixelsA_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8); // r * f00 + g * f01 | b * f02 + a * f03 | ...
5589  const __m128i intermediateResultsChannel0_1_u_32x4 = _mm_madd_epi16(pixelsB_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5590  const __m128i intermediateResultsChannel0_2_u_32x4 = _mm_madd_epi16(pixelsC_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5591  const __m128i intermediateResultsChannel0_3_u_32x4 = _mm_madd_epi16(pixelsD_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5592  const __m128i intermediateResultsChannel0_4_u_32x4 = _mm_madd_epi16(pixelsE_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5593  const __m128i intermediateResultsChannel0_5_u_32x4 = _mm_madd_epi16(pixelsF_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5594  const __m128i intermediateResultsChannel0_6_u_32x4 = _mm_madd_epi16(pixelsG_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5595  const __m128i intermediateResultsChannel0_7_u_32x4 = _mm_madd_epi16(pixelsH_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5596 
5597  const __m128i resultsChannel0_A_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_0_u_32x4, intermediateResultsChannel0_1_u_32x4); // r * f00 + g * f01 + b * f02 + a * f03 | ...
5598  const __m128i resultsChannel0_B_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_2_u_32x4, intermediateResultsChannel0_3_u_32x4);
5599  const __m128i resultsChannel0_C_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_4_u_32x4, intermediateResultsChannel0_5_u_32x4);
5600  const __m128i resultsChannel0_D_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_6_u_32x4, intermediateResultsChannel0_7_u_32x4);
5601 
5602 
      // second target channel: same scheme with the second set of factors
5603  const __m128i intermediateResultsChannel1_0_u_32x4 = _mm_madd_epi16(pixelsA_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8); // r * f10 + g * f11 | b * f12 + a * f13 | ...
5604  const __m128i intermediateResultsChannel1_1_u_32x4 = _mm_madd_epi16(pixelsB_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5605  const __m128i intermediateResultsChannel1_2_u_32x4 = _mm_madd_epi16(pixelsC_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5606  const __m128i intermediateResultsChannel1_3_u_32x4 = _mm_madd_epi16(pixelsD_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5607  const __m128i intermediateResultsChannel1_4_u_32x4 = _mm_madd_epi16(pixelsE_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5608  const __m128i intermediateResultsChannel1_5_u_32x4 = _mm_madd_epi16(pixelsF_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5609  const __m128i intermediateResultsChannel1_6_u_32x4 = _mm_madd_epi16(pixelsG_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5610  const __m128i intermediateResultsChannel1_7_u_32x4 = _mm_madd_epi16(pixelsH_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5611 
5612  const __m128i resultsChannel1_A_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_0_u_32x4, intermediateResultsChannel1_1_u_32x4); // r * f10 + g * f11 + b * f12 + a * f13 | ...
5613  const __m128i resultsChannel1_B_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_2_u_32x4, intermediateResultsChannel1_3_u_32x4);
5614  const __m128i resultsChannel1_C_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_4_u_32x4, intermediateResultsChannel1_5_u_32x4);
5615  const __m128i resultsChannel1_D_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_6_u_32x4, intermediateResultsChannel1_7_u_32x4);
5616 
5617  // now we interleave the results of first and second channel (as both results fit into 16 bit)
      // the first channel stays in the lower 16 bits of each 32 bit lane, the second channel is moved into the upper 16 bits;
      // this relies on the first channel's sums having zero upper 16 bits, i.e., being non-negative and below 2^16 - TODO confirm against callers
5618 
5619  __m128i resultA_u_16x8 = _mm_or_si128(resultsChannel0_A_u_32x4, _mm_slli_epi32(resultsChannel1_A_u_32x4, 16));
5620  __m128i resultB_u_16x8 = _mm_or_si128(resultsChannel0_B_u_32x4, _mm_slli_epi32(resultsChannel1_B_u_32x4, 16));
5621  __m128i resultC_u_16x8 = _mm_or_si128(resultsChannel0_C_u_32x4, _mm_slli_epi32(resultsChannel1_C_u_32x4, 16));
5622  __m128i resultD_u_16x8 = _mm_or_si128(resultsChannel0_D_u_32x4, _mm_slli_epi32(resultsChannel1_D_u_32x4, 16));
5623 
5624  // we add 64 for rounding purpose
5625  resultA_u_16x8 = _mm_add_epi16(resultA_u_16x8, constant64_u_8x16);
5626  resultB_u_16x8 = _mm_add_epi16(resultB_u_16x8, constant64_u_8x16);
5627  resultC_u_16x8 = _mm_add_epi16(resultC_u_16x8, constant64_u_8x16);
5628  resultD_u_16x8 = _mm_add_epi16(resultD_u_16x8, constant64_u_8x16);
5629 
5630  // we shift the multiplication results by 7 bits (= 128), using a logical (unsigned) shift
5631  resultA_u_16x8 = _mm_srli_epi16(resultA_u_16x8, 7);
5632  resultB_u_16x8 = _mm_srli_epi16(resultB_u_16x8, 7);
5633  resultC_u_16x8 = _mm_srli_epi16(resultC_u_16x8, 7);
5634  resultD_u_16x8 = _mm_srli_epi16(resultD_u_16x8, 7);
5635 
5636  // now we have the following pattern (in four 128 bit registers):
5637  // FEDCBA9876543210
5638  // 0A0Y0A0Y0A0Y0A0Y
5639 
5640  // finally, we have to get rid of the upper zero bits by combining two 128 bit registers to one (16 bit to 8 bit with unsigned saturation):
5641  const __m128i resultAB_u_8x16 = _mm_packus_epi16(resultA_u_16x8, resultB_u_16x8);
5642  const __m128i resultCD_u_8x16 = _mm_packus_epi16(resultC_u_16x8, resultD_u_16x8);
5643 
5644  // and we can store the result (two unaligned stores, 2 * 16 bytes)
5645  _mm_storeu_si128((__m128i*)target + 0, resultAB_u_8x16);
5646  _mm_storeu_si128((__m128i*)target + 1, resultCD_u_8x16);
5647 }
5648 
5649 #endif // OCEAN_HARDWARE_SSE_VERSION
5650 
5651 #if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5652 
5653 template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2>
5654 void FrameChannels::convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel0_128_u_8x8, const uint8x8_t& factorChannel1_128_u_8x8, const uint8x8_t& factorChannel2_128_u_8x8)
5655 {
      // at least one source channel must contribute to the result
5656  static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2, "Invalid multiplication factors!");
5657 
5658  ocean_assert(source != nullptr && target != nullptr);
5659 
      // Converts 8 pixels with 3 interleaved 8 bit channels to 8 pixels with 1 channel (24 bytes are read from 'source', 8 bytes are written to 'target'):
      //   target = saturate_u8((source[0] * factorChannel0 + source[1] * factorChannel1 + source[2] * factorChannel2 + 64) / 128)
      // A channel whose template flag 'tUseFactorChannel<i>' is false is skipped at compile time, its factor parameter is not read.
5660  // the documentation of this function is designed for RGB24 to Y8 conversion
5661 
5662  // precise color space conversion:
5663  // Y = 0.299 * R + 0.587 * G + 0.114 * B
5664 
5665  // approximation:
5666  // Y = (38 * R + 75 * G + 15 * B) / 128
5667 
5668  // we expect the following input pattern (for here RGB24):
5669  // FEDC BA98 7654 3210
5670  // RBGR BGRB GRBG RBGR
5671 
5672  // we load 8 pixels (= 3 * 8 values) and directly deinterleave the 3 channels so that we receive the following patterns:
5673  // source_u_8x8x3.val[0]: R R R R R R R R
5674  // source_u_8x8x3.val[1]: G G G G G G G G
5675  // source_u_8x8x3.val[2]: B B B B B B B B
5676 
5677  uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5678 
5679  uint16x8_t intermediateResults_u_16x8;
5680 
5681  // we multiply the first channel with the specified factor (unless zero), widening 8 bit * 8 bit to 16 bit
5682 
5683  if constexpr (tUseFactorChannel0)
5684  {
5685  intermediateResults_u_16x8 = vmull_u8(source_u_8x8x3.val[0], factorChannel0_128_u_8x8);
5686  }
5687  else
5688  {
      // the accumulator starts with zero so that the following multiply-accumulate operations have a defined start value
5689  intermediateResults_u_16x8 = vdupq_n_u16(0u);
5690  }
5691 
5692  // we multiply the second channel with the specified factor (unless zero) and accumulate the results
5693 
5694  if constexpr (tUseFactorChannel1)
5695  {
5696  intermediateResults_u_16x8 = vmlal_u8(intermediateResults_u_16x8, source_u_8x8x3.val[1], factorChannel1_128_u_8x8);
5697  }
5698 
5699  // we multiply the third channel with the specified factor (unless zero) and accumulate the results
5700 
5701  if constexpr (tUseFactorChannel2)
5702  {
5703  intermediateResults_u_16x8 = vmlal_u8(intermediateResults_u_16x8, source_u_8x8x3.val[2], factorChannel2_128_u_8x8);
5704  }
5705 
5706  // we shift the 16 bit values by 7 bits (= 128), apply rounding, and narrow the 16 bit integers to 8 bit integers (with saturation) within one operation
5707  uint8x8_t results_u_8x8 = vqrshrn_n_u16(intermediateResults_u_16x8, 7); // results_u_8x8 = (intermediateResults_u_16x8 + 2^6) >> 7
5708 
5709  // and we can store the result
5710  vst1_u8(target, results_u_8x8);
5711 }
5712 
5713 OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8)
5714 {
5715  ocean_assert(source != nullptr && target != nullptr);
5716 
      // Converts 8 pixels with 3 interleaved 8 bit channels (24 bytes are read from 'source', 24 bytes are written to 'target').
      // The bias is subtracted from the source channels before the 3x3 multiplication is applied:
      //   target[t] = saturate_u8(((source[0] - bias0) * factorChannel<t>0 + (source[1] - bias1) * factorChannel<t>1 + (source[2] - bias2) * factorChannel<t>2 + 32) / 64)
      // In 'factorChannel<t><s>' the first index is the target channel and the second index is the source channel;
      // note that the parameters are passed column by column (00, 10, 20, 01, 11, 21, 02, 12, 22).
5717  // the documentation of this function is designed for YUV24 to RGB24 conversion
5718 
5719  // precise color space conversion:
5720  // | R |   | 1   0.0         1.370705   -175.45024  |   | Y |
5721  // | G | = | 1  -0.3376335  -0.698001    132.561152 | * | U |
5722  // | B |   | 1   1.732446    0.0        -221.753088 |   | V |
5723  //                                                     | 1 |
5724 
5725  // approximation (factors multiplied by 64, the results are normalized by 64 below):
5726  // R = 64 * Y + 0 * (U - 128) + 88 * (V - 128)
5727  // G = 64 * Y - 22 * (U - 128) - 45 * (V - 128)
5728  // B = 64 * Y + 111 * (U - 128) + 0 * (V - 128)
5729 
5730  // we load 8 pixels (= 3 * 8 values) and directly deinterleave the 3 channels so that we receive the following patterns (for here YUV24):
5731  // source_u_8x8x3.val[0]: Y Y Y Y Y Y Y Y
5732  // source_u_8x8x3.val[1]: U U U U U U U U
5733  // source_u_8x8x3.val[2]: V V V V V V V V
5734 
5735  const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5736 
5737  // Y' = Y - bias0, U' = U - bias1, V' = V - bias2 (widening unsigned subtraction, the 16 bit result is re-interpreted as signed value)
5738  const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[0], biasChannel0_u_8x8));
5739  const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[1], biasChannel1_u_8x8));
5740  const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[2], biasChannel2_u_8x8));
5741 
5742  // now we apply the 3x3 matrix multiplication
      // the individual products are not saturated (vmulq_s16 keeps the lower 16 bits), only the accumulation is; the factors are assumed to keep each product within the signed 16 bit range - TODO confirm against callers
5743 
5744  int16x8_t intermediateResults0_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel00_64_s_16x8);
5745  int16x8_t intermediateResults1_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel10_64_s_16x8);
5746  int16x8_t intermediateResults2_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel20_64_s_16x8);
5747 
5748  intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, vmulq_s16(source1_s_16x8, factorChannel01_64_s_16x8)); // intermediateResults0 = saturated(intermediateResults0 + source1 * factorChannel01)
5749  intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, vmulq_s16(source1_s_16x8, factorChannel11_64_s_16x8));
5750  intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, vmulq_s16(source1_s_16x8, factorChannel21_64_s_16x8));
5751 
5752  intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, vmulq_s16(source2_s_16x8, factorChannel02_64_s_16x8));
5753  intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, vmulq_s16(source2_s_16x8, factorChannel12_64_s_16x8));
5754  intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, vmulq_s16(source2_s_16x8, factorChannel22_64_s_16x8));
5755 
5756  uint8x8x3_t results_u_8x8x3;
5757 
5758  // saturated narrow signed to unsigned (negative values become 0, values above 255 become 255), with rounding, normalized by 2^6
5759  results_u_8x8x3.val[0] = vqrshrun_n_s16(intermediateResults0_s_16x8, 6);
5760  results_u_8x8x3.val[1] = vqrshrun_n_s16(intermediateResults1_s_16x8, 6);
5761  results_u_8x8x3.val[2] = vqrshrun_n_s16(intermediateResults2_s_16x8, 6);
5762 
5763  // and we can store the result (the three channels are interleaved again during the store)
5764  vst3_u8(target, results_u_8x8x3);
5765 }
5766 
// Converts 16 interleaved 3-channel pixels (8 bit per channel) to 16 interleaved 3-channel pixels with 6 bit fixed-point precision.
// For each pixel and each target channel t in {0, 1, 2} the code computes
//   target[t] = saturate_u8(round((factorChannel_t0 * (source[0] - bias0) + factorChannel_t1 * (source[1] - bias1) + factorChannel_t2 * (source[2] - bias2)) / 64))
// using 16 bit intermediate values: the individual products are not saturated, the sums of the products are saturated.
// In a parameter name 'factorChannelTS', T is the index of the target channel and S is the index of the source channel the factor is multiplied with.
// The bias vectors hold unscaled 8 bit values which are subtracted from the source channels before the multiplication.
// The function reads 48 bytes from 'source' (vld3q_u8) and writes 48 bytes to 'target' (vst3q_u8); both pointers must be valid.
// NOTE(review): vmulq_s16 keeps only the low 16 bits of each product, so the factors presumably are chosen by the caller so that every product fits into int16 - confirm at the call sites.
5767 OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8)
5768 {
5769  ocean_assert(source != nullptr && target != nullptr);
5770 
5771  // the comments below use the YUV24 to RGB24 conversion as an example
5772 
5773  // precise color space conversion:
5774  // | R | | 1 0.0 1.370705 -175.45024 | | Y |
5775  // | G | = | 1 -0.3376335 -0.698001 132.561152 | * | U |
5776  // | B | | 1 1.732446 0.0 -221.753088 | | V |
5777  // | 1 |
5778 
5779  // approximation (results are scaled by 64, the normalization by 2^6 is applied at the very end):
5780  // R = 64 * Y + 0 * (U - 128) + 88 * (V - 128)
5781  // G = 64 * Y - 22 * (U - 128) - 45 * (V - 128)
5782  // B = 64 * Y + 111 * (U - 128) + 0 * (V - 128)
5783 
      // we load 16 pixels (= 3 * 16 values) and directly de-interleave the 3 channels, val[i] holds the 16 values of source channel i
5784  const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
5785 
5786  // Y' = Y - bias0, U' = U - bias1, V' = V - bias2
      // the subtraction widens to 16 bit; the unsigned (modulo 2^16) difference is re-interpreted as signed so that negative differences are preserved
      // 'low' covers the first 8 pixels, 'high' covers the last 8 pixels
5787  const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
5788  const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
5789  const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
5790 
5791  const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
5792  const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
5793  const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
5794 
5795  // now we apply the 3x3 matrix multiplication
      // first, the contribution of source channel 0 to each of the three target channels
5796 
5797  int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_64_s_16x8);
5798  int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_64_s_16x8);
5799  int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_64_s_16x8);
5800 
5801  int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_64_s_16x8);
5802  int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_64_s_16x8);
5803  int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_64_s_16x8);
5804 
      // second, the contribution of source channel 1 is added with saturation
5805  intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel01_64_s_16x8)); // intermediateResults0 = saturated(intermediateResults0 + source1_low * factorChannel01)
5806  intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel11_64_s_16x8));
5807  intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel21_64_s_16x8));
5808 
5809  intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel01_64_s_16x8));
5810  intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel11_64_s_16x8));
5811  intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel21_64_s_16x8));
5812 
      // third, the contribution of source channel 2 is added with saturation
5813  intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel02_64_s_16x8));
5814  intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel12_64_s_16x8));
5815  intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel22_64_s_16x8));
5816 
5817  intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel02_64_s_16x8));
5818  intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel12_64_s_16x8));
5819  intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel22_64_s_16x8));
5820 
5821  uint8x16x3_t results_u_8x16x3;
5822 
5823  // rounding right shift by 6 (normalization by 2^6) with saturated narrowing from signed 16 bit to unsigned 8 bit,
      // the results of the first and last 8 pixels are combined to one 16-lane vector per target channel
5824  results_u_8x16x3.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 6));
5825  results_u_8x16x3.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 6));
5826  results_u_8x16x3.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 6));
5827 
5828  // and we can store the result (the three channels are interleaved again while storing)
5829  vst3q_u8(target, results_u_8x16x3);
5830 }
5831 
// Converts 8 interleaved 3-channel pixels (8 bit per channel) to 8 interleaved 3-channel pixels with 7 bit fixed-point precision.
// For each pixel and each target channel t in {0, 1, 2} the code computes
//   target[t] = saturate_u8(round((factorChannel_t0 * source[0] + factorChannel_t1 * source[1] + factorChannel_t2 * source[2] + biasChannel_t) / 128))
// using 16 bit intermediate values: products and their accumulation are not saturated, only the bias addition is saturated.
// In a parameter name 'factorChannelTS', T is the index of the target channel and S is the index of the source channel the factor is multiplied with.
// The bias vectors are added before the normalization, so they need to be provided in the scaled (multiplied by 128) domain.
// The function reads 24 bytes from 'source' (vld3_u8) and writes 24 bytes to 'target' (vst3_u8); both pointers must be valid.
// NOTE(review): vmulq_s16/vmlaq_s16 keep only the low 16 bits, so the factors presumably are chosen by the caller so that the accumulated sum fits into int16 - confirm at the call sites.
5832 OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_128_s_16x8, const int16x8_t& factorChannel10_128_s_16x8, const int16x8_t& factorChannel20_128_s_16x8, const int16x8_t& factorChannel01_128_s_16x8, const int16x8_t& factorChannel11_128_s_16x8, const int16x8_t& factorChannel21_128_s_16x8, const int16x8_t& factorChannel02_128_s_16x8, const int16x8_t& factorChannel12_128_s_16x8, const int16x8_t& factorChannel22_128_s_16x8, const int16x8_t& biasChannel0_128_s_16x8, const int16x8_t& biasChannel1_128_s_16x8, const int16x8_t& biasChannel2_128_s_16x8)
5833 {
5834  ocean_assert(source != nullptr && target != nullptr);
5835 
5836  // the comments below use the RGB24 to YUV24 conversion as an example
5837 
5838  // precise color space conversion:
5839  // | Y | | 0.2578125 0.5039063 0.09765625 16.0 | | R |
5840  // | U | = | -0.1484375 -0.2890625 0.4375 128.0 | * | G |
5841  // | V | | 0.4375 -0.3671875 -0.0703125 128.0 | | B |
5842  // | 1 |
5843 
5844  // approximation:
5845  // Y = ( 33 * R + 64 * G + 13 * B) / 128 + 16
5846  // U = (-19 * R - 37 * G + 56 * B) / 128 + 128
5847  // V = ( 56 * R - 47 * G - 9 * B) / 128 + 128
5848 
5849  // we load 8 pixels (= 3 * 8 values) and directly deinterleave the 3 channels so that we receive the following patterns:
5850  // source_u_8x8x3.val[0]: R R R R R R R R
5851  // source_u_8x8x3.val[1]: G G G G G G G G
5852  // source_u_8x8x3.val[2]: B B B B B B B B
5853 
5854  const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5855 
      // each channel is widened from unsigned 8 bit to 16 bit; the values are in [0, 255], so the signed re-interpretation does not change them
5856  const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[0]));
5857  const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[1]));
5858  const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[2]));
5859 
      // 3x3 matrix multiplication: contribution of source channel 0 to each target channel
5860  int16x8_t intermediateResults0_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel00_128_s_16x8);
5861  int16x8_t intermediateResults1_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel10_128_s_16x8);
5862  int16x8_t intermediateResults2_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel20_128_s_16x8);
5863 
      // multiply-accumulate (not saturated) of source channel 1
5864  intermediateResults0_s_16x8 = vmlaq_s16(intermediateResults0_s_16x8, source1_s_16x8, factorChannel01_128_s_16x8);
5865  intermediateResults1_s_16x8 = vmlaq_s16(intermediateResults1_s_16x8, source1_s_16x8, factorChannel11_128_s_16x8);
5866  intermediateResults2_s_16x8 = vmlaq_s16(intermediateResults2_s_16x8, source1_s_16x8, factorChannel21_128_s_16x8);
5867 
      // multiply-accumulate (not saturated) of source channel 2
5868  intermediateResults0_s_16x8 = vmlaq_s16(intermediateResults0_s_16x8, source2_s_16x8, factorChannel02_128_s_16x8);
5869  intermediateResults1_s_16x8 = vmlaq_s16(intermediateResults1_s_16x8, source2_s_16x8, factorChannel12_128_s_16x8);
5870  intermediateResults2_s_16x8 = vmlaq_s16(intermediateResults2_s_16x8, source2_s_16x8, factorChannel22_128_s_16x8);
5871 
5872  // now we add the bias values (saturated), the bias values are added before the normalization and thus are scaled as well
5873 
5874  intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, biasChannel0_128_s_16x8);
5875  intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, biasChannel1_128_s_16x8);
5876  intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, biasChannel2_128_s_16x8);
5877 
5878  uint8x8x3_t results_u_8x8x3;
5879 
5880  // rounding right shift by 7 (normalization by 2^7) with saturated narrowing from signed 16 bit to unsigned 8 bit
5881  results_u_8x8x3.val[0] = vqrshrun_n_s16(intermediateResults0_s_16x8, 7);
5882  results_u_8x8x3.val[1] = vqrshrun_n_s16(intermediateResults1_s_16x8, 7);
5883  results_u_8x8x3.val[2] = vqrshrun_n_s16(intermediateResults2_s_16x8, 7);
5884 
5885  // and we can store the result (the three channels are interleaved again while storing)
5886  vst3_u8(target, results_u_8x8x3);
5887 }
5888 
// Converts 8 interleaved 3-channel pixels (8 bit per channel) to 8 interleaved 3-channel pixels with 10 bit fixed-point precision.
// For each pixel and each target channel t in {0, 1, 2} the code computes
//   target[t] = saturate_u8(round((factorChannel_t0 * source[0] + factorChannel_t1 * source[1] + factorChannel_t2 * source[2] + biasChannel_t) / 1024))
// using 32 bit intermediate values (16 bit x 16 bit widening multiplications).
// In a parameter name 'factorChannelTS', T is the index of the target channel and S is the index of the source channel the factor is multiplied with.
// The bias vectors are added before the normalization, so they need to be provided in the scaled (multiplied by 1024) domain.
// The function reads 24 bytes from 'source' (vld3_u8) and writes 24 bytes to 'target' (vst3_u8); both pointers must be valid.
5889 OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x4_t& factorChannel00_1024_s_16x4, const int16x4_t& factorChannel10_1024_s_16x4, const int16x4_t& factorChannel20_1024_s_16x4, const int16x4_t& factorChannel01_1024_s_16x4, const int16x4_t& factorChannel11_1024_s_16x4, const int16x4_t& factorChannel21_1024_s_16x4, const int16x4_t& factorChannel02_1024_s_16x4, const int16x4_t& factorChannel12_1024_s_16x4, const int16x4_t& factorChannel22_1024_s_16x4, const int32x4_t& biasChannel0_1024_s_32x4, const int32x4_t& biasChannel1_1024_s_32x4, const int32x4_t& biasChannel2_1024_s_32x4)
5890 {
5891  ocean_assert(source != nullptr && target != nullptr);
5892 
5893  // the comments below use the YUV24 to RGB24 conversion as an example
5894 
5895  // precise color space conversion:
5896  // | R | | 1.1639404296875 0.0 1.595947265625 -222.904296875 | | Y |
5897  // | G | = | 1.1639404296875 -0.3909912109375 -0.81298828125 135.486328125 | * | U |
5898  // | B | | 1.1639404296875 2.0179443359375 0.0 -276.919921875 | | V |
5899  // | 1 |
5900 
5901  // approximation (the 3x3 factors are scaled by 1024, the bias column is shown unscaled):
5902  // | R | | 1192 0 1634 -223 | | Y |
5903  // | G | = | 1192 -400 -833 135 | * | U |
5904  // | B | | 1192 2066 0 -277 | | V |
5905  // | 1 |
5906 
5907  // we load 8 pixels (= 3 * 8 values) and directly deinterleave the 3 channels so that we receive the following patterns (for the YUV24 example):
5908  // source_u_8x8x3.val[0]: Y Y Y Y Y Y Y Y
5909  // source_u_8x8x3.val[1]: U U U U U U U U
5910  // source_u_8x8x3.val[2]: V V V V V V V V
5911 
5912  const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5913 
      // each channel is widened from unsigned 8 bit to 16 bit; the values are in [0, 255], so the signed re-interpretation does not change them
5914  const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[0]));
5915  const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[1]));
5916  const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[2]));
5917 
      // the widening multiplications operate on 4 lanes, so each channel is split into the first 4 pixels (low) and the last 4 pixels (high)
      // contribution of source channel 0 to each target channel
5918  const int16x4_t source0_low_s_16x4 = vget_low_s16(source0_s_16x8);
5919  const int16x4_t source0_high_s_16x4 = vget_high_s16(source0_s_16x8);
5920 
5921  int32x4_t intermediateResults0_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel00_1024_s_16x4);
5922  int32x4_t intermediateResults0_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel00_1024_s_16x4);
5923 
5924  int32x4_t intermediateResults1_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel10_1024_s_16x4);
5925  int32x4_t intermediateResults1_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel10_1024_s_16x4);
5926 
5927  int32x4_t intermediateResults2_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel20_1024_s_16x4);
5928  int32x4_t intermediateResults2_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel20_1024_s_16x4);
5929 
5930 
      // widening multiply-accumulate of source channel 1
5931  const int16x4_t source1_low_s_16x4 = vget_low_s16(source1_s_16x8);
5932  const int16x4_t source1_high_s_16x4 = vget_high_s16(source1_s_16x8);
5933 
5934  intermediateResults0_low_s_32x4 = vmlal_s16(intermediateResults0_low_s_32x4, source1_low_s_16x4, factorChannel01_1024_s_16x4);
5935  intermediateResults0_high_s_32x4 = vmlal_s16(intermediateResults0_high_s_32x4, source1_high_s_16x4, factorChannel01_1024_s_16x4);
5936 
5937  intermediateResults1_low_s_32x4 = vmlal_s16(intermediateResults1_low_s_32x4, source1_low_s_16x4, factorChannel11_1024_s_16x4);
5938  intermediateResults1_high_s_32x4 = vmlal_s16(intermediateResults1_high_s_32x4, source1_high_s_16x4, factorChannel11_1024_s_16x4);
5939 
5940  intermediateResults2_low_s_32x4 = vmlal_s16(intermediateResults2_low_s_32x4, source1_low_s_16x4, factorChannel21_1024_s_16x4);
5941  intermediateResults2_high_s_32x4 = vmlal_s16(intermediateResults2_high_s_32x4, source1_high_s_16x4, factorChannel21_1024_s_16x4);
5942 
5943 
      // widening multiply-accumulate of source channel 2
5944  const int16x4_t source2_low_s_16x4 = vget_low_s16(source2_s_16x8);
5945  const int16x4_t source2_high_s_16x4 = vget_high_s16(source2_s_16x8);
5946 
5947  intermediateResults0_low_s_32x4 = vmlal_s16(intermediateResults0_low_s_32x4, source2_low_s_16x4, factorChannel02_1024_s_16x4);
5948  intermediateResults0_high_s_32x4 = vmlal_s16(intermediateResults0_high_s_32x4, source2_high_s_16x4, factorChannel02_1024_s_16x4);
5949 
5950  intermediateResults1_low_s_32x4 = vmlal_s16(intermediateResults1_low_s_32x4, source2_low_s_16x4, factorChannel12_1024_s_16x4);
5951  intermediateResults1_high_s_32x4 = vmlal_s16(intermediateResults1_high_s_32x4, source2_high_s_16x4, factorChannel12_1024_s_16x4);
5952 
5953  intermediateResults2_low_s_32x4 = vmlal_s16(intermediateResults2_low_s_32x4, source2_low_s_16x4, factorChannel22_1024_s_16x4);
5954  intermediateResults2_high_s_32x4 = vmlal_s16(intermediateResults2_high_s_32x4, source2_high_s_16x4, factorChannel22_1024_s_16x4);
5955 
5956 
5957  // now we add the bias values (plain 32 bit addition without saturation), the bias values are added before the normalization and thus are scaled as well
5958 
5959  intermediateResults0_low_s_32x4 = vaddq_s32(intermediateResults0_low_s_32x4, biasChannel0_1024_s_32x4);
5960  intermediateResults0_high_s_32x4 = vaddq_s32(intermediateResults0_high_s_32x4, biasChannel0_1024_s_32x4);
5961 
5962  intermediateResults1_low_s_32x4 = vaddq_s32(intermediateResults1_low_s_32x4, biasChannel1_1024_s_32x4);
5963  intermediateResults1_high_s_32x4 = vaddq_s32(intermediateResults1_high_s_32x4, biasChannel1_1024_s_32x4);
5964 
5965  intermediateResults2_low_s_32x4 = vaddq_s32(intermediateResults2_low_s_32x4, biasChannel2_1024_s_32x4);
5966  intermediateResults2_high_s_32x4 = vaddq_s32(intermediateResults2_high_s_32x4, biasChannel2_1024_s_32x4);
5967 
5968 
5969  uint8x8x3_t results_u_8x8x3;
5970 
5971  // two-step narrowing: rounding right shift by 10 (normalization by 2^10) with saturated narrowing from signed 32 bit to unsigned 16 bit,
      // followed by a saturated narrowing from unsigned 16 bit to unsigned 8 bit
5972  results_u_8x8x3.val[0] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_high_s_32x4, 10)));
5973  results_u_8x8x3.val[1] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_high_s_32x4, 10)));
5974  results_u_8x8x3.val[2] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_high_s_32x4, 10)));
5975 
5976  // and we can store the result (the three channels are interleaved again while storing)
5977  vst3_u8(target, results_u_8x8x3);
5978 }
5979 
// Converts 16 interleaved 3-channel pixels (8 bit per channel) to 16 interleaved 3-channel pixels with 10 bit fixed-point precision.
// For each pixel and each target channel t in {0, 1, 2} the code computes
//   target[t] = saturate_u8(round((factorChannel_t0 * source[0] + factorChannel_t1 * source[1] + factorChannel_t2 * source[2] + biasChannel_t) / 1024))
// using 32 bit intermediate values (16 bit x 16 bit widening multiplications).
// In a parameter name 'factorChannelTS', T is the index of the target channel and S is the index of the source channel the factor is multiplied with.
// The bias vectors are added before the normalization, so they need to be provided in the scaled (multiplied by 1024) domain.
// The function reads 48 bytes from 'source' (vld3q_u8) and writes 48 bytes to 'target' (vst3q_u8); both pointers must be valid.
5980 OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x4_t& factorChannel00_1024_s_16x4, const int16x4_t& factorChannel10_1024_s_16x4, const int16x4_t& factorChannel20_1024_s_16x4, const int16x4_t& factorChannel01_1024_s_16x4, const int16x4_t& factorChannel11_1024_s_16x4, const int16x4_t& factorChannel21_1024_s_16x4, const int16x4_t& factorChannel02_1024_s_16x4, const int16x4_t& factorChannel12_1024_s_16x4, const int16x4_t& factorChannel22_1024_s_16x4, const int32x4_t& biasChannel0_1024_s_32x4, const int32x4_t& biasChannel1_1024_s_32x4, const int32x4_t& biasChannel2_1024_s_32x4)
5981 {
5982  ocean_assert(source != nullptr && target != nullptr);
5983 
5984  // the comments below use the YUV24 to RGB24 conversion as an example
5985 
5986  // precise color space conversion:
5987  // | R | | 1.1639404296875 0.0 1.595947265625 -222.904296875 | | Y |
5988  // | G | = | 1.1639404296875 -0.3909912109375 -0.81298828125 135.486328125 | * | U |
5989  // | B | | 1.1639404296875 2.0179443359375 0.0 -276.919921875 | | V |
5990  // | 1 |
5991 
5992  // approximation (the 3x3 factors are scaled by 1024, the bias column is shown unscaled):
5993  // | R | | 1192 0 1634 -223 | | Y |
5994  // | G | = | 1192 -400 -833 135 | * | U |
5995  // | B | | 1192 2066 0 -277 | | V |
5996  // | 1 |
5997 
5998  // we load 16 pixels (= 3 * 16 values) and directly deinterleave the 3 channels so that we receive the following patterns (for the YUV24 example):
5999  // source_u_8x16x3.val[0]: Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y
6000  // source_u_8x16x3.val[1]: U U U U U U U U U U U U U U U U
6001  // source_u_8x16x3.val[2]: V V V V V V V V V V V V V V V V
6002 
6003  const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
6004 
      // each channel is widened from unsigned 8 bit to 16 bit ('low' = first 8 pixels, 'high' = last 8 pixels);
      // the values are in [0, 255], so the signed re-interpretation does not change them
6005  const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[0])));
6006  const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[1])));
6007  const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[2])));
6008 
6009  const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[0])));
6010  const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[1])));
6011  const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[2])));
6012 
      // the widening multiplications operate on 4 lanes, so each channel is split into four quarters:
      // A = pixels 0-3, B = pixels 4-7, C = pixels 8-11, D = pixels 12-15
      // contribution of source channel 0 to each target channel
6013  const int16x4_t source0_A_s_16x4 = vget_low_s16(source0_low_s_16x8);
6014  const int16x4_t source0_B_s_16x4 = vget_high_s16(source0_low_s_16x8);
6015  const int16x4_t source0_C_s_16x4 = vget_low_s16(source0_high_s_16x8);
6016  const int16x4_t source0_D_s_16x4 = vget_high_s16(source0_high_s_16x8);
6017 
6018  int32x4_t intermediateResults0_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel00_1024_s_16x4);
6019  int32x4_t intermediateResults0_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel00_1024_s_16x4);
6020  int32x4_t intermediateResults0_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel00_1024_s_16x4);
6021  int32x4_t intermediateResults0_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel00_1024_s_16x4);
6022 
6023  int32x4_t intermediateResults1_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel10_1024_s_16x4);
6024  int32x4_t intermediateResults1_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel10_1024_s_16x4);
6025  int32x4_t intermediateResults1_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel10_1024_s_16x4);
6026  int32x4_t intermediateResults1_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel10_1024_s_16x4);
6027 
6028  int32x4_t intermediateResults2_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel20_1024_s_16x4);
6029  int32x4_t intermediateResults2_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel20_1024_s_16x4);
6030  int32x4_t intermediateResults2_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel20_1024_s_16x4);
6031  int32x4_t intermediateResults2_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel20_1024_s_16x4);
6032 
6033 
      // widening multiply-accumulate of source channel 1
6034  const int16x4_t source1_A_s_16x4 = vget_low_s16(source1_low_s_16x8);
6035  const int16x4_t source1_B_s_16x4 = vget_high_s16(source1_low_s_16x8);
6036  const int16x4_t source1_C_s_16x4 = vget_low_s16(source1_high_s_16x8);
6037  const int16x4_t source1_D_s_16x4 = vget_high_s16(source1_high_s_16x8);
6038 
6039  intermediateResults0_A_s_32x4 = vmlal_s16(intermediateResults0_A_s_32x4, source1_A_s_16x4, factorChannel01_1024_s_16x4);
6040  intermediateResults0_B_s_32x4 = vmlal_s16(intermediateResults0_B_s_32x4, source1_B_s_16x4, factorChannel01_1024_s_16x4);
6041  intermediateResults0_C_s_32x4 = vmlal_s16(intermediateResults0_C_s_32x4, source1_C_s_16x4, factorChannel01_1024_s_16x4);
6042  intermediateResults0_D_s_32x4 = vmlal_s16(intermediateResults0_D_s_32x4, source1_D_s_16x4, factorChannel01_1024_s_16x4);
6043 
6044  intermediateResults1_A_s_32x4 = vmlal_s16(intermediateResults1_A_s_32x4, source1_A_s_16x4, factorChannel11_1024_s_16x4);
6045  intermediateResults1_B_s_32x4 = vmlal_s16(intermediateResults1_B_s_32x4, source1_B_s_16x4, factorChannel11_1024_s_16x4);
6046  intermediateResults1_C_s_32x4 = vmlal_s16(intermediateResults1_C_s_32x4, source1_C_s_16x4, factorChannel11_1024_s_16x4);
6047  intermediateResults1_D_s_32x4 = vmlal_s16(intermediateResults1_D_s_32x4, source1_D_s_16x4, factorChannel11_1024_s_16x4);
6048 
6049  intermediateResults2_A_s_32x4 = vmlal_s16(intermediateResults2_A_s_32x4, source1_A_s_16x4, factorChannel21_1024_s_16x4);
6050  intermediateResults2_B_s_32x4 = vmlal_s16(intermediateResults2_B_s_32x4, source1_B_s_16x4, factorChannel21_1024_s_16x4);
6051  intermediateResults2_C_s_32x4 = vmlal_s16(intermediateResults2_C_s_32x4, source1_C_s_16x4, factorChannel21_1024_s_16x4);
6052  intermediateResults2_D_s_32x4 = vmlal_s16(intermediateResults2_D_s_32x4, source1_D_s_16x4, factorChannel21_1024_s_16x4);
6053 
6054 
      // widening multiply-accumulate of source channel 2
6055  const int16x4_t source2_A_s_16x4 = vget_low_s16(source2_low_s_16x8);
6056  const int16x4_t source2_B_s_16x4 = vget_high_s16(source2_low_s_16x8);
6057  const int16x4_t source2_C_s_16x4 = vget_low_s16(source2_high_s_16x8);
6058  const int16x4_t source2_D_s_16x4 = vget_high_s16(source2_high_s_16x8);
6059 
6060  intermediateResults0_A_s_32x4 = vmlal_s16(intermediateResults0_A_s_32x4, source2_A_s_16x4, factorChannel02_1024_s_16x4);
6061  intermediateResults0_B_s_32x4 = vmlal_s16(intermediateResults0_B_s_32x4, source2_B_s_16x4, factorChannel02_1024_s_16x4);
6062  intermediateResults0_C_s_32x4 = vmlal_s16(intermediateResults0_C_s_32x4, source2_C_s_16x4, factorChannel02_1024_s_16x4);
6063  intermediateResults0_D_s_32x4 = vmlal_s16(intermediateResults0_D_s_32x4, source2_D_s_16x4, factorChannel02_1024_s_16x4);
6064 
6065  intermediateResults1_A_s_32x4 = vmlal_s16(intermediateResults1_A_s_32x4, source2_A_s_16x4, factorChannel12_1024_s_16x4);
6066  intermediateResults1_B_s_32x4 = vmlal_s16(intermediateResults1_B_s_32x4, source2_B_s_16x4, factorChannel12_1024_s_16x4);
6067  intermediateResults1_C_s_32x4 = vmlal_s16(intermediateResults1_C_s_32x4, source2_C_s_16x4, factorChannel12_1024_s_16x4);
6068  intermediateResults1_D_s_32x4 = vmlal_s16(intermediateResults1_D_s_32x4, source2_D_s_16x4, factorChannel12_1024_s_16x4);
6069 
6070  intermediateResults2_A_s_32x4 = vmlal_s16(intermediateResults2_A_s_32x4, source2_A_s_16x4, factorChannel22_1024_s_16x4);
6071  intermediateResults2_B_s_32x4 = vmlal_s16(intermediateResults2_B_s_32x4, source2_B_s_16x4, factorChannel22_1024_s_16x4);
6072  intermediateResults2_C_s_32x4 = vmlal_s16(intermediateResults2_C_s_32x4, source2_C_s_16x4, factorChannel22_1024_s_16x4);
6073  intermediateResults2_D_s_32x4 = vmlal_s16(intermediateResults2_D_s_32x4, source2_D_s_16x4, factorChannel22_1024_s_16x4);
6074 
6075 
6076  // now we add the bias values (plain 32 bit addition without saturation), the bias values are added before the normalization and thus are scaled as well
6077 
6078  intermediateResults0_A_s_32x4 = vaddq_s32(intermediateResults0_A_s_32x4, biasChannel0_1024_s_32x4);
6079  intermediateResults0_B_s_32x4 = vaddq_s32(intermediateResults0_B_s_32x4, biasChannel0_1024_s_32x4);
6080  intermediateResults0_C_s_32x4 = vaddq_s32(intermediateResults0_C_s_32x4, biasChannel0_1024_s_32x4);
6081  intermediateResults0_D_s_32x4 = vaddq_s32(intermediateResults0_D_s_32x4, biasChannel0_1024_s_32x4);
6082 
6083  intermediateResults1_A_s_32x4 = vaddq_s32(intermediateResults1_A_s_32x4, biasChannel1_1024_s_32x4);
6084  intermediateResults1_B_s_32x4 = vaddq_s32(intermediateResults1_B_s_32x4, biasChannel1_1024_s_32x4);
6085  intermediateResults1_C_s_32x4 = vaddq_s32(intermediateResults1_C_s_32x4, biasChannel1_1024_s_32x4);
6086  intermediateResults1_D_s_32x4 = vaddq_s32(intermediateResults1_D_s_32x4, biasChannel1_1024_s_32x4);
6087 
6088  intermediateResults2_A_s_32x4 = vaddq_s32(intermediateResults2_A_s_32x4, biasChannel2_1024_s_32x4);
6089  intermediateResults2_B_s_32x4 = vaddq_s32(intermediateResults2_B_s_32x4, biasChannel2_1024_s_32x4);
6090  intermediateResults2_C_s_32x4 = vaddq_s32(intermediateResults2_C_s_32x4, biasChannel2_1024_s_32x4);
6091  intermediateResults2_D_s_32x4 = vaddq_s32(intermediateResults2_D_s_32x4, biasChannel2_1024_s_32x4);
6092 
6093 
6094  uint8x16x3_t results_u_8x16x3;
6095 
6096  // two-step narrowing: rounding right shift by 10 (normalization by 2^10) with saturated narrowing from signed 32 bit to unsigned 16 bit,
      // followed by a saturated narrowing from unsigned 16 bit to unsigned 8 bit; the quarters A, B, C, D are re-combined in pixel order
6097  results_u_8x16x3.val[0] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_D_s_32x4, 10))));
6098 
6099  results_u_8x16x3.val[1] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_D_s_32x4, 10))));
6100  results_u_8x16x3.val[2] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_D_s_32x4, 10))));
6101 
6102  // and we can store the result (the three channels are interleaved again while storing)
6103  vst3q_u8(target, results_u_8x16x3);
6104 }
6105 
// Converts 16 interleaved 3-channel pixels (8 bit per channel) to 16 interleaved 3-channel pixels with 7 bit fixed-point precision.
// For each pixel and each target channel t in {0, 1, 2} the code computes
//   target[t] = saturate_u8(round((factorChannel_t0 * source[0] + factorChannel_t1 * source[1] + factorChannel_t2 * source[2] + biasChannel_t) / 128))
// using 16 bit intermediate values: products and their accumulation are not saturated, only the bias addition is saturated.
// In a parameter name 'factorChannelTS', T is the index of the target channel and S is the index of the source channel the factor is multiplied with.
// The bias vectors are added before the normalization, so they need to be provided in the scaled (multiplied by 128) domain.
// The function reads 48 bytes from 'source' (vld3q_u8) and writes 48 bytes to 'target' (vst3q_u8); both pointers must be valid.
// NOTE(review): vmulq_s16/vmlaq_s16 keep only the low 16 bits, so the factors presumably are chosen by the caller so that the accumulated sum fits into int16 - confirm at the call sites.
6106 OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_128_s_16x8, const int16x8_t& factorChannel10_128_s_16x8, const int16x8_t& factorChannel20_128_s_16x8, const int16x8_t& factorChannel01_128_s_16x8, const int16x8_t& factorChannel11_128_s_16x8, const int16x8_t& factorChannel21_128_s_16x8, const int16x8_t& factorChannel02_128_s_16x8, const int16x8_t& factorChannel12_128_s_16x8, const int16x8_t& factorChannel22_128_s_16x8, const int16x8_t& biasChannel0_128_s_16x8, const int16x8_t& biasChannel1_128_s_16x8, const int16x8_t& biasChannel2_128_s_16x8)
6107 {
6108  ocean_assert(source != nullptr && target != nullptr);
6109 
6110  // the comments below use the RGB24 to YUV24 conversion as an example
6111 
6112  // precise color space conversion:
6113  // | Y | | 0.2578125 0.5039063 0.09765625 16.0 | | R |
6114  // | U | = | -0.1484375 -0.2890625 0.4375 128.0 | * | G |
6115  // | V | | 0.4375 -0.3671875 -0.0703125 128.0 | | B |
6116  // | 1 |
6117 
6118  // approximation:
6119  // Y = ( 33 * R + 64 * G + 13 * B) / 128 + 16
6120  // U = (-19 * R - 37 * G + 56 * B) / 128 + 128
6121  // V = ( 56 * R - 47 * G - 9 * B) / 128 + 128
6122 
6123  // we load 16 pixels (= 3 * 16 values) and directly deinterleave the 3 channels so that we receive the following patterns:
6124  // source_u_8x16x3.val[0]: R R R R R R R R R R R R R R R R
6125  // source_u_8x16x3.val[1]: G G G G G G G G G G G G G G G G
6126  // source_u_8x16x3.val[2]: B B B B B B B B B B B B B B B B
6127 
6128  const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
6129 
      // each channel is widened from unsigned 8 bit to 16 bit ('low' = first 8 pixels, 'high' = last 8 pixels);
      // the values are in [0, 255], so the signed re-interpretation does not change them
6130  const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[0])));
6131  const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[1])));
6132  const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[2])));
6133 
6134  const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[0])));
6135  const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[1])));
6136  const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[2])));
6137 
6138 
      // 3x3 matrix multiplication: contribution of source channel 0 to each target channel
6139  int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_128_s_16x8);
6140  int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_128_s_16x8);
6141  int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_128_s_16x8);
6142 
6143  int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_128_s_16x8);
6144  int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_128_s_16x8);
6145  int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_128_s_16x8);
6146 
6147 
      // multiply-accumulate (not saturated) of source channel 1
6148  intermediateResults0_low_s_16x8 = vmlaq_s16(intermediateResults0_low_s_16x8, source1_low_s_16x8, factorChannel01_128_s_16x8);
6149  intermediateResults1_low_s_16x8 = vmlaq_s16(intermediateResults1_low_s_16x8, source1_low_s_16x8, factorChannel11_128_s_16x8);
6150  intermediateResults2_low_s_16x8 = vmlaq_s16(intermediateResults2_low_s_16x8, source1_low_s_16x8, factorChannel21_128_s_16x8);
6151 
6152  intermediateResults0_high_s_16x8 = vmlaq_s16(intermediateResults0_high_s_16x8, source1_high_s_16x8, factorChannel01_128_s_16x8);
6153  intermediateResults1_high_s_16x8 = vmlaq_s16(intermediateResults1_high_s_16x8, source1_high_s_16x8, factorChannel11_128_s_16x8);
6154  intermediateResults2_high_s_16x8 = vmlaq_s16(intermediateResults2_high_s_16x8, source1_high_s_16x8, factorChannel21_128_s_16x8);
6155 
6156 
      // multiply-accumulate (not saturated) of source channel 2
6157  intermediateResults0_low_s_16x8 = vmlaq_s16(intermediateResults0_low_s_16x8, source2_low_s_16x8, factorChannel02_128_s_16x8);
6158  intermediateResults1_low_s_16x8 = vmlaq_s16(intermediateResults1_low_s_16x8, source2_low_s_16x8, factorChannel12_128_s_16x8);
6159  intermediateResults2_low_s_16x8 = vmlaq_s16(intermediateResults2_low_s_16x8, source2_low_s_16x8, factorChannel22_128_s_16x8);
6160 
6161  intermediateResults0_high_s_16x8 = vmlaq_s16(intermediateResults0_high_s_16x8, source2_high_s_16x8, factorChannel02_128_s_16x8);
6162  intermediateResults1_high_s_16x8 = vmlaq_s16(intermediateResults1_high_s_16x8, source2_high_s_16x8, factorChannel12_128_s_16x8);
6163  intermediateResults2_high_s_16x8 = vmlaq_s16(intermediateResults2_high_s_16x8, source2_high_s_16x8, factorChannel22_128_s_16x8);
6164 
6165  // now we add the bias values (saturated), the bias values are added before the normalization and thus are scaled as well
6166 
6167  intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, biasChannel0_128_s_16x8);
6168  intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, biasChannel0_128_s_16x8);
6169 
6170  intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, biasChannel1_128_s_16x8);
6171  intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, biasChannel1_128_s_16x8);
6172 
6173  intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, biasChannel2_128_s_16x8);
6174  intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, biasChannel2_128_s_16x8);
6175 
6176 
6177  uint8x16x3_t results_u_8x16x3;
6178 
6179  // rounding right shift by 7 (normalization by 2^7) with saturated narrowing from signed 16 bit to unsigned 8 bit,
      // the results of the first and last 8 pixels are combined to one 16-lane vector per target channel
6180  results_u_8x16x3.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 7));
6181  results_u_8x16x3.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 7));
6182  results_u_8x16x3.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 7));
6183 
6184  // and we can store the result (the three channels are interleaved again while storing)
6185  vst3q_u8(target, results_u_8x16x3);
6186 }
6187 
6188 OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8, const uint8x16_t& channelValue3_u_8x16)
6189 {
6190  ocean_assert(source != nullptr && target != nullptr);
6191 
6192  // the documentation of this function designed for YUV24 to RGB24 conversion
6193 
6194  // precise color space conversion:
6195  // | R | | 1 0.0 1.370705 -175.45024 | | Y |
6196  // | G | = | 1 -0.3376335 -0.698001 132.561152 | * | U |
6197  // | B | | 1 1.732446 0.0 -221.753088 | | V |
6198  // | 1 |
6199 
6200  // approximation:
6201  // R = 64 * Y + 0 * (U - 128) + 88 * (V - 128)
6202  // G = 64 * Y - 22 * (U - 128) - 45 * (V - 128)
6203  // B = 64 * Y + 111 * (U - 128) + 0 * (V - 128)
6204 
6205  const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
6206 
6207  // Y' = Y - bias0, U' = U - bias1, V' = V - bias2
6208  const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
6209  const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
6210  const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
6211 
6212  const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
6213  const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
6214  const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
6215 
6216  // now we mulitply apply the 3x3 matrix multiplication
6217 
6218  int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_64_s_16x8);
6219  int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_64_s_16x8);
6220  int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_64_s_16x8);
6221 
6222  int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_64_s_16x8);
6223  int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_64_s_16x8);
6224  int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_64_s_16x8);
6225 
6226  intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel01_64_s_16x8)); // intermediateResults0 = saturated(intermediateResults0 + source10_low * factorChannel01)
6227  intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel11_64_s_16x8));
6228  intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel21_64_s_16x8));
6229 
6230  intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel01_64_s_16x8));
6231  intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel11_64_s_16x8));
6232  intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel21_64_s_16x8));
6233 
6234  intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel02_64_s_16x8));
6235  intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel12_64_s_16x8));
6236  intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel22_64_s_16x8));
6237 
6238  intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel02_64_s_16x8));
6239  intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel12_64_s_16x8));
6240  intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel22_64_s_16x8));
6241 
6242  uint8x16x4_t results_u_8x16x4;
6243 
6244  // saturated narrow signed to unsigned, normalized by 2^6
6245  results_u_8x16x4.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 6));
6246  results_u_8x16x4.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 6));
6247  results_u_8x16x4.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 6));
6248  results_u_8x16x4.val[3] = channelValue3_u_8x16;
6249 
6250  // and we can store the result
6251  vst4q_u8(target, results_u_8x16x4);
6252 }
6253 
6254 template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2, bool tUseFactorChannel3>
6255 void FrameChannels::convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel0_128_u_8x8, const uint8x8_t& factorChannel1_128_u_8x8, const uint8x8_t& factorChannel2_128_u_8x8, const uint8x8_t& factorChannel3_128_u_8x8)
6256 {
6257  static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2 || tUseFactorChannel3, "Invalid multiplication factors!");
6258 
6259  ocean_assert(source != nullptr && target != nullptr);
6260 
6261  // the documentation of this function designed for RGBA32 to Y8 conversion
6262 
6263  // precise color space conversion:
6264  // Y = 0.299 * R + 0.587 * G + 0.114 * B
6265 
6266  // approximation:
6267  // Y = (38 * R + 75 * G + 15 * B) / 128
6268 
6269  // we expect the following input pattern (for here RGBA32):
6270  // FEDC BA98 7654 3210
6271  // ABGR ABGR ABGR ABGR
6272 
6273  // we load 8 pixels (= 4 * 8 values) and directly deinterleave the 4 channels so that we receive the following patterns:
6274  // m4_64_pixels.val[0]: R R R R R R R R
6275  // m4_64_pixels.val[1]: G G G G G G G G
6276  // m4_64_pixels.val[2]: B B B B B B B B
6277  // m4_64_pixels.val[3]: A A A A A A A A
6278 
6279  uint8x8x4_t pixels_u_8x8x4 = vld4_u8(source);
6280 
6281  uint16x8_t intermediateResults_16x8;
6282 
6283  // we multiply the first channel with the specified factor (unless zero)
6284 
6285  if constexpr (tUseFactorChannel0)
6286  {
6287  intermediateResults_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel0_128_u_8x8);
6288  }
6289  else
6290  {
6291  intermediateResults_16x8 = vdupq_n_u16(0u);
6292  }
6293 
6294  // we multiply the second channel with the specified factor (unless zero) and accumulate the results
6295 
6296  if constexpr (tUseFactorChannel1)
6297  {
6298  intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[1], factorChannel1_128_u_8x8);
6299  }
6300 
6301  // we multiply the third channel with the specified factor (unless zero) and accumulate the results
6302 
6303  if constexpr (tUseFactorChannel2)
6304  {
6305  intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[2], factorChannel2_128_u_8x8);
6306  }
6307 
6308  // we multiply the fourth channel with the specified factor (unless zero) and accumulate the results
6309 
6310  if constexpr (tUseFactorChannel3)
6311  {
6312  intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[3], factorChannel3_128_u_8x8);
6313  }
6314 
6315  // we shift the 16 bit values by 7 bits (= 128), apply rounding, and narrow the 16 bit integers to 8 bit integers within one operation
6316  uint8x8_t results_u_8x8 = vqrshrn_n_u16(intermediateResults_16x8, 7); // pixels_u_8x8x4 = (intermediateResults_16x8 + 2^6) >> 2^7
6317 
6318  // and we can store the result
6319  vst1_u8(target, results_u_8x8);
6320 }
6321 
6322 OCEAN_FORCE_INLINE void FrameChannels::convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel00_128_u_8x8, const uint8x8_t& factorChannel10_128_u_8x8, const uint8x8_t& factorChannel01_128_u_8x8, const uint8x8_t& factorChannel11_128_u_8x8, const uint8x8_t& factorChannel02_128_u_8x8, const uint8x8_t& factorChannel12_128_u_8x8, const uint8x8_t& factorChannel03_128_u_8x8, const uint8x8_t& factorChannel13_128_u_8x8)
6323 {
6324  ocean_assert(source != nullptr && target != nullptr);
6325 
6326  // the documentation of this function designed for RGBA32 to YA16 conversion
6327 
6328  // precise color space conversion:
6329  // Y = 0.299 * R + 0.587 * G + 0.114 * B + 0.0 * A
6330  // A = 0.0 * R + 0.0 * G + 0.0 * B + 1.0 * A
6331 
6332  // approximation:
6333  // Y = (38 * R + 75 * G + 15 * B + 0 * A) / 128
6334  // A = (128 * A) / 128
6335 
6336  // we expect the following input pattern (for here RGBA32):
6337  // FEDC BA98 7654 3210
6338  // ABGR ABGR ABGR ABGR
6339 
6340  // we load 8 pixels (= 4 * 8 values) and directly deinterleave the 4 channels so that we receive the following patterns:
6341  // m4_64_pixels.val[0]: R R R R R R R R
6342  // m4_64_pixels.val[1]: G G G G G G G G
6343  // m4_64_pixels.val[2]: B B B B B B B B
6344  // m4_64_pixels.val[3]: A A A A A A A A
6345 
6346  uint8x8x4_t pixels_u_8x8x4 = vld4_u8(source);
6347 
6348  uint16x8_t intermediateResultsChannel0_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel00_128_u_8x8);
6349  uint16x8_t intermediateResultsChannel1_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel10_128_u_8x8);
6350 
6351  intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[1], factorChannel01_128_u_8x8);
6352  intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[1], factorChannel11_128_u_8x8);
6353 
6354  intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[2], factorChannel02_128_u_8x8);
6355  intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[2], factorChannel12_128_u_8x8);
6356 
6357  intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[3], factorChannel03_128_u_8x8);
6358  intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[3], factorChannel13_128_u_8x8);
6359 
6360  uint8x8x2_t results_u_8x8x2;
6361 
6362  // we shift the 16 bit values by 7 bits (= 128), apply rounding, and narrow the 16 bit integers to 8 bit integers within one operation
6363 
6364  results_u_8x8x2.val[0] = vqrshrn_n_u16(intermediateResultsChannel0_16x8, 7); // results_u_8x8x2.val[0] = (intermediateResultsChannel0_16x8 + 2^6) >> 2^7
6365  results_u_8x8x2.val[1] = vqrshrn_n_u16(intermediateResultsChannel1_16x8, 7);
6366 
6367  // and we can store the result
6368  vst2_u8(target, results_u_8x8x2);
6369 }
6370 
6371 #endif // OCEAN_HARDWARE_NEON_VERSION
6372 
6373 }
6374 
6375 }
6376 
6377 #endif // META_OCEAN_CV_FRAME_CHANNELS_H
The following comfort class provides comfortable functions simplifying prototyping applications but a...
Definition: FrameChannels.h:51
static bool premultipliedAlphaToStraightAlpha(const Frame &source, Frame &target, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
static bool zipChannels(const Frames &sourceFrames, Frame &targetFrame, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Zips/interleaves 1-channel images into one image with n-channels.
static bool separateTo1Channel(const Frame &sourceFrame, Frames &targetFrames, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
static bool premultipliedAlphaToStraightAlpha(Frame &frame, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
static bool separateTo1Channel(const Frame &sourceFrame, const std::initializer_list< Frame * > &targetFrames, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
static bool zipChannels(const std::initializer_list< Frame > &sourceFrames, Frame &targetFrame, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Zips/interleaves 1-channel images into one image with n-channels.
static bool straightAlphaToPremultipliedAlpha(Frame &frame, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
static bool straightAlphaToPremultipliedAlpha(const Frame &source, Frame &target, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
This class implements frame channel conversion, transformation and extraction functions.
Definition: FrameChannels.h:31
static void reverseChannelOrder(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Reverses the order of the channels of a frame with zipped pixel format.
Definition: FrameChannels.h:2840
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &factorChannel00_1024_s_16x8, const __m128i &factorChannel10_1024_s_16x8, const __m128i &factorChannel20_1024_s_16x8, const __m128i &factorChannel01_1024_s_16x8, const __m128i &factorChannel11_1024_s_16x8, const __m128i &factorChannel21_1024_s_16x8, const __m128i &factorChannel02_1024_s_16x8, const __m128i &factorChannel12_1024_s_16x8, const __m128i &factorChannel22_1024_s_16x8, const __m128i &biasChannel0_1024_s_32x4, const __m128i &biasChannel1_1024_s_32x4, const __m128i &biasChannel2_1024_s_32x4)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channel per pixel by a linear co...
Definition: FrameChannels.h:5340
static void addChannelValueRow(const T *source, T *target, const size_t size, const void *channelValueParameter)
Adds a channel to a given row with generic (zipped) pixel format and sets all values to a specified v...
Definition: FrameChannels.h:4288
static void shuffleRowChannelsAndSetLastChannelValue(const T *source, T *target, const size_t size, const void *options=nullptr)
Shuffles the channels of row pixels by application of a specified shuffle pattern and sets the last c...
Definition: FrameChannels.h:3747
static void separateTo1Channel(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
Definition: FrameChannels.h:1847
static OCEAN_FORCE_INLINE void convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8, const uint8x16_t &channelValue3_u_8x16)
Converts 16 pixels with 3 channels per pixel to 16 pixels with 4 channels per pixel by a linear combi...
Definition: FrameChannels.h:6188
static void addChannelRow(const void **sources, void **targets, const unsigned int multipleRowIndex, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const void *options)
Adds a channel to a given row with generic (zipped) pixel format and copies the information of the ne...
Definition: FrameChannels.h:4188
static void shuffleChannelsAndSetLastChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Shuffles the channels of source frame and sets the last channel with constant value in the target fra...
Definition: FrameChannels.h:3910
static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactors0_128_u_16x8, const __m128i &multiplicationFactors1_128_u_16x8, const __m128i &multiplicationFactors2_128_u_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with one channel per pixel by a linear comb...
Definition: FrameChannels.h:5186
static void shuffleChannels(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Shuffles the channels of a frame by an arbitrary pattern.
Definition: FrameChannels.h:3882
static void convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *channelMultiplicationFactors_128)
Converts a row of pixels with 3 channels to pixels with one channel by a linear combination of the fo...
Definition: FrameChannels.h:4876
static void convertRow4ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *channelMultiplicationFactors_128)
Converts a row of pixels with 4 channels to pixels with one channel by a linear combination of the fo...
Definition: FrameChannels.h:4949
static void copyChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Copies one channel from a given frame with zipped pixel format to another frame with zipped pixel for...
Definition: FrameChannels.h:2799
static void zipChannels(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition: FrameChannels.h:2598
static void straightAlphaToPremultipliedAlpha8BitPerChannel(uint8_t *const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
Definition: FrameChannels.h:4091
static void applyRowOperator(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const RowOperatorFunction< TSource, TTarget, tSourceChannels, tTargetChannels > &rowOperatorFunction, Worker *worker=nullptr)
Applies a row operator to all rows of a source image.
Definition: FrameChannels.h:4006
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channel per pixel by a linear co...
Definition: FrameChannels.h:5767
static void convertRow4ChannelsTo2Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *multiplicationFactors_128)
Converts a row of pixels with 4 channels to pixels with two channels by a linear combination of the fo...
static void setChannelSubset(T *frame, const unsigned int width, const T value, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Sets one channel of a frame with one unique value.
Definition: FrameChannels.h:4487
static void applyBivariateOperatorSubset(const TSource0 *source0, const TSource1 *source1, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Generic bivariate pixel operations.
Definition: FrameChannels.h:4720
static void applyAdvancedPixelModifier(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Applies a specific modifier function on each pixel.
Definition: FrameChannels.h:3968
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channel per pixel by a linear comb...
Definition: FrameChannels.h:5713
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x4_t &factorChannel00_1024_s_16x4, const int16x4_t &factorChannel10_1024_s_16x4, const int16x4_t &factorChannel20_1024_s_16x4, const int16x4_t &factorChannel01_1024_s_16x4, const int16x4_t &factorChannel11_1024_s_16x4, const int16x4_t &factorChannel21_1024_s_16x4, const int16x4_t &factorChannel02_1024_s_16x4, const int16x4_t &factorChannel12_1024_s_16x4, const int16x4_t &factorChannel22_1024_s_16x4, const int32x4_t &biasChannel0_1024_s_32x4, const int32x4_t &biasChannel1_1024_s_32x4, const int32x4_t &biasChannel2_1024_s_32x4)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear com...
Definition: FrameChannels.h:5980
static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel0_128_u_8x8, const uint8x8_t &factorChannel1_128_u_8x8, const uint8x8_t &factorChannel2_128_u_8x8, const uint8x8_t &factorChannel3_128_u_8x8)
Converts 8 pixels with 4 channels per pixel to 8 pixels with one channel per pixel by a linear combin...
static void addFirstChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be th...
Definition: FrameChannels.h:2711
static void addLastChannel(const T *source, const T *sourceNewChannel, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the ba...
Definition: FrameChannels.h:2731
static void removeFirstChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Removes the first channel from a given frame with zipped (generic) pixel format.
Definition: FrameChannels.h:2767
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x4_t &factorChannel00_1024_s_16x4, const int16x4_t &factorChannel10_1024_s_16x4, const int16x4_t &factorChannel20_1024_s_16x4, const int16x4_t &factorChannel01_1024_s_16x4, const int16x4_t &factorChannel11_1024_s_16x4, const int16x4_t &factorChannel21_1024_s_16x4, const int16x4_t &factorChannel02_1024_s_16x4, const int16x4_t &factorChannel12_1024_s_16x4, const int16x4_t &factorChannel22_1024_s_16x4, const int32x4_t &biasChannel0_1024_s_32x4, const int32x4_t &biasChannel1_1024_s_32x4, const int32x4_t &biasChannel2_1024_s_32x4)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channel per pixel by a linear comb...
Definition: FrameChannels.h:5889
static void addLastChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be th...
Definition: FrameChannels.h:2747
static void convertRow3ChannelsTo3Channels8BitPerChannel6BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static void reverseRowPixelOrderInPlace(T *data, const size_t size)
Reverses/mirrors the order of pixels in a given row (or a memory block in general) in place.
Definition: FrameChannels.h:3017
static void applyRowOperatorSubset(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const RowOperatorFunction< TSource, TTarget, tSourceChannels, tTargetChannels > rowOperatorFunction, const unsigned int firstRow, const unsigned int numberRows)
Applies a row operator to a subset of all rows of a source image.
Definition: FrameChannels.h:4853
static void applyPixelModifier(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Applies a specific modifier function on each pixel.
Definition: FrameChannels.h:3954
static void narrowRow16BitPerChannelTo8BitPerChannel(const uint16_t *source, uint8_t *target, const size_t size, const void *unusedParameters=nullptr)
Narrows a row of pixels with 16 bit channels to pixels with 8 bit channels.
Definition: FrameChannels.h:4129
static void applyAdvancedPixelModifierSubset(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Applies a specific modifier function on each pixel.
Definition: FrameChannels.h:4614
static void convertRow4ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 4 channels to pixels with 3 channels by a linear combination of the four...
static void shuffleRowChannels(const T *source, T *target, const size_t size, const void *unusedOptions=nullptr)
Shuffles the channels of row pixels by application of a specified shuffle pattern.
Definition: FrameChannels.h:3387
static void premultipliedAlphaToStraightAlpha8BitPerChannel(uint8_t *const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
Definition: FrameChannels.h:4053
static void convertRow3ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_128_s_16x8, const int16x8_t &factorChannel10_128_s_16x8, const int16x8_t &factorChannel20_128_s_16x8, const int16x8_t &factorChannel01_128_s_16x8, const int16x8_t &factorChannel11_128_s_16x8, const int16x8_t &factorChannel21_128_s_16x8, const int16x8_t &factorChannel02_128_s_16x8, const int16x8_t &factorChannel12_128_s_16x8, const int16x8_t &factorChannel22_128_s_16x8, const int16x8_t &biasChannel0_128_s_16x8, const int16x8_t &biasChannel1_128_s_16x8, const int16x8_t &biasChannel2_128_s_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with 3 channels per pixel by a linear combi...
Definition: FrameChannels.h:6106
static void convertRow3ChannelsTo4Channels8BitPerChannel6BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 4 channels by a linear combination of the thr...
static constexpr unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME
Definition of a constant to specify that the number of channels are not known at compile time but at ...
Definition: FrameChannels.h:37
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_128_s_16x8, const int16x8_t &factorChannel10_128_s_16x8, const int16x8_t &factorChannel20_128_s_16x8, const int16x8_t &factorChannel01_128_s_16x8, const int16x8_t &factorChannel11_128_s_16x8, const int16x8_t &factorChannel21_128_s_16x8, const int16x8_t &factorChannel02_128_s_16x8, const int16x8_t &factorChannel12_128_s_16x8, const int16x8_t &factorChannel22_128_s_16x8, const int16x8_t &biasChannel0_128_s_16x8, const int16x8_t &biasChannel1_128_s_16x8, const int16x8_t &biasChannel2_128_s_16x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channel per pixel by a linear comb...
Definition: FrameChannels.h:5832
static void copyChannelRow(const T *source, T *target, const size_t size, const void *unusedParameters=nullptr)
Copies one channel from a source row to a target row with generic (zipped) pixel format.
Definition: FrameChannels.h:4327
static void reverseRowPixelOrder(const T *source, T *target, const size_t size)
Reverses/mirrors the order of pixels in a given row (or a memory block in general).
Definition: FrameChannels.h:2856
static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel0_128_u_8x8, const uint8x8_t &factorChannel1_128_u_8x8, const uint8x8_t &factorChannel2_128_u_8x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with one channel per pixel by a linear combin...
static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactors0123_128_s_32x)
Converts 16 pixels with 4 channels per pixel to 16 pixels with one channel per pixel by a linear comb...
Definition: FrameChannels.h:5477
static void removeLastChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Removes the last channel from a given frame with zipped (generic) pixel format.
Definition: FrameChannels.h:2783
static void transformGeneric(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker)
Transforms a frame with generic pixel format (with zipped pixel information) like RGB24 or YUV24,...
Definition: FrameChannels.h:4028
static void setChannel(T *frame, const unsigned int width, const unsigned int height, const T value, const unsigned int framePaddingElements, Worker *worker=nullptr)
Sets one channel of a frame with a specific unique value.
Definition: FrameChannels.h:2821
static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(uint8_t *const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
Definition: FrameChannels.h:5113
static void narrow16BitPerChannelTo8BitPerChannel(const uint16_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Narrows 16 bit channels of a frame to 8 bit channels.
Definition: FrameChannels.h:3938
static void transformGenericSubset(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const RowReversePixelOrderFunction< void > rowReversePixelOrderFunction, const unsigned int bytesPerRow, const unsigned int sourceStrideBytes, const unsigned int targetStrideBytes, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a frame with generic pixel format (with zipped pixel information) like RGB24 o...
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &factorChannel00_128_s_16x8, const __m128i &factorChannel10_128_s_16x8, const __m128i &factorChannel20_128_s_16x8, const __m128i &factorChannel01_128_s_16x8, const __m128i &factorChannel11_128_s_16x8, const __m128i &factorChannel21_128_s_16x8, const __m128i &factorChannel02_128_s_16x8, const __m128i &factorChannel12_128_s_16x8, const __m128i &factorChannel22_128_s_16x8, const __m128i &biasChannel0_s_16x8, const __m128i &biasChannel1_s_16x8, const __m128i &biasChannel2_s_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition: FrameChannels.h:5252
static void reverseRowChannelOrder(const T *source, T *target, const size_t size, const void *unusedOptions=nullptr)
Reverses/mirrors the order of channels in a given row (or a memory block in general).
Definition: FrameChannels.h:3195
static void convertRow3ChannelsTo3Channels8BitPerChannel10BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static void applyBivariateOperator(const TSource0 *source0, const TSource1 *source1, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Generic bivariate pixel operations. Applies bivariate per-pixel operators: C(y, x) = op(A(y,...
Definition: FrameChannels.h:3987
static void addFirstChannel(const T *source, const T *sourceNewChannel, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the fr...
Definition: FrameChannels.h:2695
static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel00_128_u_8x8, const uint8x8_t &factorChannel10_128_u_8x8, const uint8x8_t &factorChannel01_128_u_8x8, const uint8x8_t &factorChannel11_128_u_8x8, const uint8x8_t &factorChannel02_128_u_8x8, const uint8x8_t &factorChannel12_128_u_8x8, const uint8x8_t &factorChannel03_128_u_8x8, const uint8x8_t &factorChannel13_128_u_8x8)
Converts 8 pixels with 4 channels per pixel to 8 pixels with two channels per pixel by a linear combi...
Definition: FrameChannels.h:6322
static void separateTo1ChannelRuntime(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
Definition: FrameChannels.h:4348
static void zipChannelsRuntime(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition: FrameChannels.h:4421
void(*)(const TSource *sourceRow, TTarget *targetRow, const unsigned int width, const unsigned int height, unsigned int rowIndex, const unsigned int sourceStrideElements, const unsigned int targetStrideElements) RowOperatorFunction
Definition of a function pointer to a function able to operate on an entire image row.
Definition: FrameChannels.h:43
static void applyPixelModifierSubset(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Applies a specific modifier function on each pixel.
Definition: FrameChannels.h:4510
static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(uint8_t *const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
Definition: FrameChannels.h:5024
static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactorsChannel0_0123_128_s_16x8, const __m128i &multiplicationFactorsChannel1_0123_128_s_16x8)
Converts 16 pixels with 4 channels per pixel to 16 pixels with two channels per pixel by a linear comb...
Definition: FrameChannels.h:5540
This is the base class for all frame converter classes.
Definition: FrameConverter.h:32
ConversionFlag
Definition of individual conversion flags.
Definition: FrameConverter.h:39
@ CONVERT_NORMAL
Normal conversion, neither flips nor mirrors the image.
Definition: FrameConverter.h:49
@ CONVERT_FLIPPED_AND_MIRRORED
Rotated conversion, rotates the image by 180.0 degrees with anchor in the center of the image.
Definition: FrameConverter.h:82
@ CONVERT_MIRRORED
Mirrored conversion, exchanges left and right of the image (like in a mirror, mirroring around the y-...
Definition: FrameConverter.h:71
@ CONVERT_FLIPPED
Flipped conversion, exchanges top and bottom of the image (flipping around the x-axis).
Definition: FrameConverter.h:60
static void convertGenericPixelFormat(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const ConversionFlag flag, const RowConversionFunction< TSource, TTarget > rowConversionFunction, const RowReversePixelOrderInPlaceFunction< TTarget > targetReversePixelOrderInPlaceFunction, const bool areContinuous, const void *options, Worker *worker)
Converts a frame with generic pixel format (e.g., RGBA32, BGR24, YUV24, ...) to a frame with generic ...
Definition: FrameConverter.h:3211
void(*)(T *row, const size_t width) RowReversePixelOrderInPlaceFunction
Definition of a function pointer to a function able to reverse the order of pixels in an image row wi...
Definition: FrameConverter.h:589
void(*)(const T *inputRow, T *targetRow, const size_t width) RowReversePixelOrderFunction
Definition of a function pointer to a function able to reverse the order of pixels in an image row wi...
Definition: FrameConverter.h:580
static void convertArbitraryPixelFormat(const void **sources, void **targets, const unsigned int width, const unsigned int height, const ConversionFlag flag, const unsigned int multipleRowsPerIteration, const MultipleRowsConversionFunction multipleRowsConversionFunction, const void *options, Worker *worker)
Converts a frame with arbitrary pixel format (e.g., Y_UV12, Y_VU12, YUYV16, ...) to a frame with arbi...
Definition: FrameConverter.h:3234
static OCEAN_FORCE_INLINE uint8x16_t cast16ElementsNEON(const float32x4_t &sourceA_f_32x4, const float32x4_t &sourceB_f_32x4, const float32x4_t &sourceC_f_32x4, const float32x4_t &sourceD_f_32x4)
Casts 16 float elements to 16 uint8_t elements.
Definition: NEON.h:1208
static __m128i divideByRightShiftSigned32Bit(const __m128i &value, const unsigned int rightShifts)
Divides four signed 32 bit values by applying a right shift.
Definition: SSE.h:3108
static __m128i load128i(const void *const buffer)
Loads a 128i value from the memory.
Definition: SSE.h:3619
static void store128i(const __m128i &value, uint8_t *const buffer)
Stores a 128i value to the memory.
Definition: SSE.h:3764
static __m128i divideByRightShiftSigned16Bit(const __m128i &value, const unsigned int rightShifts)
Divides eight signed 16 bit values by applying a right shift.
Definition: SSE.h:3066
static OCEAN_FORCE_INLINE void multiplyInt8x16ToInt32x8AndAccumulate(const __m128i &values0, const __m128i &values1, __m128i &results0, __m128i &results1)
Multiplies 8 int16_t values with 8 int16_t values and adds the products to 8 int32_t values.
Definition: SSE.h:3909
static OCEAN_FORCE_INLINE void interleave3Channel8Bit48Elements(const __m128i &channel0, const __m128i &channel1, const __m128i &channel2, __m128i &interleavedA, __m128i &interleavedB, __m128i &interleavedC)
Interleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
Definition: SSE.h:3345
static OCEAN_FORCE_INLINE void reverseChannelOrder3Channel8Bit48Elements(const __m128i &interleaved0, const __m128i &interleaved1, const __m128i &interleaved2, __m128i &reversedInterleaved0, __m128i &reversedInterleaved1, __m128i &reversedInterleaved2)
Reverses the order of the first and last channel of 48 elements of an image with 3 interleaved channe...
Definition: SSE.h:3387
static __m128i removeHighBits16_8(const __m128i &value)
Removes the higher 8 bits of eight 16 bit elements.
Definition: SSE.h:3799
static OCEAN_FORCE_INLINE void deInterleave3Channel8Bit48Elements(const __m128i &interleavedA, const __m128i &interleavedB, const __m128i &interleavedC, __m128i &channel0, __m128i &channel1, __m128i &channel2)
Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
Definition: SSE.h:3304
static __m128i set128i(const unsigned long long high64, const unsigned long long low64)
Sets a 128i value by two 64 bit values.
Definition: SSE.h:3770
static OCEAN_FORCE_INLINE void reverseChannelOrder4Channel8Bit64Elements(const uint8_t *interleaved, uint8_t *reversedInterleaved)
Reverses the order of the channels of 16 pixels (64 elements) of an image with 4 interleaved channels...
Definition: SSE.h:3412
static OCEAN_FORCE_INLINE void multiplyInt8x16ToInt32x8(const __m128i &values0, const __m128i &values1, __m128i &products0, __m128i &products1)
Multiplies 8 int16_t values with 8 int16_t values and returns the products as 8 int32_t results.
Definition: SSE.h:3900
static OCEAN_FORCE_INLINE void reverseChannelOrder2Channel8Bit32Elements(const uint8_t *interleaved, uint8_t *reversedInterleaved)
Reverses the order of the channels of 16 pixels (32 elements) of an image with 2 interleaved channels...
Definition: SSE.h:3372
static Caller< void > createStatic(typename StaticFunctionPointerMaker< void, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass >::Type function)
Creates a new caller container for a static function with no function parameter.
Definition: Caller.h:2876
This class implements Ocean's image class.
Definition: Frame.h:1792
PixelFormat
Definition of all pixel formats available in the Ocean framework.
Definition: Frame.h:183
@ FORMAT_UNDEFINED
Undefined pixel format.
Definition: Frame.h:187
TypeMapperBySize< sizeof(T)>::Type Type
Definition of an invalid mapped data type.
Definition: DataType.h:501
This class implements a worker able to distribute function calls over different threads.
Definition: Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
std::vector< Frame > Frames
Definition of a vector holding frames.
Definition: Frame.h:1755
std::vector< Index32 > Indices32
Definition of a vector holding 32 bit index values.
Definition: Base.h:96
The namespace covering the entire Ocean framework.
Definition: Accessor.h:15
Default definition of a type with tBytes bytes.
Definition: DataType.h:32