Ocean
Loading...
Searching...
No Matches
FrameChannels.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_CV_FRAME_CHANNELS_H
9#define META_OCEAN_CV_FRAME_CHANNELS_H
10
11#include "ocean/cv/CV.h"
13#include "ocean/cv/NEON.h"
14#include "ocean/cv/SSE.h"
15
16#include "ocean/base/DataType.h"
17#include "ocean/base/Frame.h"
18#include "ocean/base/Worker.h"
19
20namespace Ocean
21{
22
23namespace CV
24{
25
26/**
27 * This class implements frame channel conversion, transformation and extraction functions.
28 * @ingroup cv
29 */
30class OCEAN_CV_EXPORT FrameChannels : public FrameConverter
31{
32 public:
33
34 /**
35 * Definition of a constant to specify that the number of channels is not known at compile time but at runtime only.
 * The constant is the default value of the 'tChannels' template parameter of the pointer-based separateTo1Channel() and zipChannels() functions.
36 */
37 static constexpr unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME = 0u;
38
39 /**
40 * Definition of a function pointer to a function able to operate on an entire image row.
 * The function receives a read-only source row pointer and a writable target row pointer and does not return a value.
 * NOTE(review): the meaning of the remaining parameters (frame width/height, index of the row, strides in elements) is inferred from their names only -- confirm against the row operator implementations.
 * @tparam TSource The data type of the elements of the source row
 * @tparam TTarget The data type of the elements of the target row
 * @tparam tSourceChannels Presumably the number of channels of the source row; not part of the function signature itself -- TODO confirm
 * @tparam tTargetChannels Presumably the number of channels of the target row; not part of the function signature itself -- TODO confirm
41 */
42 template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels>
43 using RowOperatorFunction = void(*)(const TSource* sourceRow, TTarget* targetRow, const unsigned int width, const unsigned int height, unsigned int rowIndex, const unsigned int sourceStrideElements, const unsigned int targetStrideElements);
44
45 /**
46 * The following comfort class provides comfortable functions simplifying prototyping applications but also increasing binary size of the resulting applications.
47 * Best practice is to avoid using these functions if binary size matters,<br>
48 * as for every comfort function a corresponding function exists with specialized functionality not increasing binary size significantly.<br>
49 */
50 class OCEAN_CV_EXPORT Comfort
51 {
52 public:
53
54 /**
55 * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
56 * Usage:
57 * @code
58 * Frame rgbSourceFrame = ...;
59 *
60 * Frames targetFrames;
61 *
62 * if (separateTo1Channel(rgbSourceFrame, targetFrames))
63 * {
64 * ocean_assert(targetFrames.size() == 3);
65 *
66 * // do something with targetFrames
67 * }
68 * @endcode
69 * @param sourceFrame The frame to be separated, must be valid
70 * @param targetFrames The resulting frames each holding one channel of the source frame, will be set automatically
71 * @param targetPixelFormat Optional explicit pixel format of the target frames, must be a pixel format with 1 channel and must fit with the data type of the source pixel format, otherwise FORMAT_UNDEFINED
72 * @return True, if succeeded
73 */
74 static bool separateTo1Channel(const Frame& sourceFrame, Frames& targetFrames, const FrameType::PixelFormat targetPixelFormat = FrameType::FORMAT_UNDEFINED);
75
76 /**
77 * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
78 * Usage:
79 * @code
80 * Frame rgbSourceFrame = ...;
81 *
82 * Frame targetFrameA;
83 * Frame targetFrameB;
84 * Frame targetFrameC;
85 *
86 * if (separateTo1Channel(rgbSourceFrame, {&targetFrameA, &targetFrameB, &targetFrameC}))
87 * {
88 * // do something with targetFrameA, targetFrameB, and targetFrameC
89 * }
90 * @endcode
91 * @param sourceFrame The frame to be separated, must be valid
92 * @param targetFrames The pointers to the resulting frames each holding one channel of the source frame, one for each source channel
93 * @param targetPixelFormat Optional explicit pixel format of the target frames, must be a pixel format with 1 channel and must fit with the data type of the source pixel format, otherwise FORMAT_UNDEFINED
94 * @return True, if succeeded
95 */
96 static bool separateTo1Channel(const Frame& sourceFrame, const std::initializer_list<Frame*>& targetFrames, const FrameType::PixelFormat targetPixelFormat = FrameType::FORMAT_UNDEFINED);
97
98 /**
99 * Zips/interleaves 1-channel images into one image with n-channels.
100 * Usage:
101 * @code
102 * Frame sourceFrameA = ...;
103 * Frame sourceFrameB = ...;
104 * Frame sourceFrameC = ...;
105 *
106 * Frame targetFrame;
107 * if (zipChannels({sourceFrameA, sourceFrameB, sourceFrameC}, targetFrame))
108 * {
109 * ocean_assert(targetFrame.channels() == 3u);
110 *
111 * // do something with targetFrame
112 * }
113 * @endcode
114 * @param sourceFrames The frames to be zipped/interleaved, must be valid
115 * @param targetFrame The resulting frame holding n channels, will be set automatically
116 * @param targetPixelFormat Optional explicit pixel format of the target frame, must be a pixel format with n channels (one for each source frame; NOTE(review): confirm against the implementation) and must fit with the data type of the source pixel format, otherwise FORMAT_UNDEFINED
117 * @return True, if succeeded
118 */
119 static bool zipChannels(const std::initializer_list<Frame>& sourceFrames, Frame& targetFrame, const FrameType::PixelFormat targetPixelFormat = FrameType::FORMAT_UNDEFINED);
120
121 /**
122 * Zips/interleaves 1-channel images into one image with n-channels.
123 * Usage:
124 * @code
125 * Frames sourceFrames = ...;
126 *
127 * Frame targetFrame;
128 * if (zipChannels(sourceFrames, targetFrame))
129 * {
130 * ocean_assert(targetFrame.channels() == sourceFrames.size());
131 *
132 * // do something with targetFrame
133 * }
134 * @endcode
135 * @param sourceFrames The frames to be zipped/interleaved, must be valid
136 * @param targetFrame The resulting frame holding n channels, will be set automatically
137 * @param targetPixelFormat Optional explicit pixel format of the target frame, must be a pixel format with n channels (one for each source frame; NOTE(review): confirm against the implementation) and must fit with the data type of the source pixel format, otherwise FORMAT_UNDEFINED
138 * @return True, if succeeded
139 */
140 static bool zipChannels(const Frames& sourceFrames, Frame& targetFrame, const FrameType::PixelFormat targetPixelFormat = FrameType::FORMAT_UNDEFINED);
141
142 /**
143 * Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
144 * @param frame The image to convert, must be valid
145 * @param worker Optional worker object to distribute the computation
146 * @return True, if succeeded
147 * @see straightAlphaToPremultipliedAlpha().
148 */
149 static bool premultipliedAlphaToStraightAlpha(Frame& frame, Worker* worker = nullptr);
150
151 /**
152 * Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
153 * @param source The source image to convert, must be valid
154 * @param target The resulting converted target image, the frame type will be changed if it does not match the frame type of the source frame
155 * @param worker Optional worker object to distribute the computation
156 * @return True, if succeeded
157 * @see straightAlphaToPremultipliedAlpha().
158 */
159 static bool premultipliedAlphaToStraightAlpha(const Frame& source, Frame& target, Worker* worker = nullptr);
160
161 /**
162 * Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied alpha.
163 * @param frame The image to convert, must be valid
164 * @param worker Optional worker object to distribute the computation
 * @return Presumably true, if succeeded (as documented for premultipliedAlphaToStraightAlpha()) -- TODO confirm against the implementation
165 * @see premultipliedAlphaToStraightAlpha().
166 */
167 static bool straightAlphaToPremultipliedAlpha(Frame& frame, Worker* worker = nullptr);
168
169 /**
170 * Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied alpha.
171 * @param source The source image to convert, must be valid
172 * @param target The resulting converted target image, must be valid
173 * @param worker Optional worker object to distribute the computation
 * @return Presumably true, if succeeded (as documented for premultipliedAlphaToStraightAlpha()) -- TODO confirm against the implementation
174 * @see premultipliedAlphaToStraightAlpha().
175 */
176 static bool straightAlphaToPremultipliedAlpha(const Frame& source, Frame& target, Worker* worker = nullptr);
177 };
178
179 /**
180 * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
181 * Usage:
182 * @code
183 * const unsigned int width = ...;
184 * const unsigned int height = ...;
185 *
186 * const uint8_t* sourceFrame = ...;
187 * const unsigned int sourceFramePaddingElements = ...;
188 *
189 * constexpr unsigned int channels = 2u;
190 *
191 * uint8_t* targetFrames[channels] = {..., ...};
192 * const unsigned int targetFramesPaddingElements[channels] = {..., ...};
193 *
194 * separateTo1Channel<uint8_t, uint8_t, channels>(sourceFrame, targetFrames, width, height, channels, sourceFramePaddingElements, targetFramesPaddingElements);
195 * @endcode
196 * @param sourceFrame The frame to be separated, must be valid
197 * @param targetFrames The pointers to the resulting separated frames each holding one channel of the source frame, with already allocated memory
198 * @param width The width of the source frame in pixel, with range [1, infinity)
199 * @param height The height of the source frame in pixel, with range [1, infinity)
200 * @param channels The number of channels the source frame has, with range [1, infinity)
201 * @param sourceFramePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
202 * @param targetFramesPaddingElements The array of padding elements at the end of each target row, one for each target frame, in elements, with range [0, infinity), nullptr if all are zero
203 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
204 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
205 * @tparam tChannels The number of source channels (and target frames) if known at compile time; otherwise CHANNELS_NOT_KNOWN_AT_COMPILE_TIME == 0, if known at compile time must be identical with 'channels'
206 */
207 template <typename TSource, typename TTarget, unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
208 static void separateTo1Channel(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements);
209
210 /**
211 * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
 * NOTE(review): this overload has no 'channels' parameter; presumably the number of channels is given by the number of elements in 'targetFrames' -- confirm against the implementation.
212 * Usage:
213 * @code
214 * const unsigned int width = ...;
215 * const unsigned int height = ...;
216 *
217 * const uint8_t* sourceFrame = ...;
218 * const unsigned int sourceFramePaddingElements = ...;
219 *
220 * uint8_t* targetFrame0 = ...;
221 * uint8_t* targetFrame1 = ...;
222 * const unsigned int targetFramePaddingElements0 = ...;
223 * const unsigned int targetFramePaddingElements1 = ...;
224 *
225 * separateTo1Channel<uint8_t, uint8_t>(sourceFrame, {targetFrame0, targetFrame1}, width, height, sourceFramePaddingElements, {targetFramePaddingElements0, targetFramePaddingElements1});
226 * @endcode
227 * @param sourceFrame The frame to be separated, must be valid
228 * @param targetFrames The list of pointers to the resulting separated frames each holding one channel of the source frame, with already allocated memory
229 * @param width The width of the source frame in pixel, with range [1, infinity)
230 * @param height The height of the source frame in pixel, with range [1, infinity)
231 * @param sourceFramePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
232 * @param targetFramesPaddingElements The list of padding elements at the end of each target row, one for each target frame, in elements, with range [0, infinity)
233 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
234 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
235 */
236 template <typename TSource, typename TTarget>
237 static void separateTo1Channel(const TSource* const sourceFrame, const std::initializer_list<TTarget*>& targetFrames, const unsigned int width, const unsigned int height, const unsigned int sourceFramePaddingElements, const std::initializer_list<const unsigned int>& targetFramesPaddingElements);
238
239 /**
240 * Zips/interleaves 1-channel images into one image with n-channels.
241 * Usage:
242 * @code
243 * const unsigned int width = ...;
244 * const unsigned int height = ...;
245 *
246 * const uint8_t* sourceFrames[2] = {..., ...};
247 * const unsigned int sourceFramesPaddingElements[2] = {..., ...};
248 *
249 * uint8_t* targetFrame = ...;
250 * const unsigned int targetFramePaddingElements = ...;
251 *
252 * zipChannels<uint8_t, uint8_t>(sourceFrames, targetFrame, width, height, 2u, sourceFramesPaddingElements, targetFramePaddingElements);
253 * @endcode
254 * @param sourceFrames The pointers to the individual 1-channel frames, one for each image, must be valid
255 * @param targetFrame The pointer to the resulting zipped frame holding n-channels, must be valid
256 * @param width The width of the source frames in pixel, with range [1, infinity)
257 * @param height The height of the source frames in pixel, with range [1, infinity)
258 * @param channels The number of provided source frames (and the number of channels the target frame will have), with range [1, infinity)
259 * @param sourceFramesPaddingElements The array of padding elements at the end of each source row, one for each source frame, in elements, with range [0, infinity), nullptr if all are zero
260 * @param targetFramePaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
261 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
262 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
263 * @tparam tChannels The number of source frames (and target channels) if known at compile time; otherwise CHANNELS_NOT_KNOWN_AT_COMPILE_TIME == 0, if known at compile time must be identical with 'channels'
 * @see separateTo1Channel().
264 */
265 template <typename TSource, typename TTarget, unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
266 static void zipChannels(const TSource* const* const sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements);
267
268 /**
269 * Zips/interleaves 1-channel images into one image with n-channels.
 * NOTE(review): this overload has no 'channels' parameter; presumably the number of channels is given by the number of elements in 'sourceFrames' -- confirm against the implementation.
270 * Usage:
271 * @code
272 * const unsigned int width = ...;
273 * const unsigned int height = ...;
274 *
275 * const uint8_t* sourceFrame0 = ...;
276 * const uint8_t* sourceFrame1 = ...;
277 * const unsigned int sourceFramePaddingElements0 = ...;
278 * const unsigned int sourceFramePaddingElements1 = ...;
279 *
280 * uint8_t* targetFrame = ...;
281 * const unsigned int targetFramePaddingElements = ...;
282 *
283 * zipChannels<uint8_t, uint8_t>({sourceFrame0, sourceFrame1}, targetFrame, width, height, {sourceFramePaddingElements0, sourceFramePaddingElements1}, targetFramePaddingElements);
284 * @endcode
285 * @param sourceFrames The list of pointers to the individual 1-channel frames, one for each image, must be valid
286 * @param targetFrame The pointer to the resulting zipped frame holding n-channels, must be valid
287 * @param width The width of the source frames in pixel, with range [1, infinity)
288 * @param height The height of the source frames in pixel, with range [1, infinity)
289 * @param sourceFramesPaddingElements The list of padding elements at the end of each source row, one for each source frame, in elements, with range [0, infinity)
290 * @param targetFramePaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
291 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
292 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
293 */
294 template <typename TSource, typename TTarget>
295 static void zipChannels(const std::initializer_list<const TSource*>& sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const std::initializer_list<unsigned int>& sourceFramesPaddingElements, const unsigned int targetFramePaddingElements);
296
297 /**
298 * Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the front of all existing channels.
299 * @param source The source frame to which the new channel will be added, must be valid
300 * @param sourceNewChannel The 1-channel frame providing the new channel information, must be valid
301 * @param target The target frame receiving the joined channels, must be valid
302 * @param width The width of the frames in pixel, with range [1, infinity)
303 * @param height The height of the frames in pixel, with range [1, infinity)
304 * @param conversionFlag The conversion to be applied
305 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
306 * @param sourceNewChannelPaddingElements The number of padding elements at the end of each new-channel-source row, in elements, with range [0, infinity)
307 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
308 * @param worker Optional worker object to distribute the computational load
309 * @tparam T Data type of each channel pixel value
310 * @tparam tSourceChannels Number of channels of the source frame (without the new channel), with range [1, infinity)
 * @see addFirstChannelValue(), addLastChannel().
311 */
312 template <typename T, unsigned int tSourceChannels>
313 static inline void addFirstChannel(const T* source, const T* sourceNewChannel, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
314
315 /**
316 * Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be the same for each pixel.
317 * @param source The source frame providing the existing channels
318 * @param newChannelValue Value that will be assigned to the new channel for each pixel
319 * @param target The target frame receiving the existing channels and the new channel (as new first channel)
320 * @param width The width of the frames in pixel, with range [1, infinity)
321 * @param height The height of the frames in pixel, with range [1, infinity)
322 * @param conversionFlag The conversion to be applied
323 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
324 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
325 * @param worker Optional worker object to distribute the computational load
326 * @tparam T Data type of each channel pixel value
327 * @tparam tSourceChannels Number of channels of the source frame (without the new channel)
 * @see addFirstChannel(), addLastChannelValue().
328 */
329 template <typename T, unsigned int tSourceChannels>
330 static inline void addFirstChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
331
332 /**
333 * Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the back of all existing channels.
334 * @param source The source frame to which the new channel will be added, must be valid
335 * @param sourceNewChannel The 1-channel frame providing the new channel information, must be valid
336 * @param target The target frame receiving the joined channels, must be valid
337 * @param width The width of the frames in pixel, with range [1, infinity)
338 * @param height The height of the frames in pixel, with range [1, infinity)
339 * @param conversionFlag The conversion to be applied
340 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
341 * @param sourceNewChannelPaddingElements The number of padding elements at the end of each new-channel-source row, in elements, with range [0, infinity)
342 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
343 * @param worker Optional worker object to distribute the computational load
344 * @tparam T Data type of each channel pixel value
345 * @tparam tSourceChannels Number of channels of the source frame (without the new channel), with range [1, infinity)
 * @see addLastChannelValue(), addFirstChannel().
346 */
347 template <typename T, unsigned int tSourceChannels>
348 static inline void addLastChannel(const T* source, const T* sourceNewChannel, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
349
350 /**
351 * Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be the same for each pixel.
352 * @param source The source frame providing the existing channels
353 * @param newChannelValue Value that will be assigned to the new channel for each pixel
354 * @param target The target frame receiving the existing channels and the new channel (as new last channel)
355 * @param width The width of the frames in pixel, with range [1, infinity)
356 * @param height The height of the frames in pixel, with range [1, infinity)
357 * @param conversionFlag The conversion to be applied
358 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
359 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
360 * @param worker Optional worker object to distribute the computational load
361 * @tparam T Data type of each channel pixel value
362 * @tparam tSourceChannels Number of channels of the source frame (without the new channel)
 * @see addLastChannel(), addFirstChannelValue().
363 */
364 template <typename T, unsigned int tSourceChannels>
365 static inline void addLastChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
366
367 /**
368 * Removes the first channel from a given frame with zipped (generic) pixel format.
369 * This function is mainly a wrapper around FrameChannels::shuffleChannels().
370 * @param source The source frame from which the first channel will be removed, must be valid
371 * @param target The target frame without the first channel, must be valid
372 * @param width The width of the frames in pixel, with range [1, infinity)
373 * @param height The height of the frames in pixel, with range [1, infinity)
374 * @param conversionFlag The conversion to be applied
375 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
376 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
377 * @param worker Optional worker object to distribute the computational load
378 * @tparam T Data type of each channel pixel value
379 * @tparam tSourceChannels Number of channels of the source frame (including the channel that will be removed), with range [2, infinity)
380 * @see FrameChannels::shuffleChannels<T, tSourceChannels, tTargetChannels, tShufflePattern>(), removeLastChannel().
381 */
382 template <typename T, unsigned int tSourceChannels>
383 static inline void removeFirstChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
384
385 /**
386 * Removes the last channel from a given frame with zipped (generic) pixel format.
387 * This function is mainly a wrapper around FrameChannels::shuffleChannels().
388 * @param source The source frame from which the last channel will be removed, must be valid
389 * @param target The target frame without the last channel, must be valid
390 * @param width The width of the frames in pixel, with range [1, infinity)
391 * @param height The height of the frames in pixel, with range [1, infinity)
392 * @param conversionFlag The conversion to be applied
393 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
394 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
395 * @param worker Optional worker object to distribute the computational load
396 * @tparam T Data type of each channel pixel value
397 * @tparam tSourceChannels Number of channels of the source frame (including the channel that will be removed), with range [2, infinity)
398 * @see FrameChannels::shuffleChannels<T, tSourceChannels, tTargetChannels, tShufflePattern>(), removeFirstChannel().
399 */
400 template <typename T, unsigned int tSourceChannels>
401 static inline void removeLastChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
402
403 /**
404 * Copies one channel from a given frame with zipped pixel format to another frame with zipped pixel format.
405 * @param source The source frame from which the channel will be copied, must be valid
406 * @param target The target frame to which the channel will be copied, must be valid
407 * @param width The width of both frames in pixel, with range [1, infinity)
408 * @param height The height of both frames in pixel, with range [1, infinity)
409 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
410 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
411 * @param worker Optional worker object to distribute the computational load
412 * @tparam T Data type of each channel pixel value
413 * @tparam tSourceChannels Number of channels in the source frame, with range [1, infinity)
414 * @tparam tTargetChannels Number of channels in the target frame, with range [1, infinity)
415 * @tparam tSourceChannelIndex The index of the source channel that will be copied, with range [0, tSourceChannels - 1]
416 * @tparam tTargetChannelIndex The index of the target channel to which the source channel will be copied, with range [0, tTargetChannels - 1]
417 */
418 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tSourceChannelIndex, unsigned int tTargetChannelIndex>
419 static inline void copyChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
420
421 /**
422 * Sets one channel of a frame with a specific unique value.
423 * @param frame The frame in which one channel of each pixel will be set
424 * @param width The width of the frame in pixel, with range [1, infinity)
425 * @param height The height of the frame in pixel, with range [1, infinity)
426 * @param value The value to be set
427 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
428 * @param worker Optional worker object to distribute the computation
429 * @tparam T Data type of each channel pixel value
430 * @tparam tChannel Index of the channel that will be set, with range [0, tChannels)
431 * @tparam tChannels Number of data channels of the frame, with range [1, infinity)
432 */
433 template <typename T, unsigned int tChannel, unsigned int tChannels>
434 static inline void setChannel(T* frame, const unsigned int width, const unsigned int height, const T value, const unsigned int framePaddingElements, Worker* worker = nullptr);
435
436 /**
437 * Reverses the order of the channels of a frame with zipped pixel format.
438 * The first channel will be exchanged with the last channel, the second channel will be exchanged with the second last channel and so on.
439 * @param source The source frame whose channels will be reversed, must be valid
440 * @param target The target frame that receives the channels in reversed order, must be valid
441 * @param width The width of the source frame in pixel, with range (0, infinity)
442 * @param height The height of the source frame in pixel, with range (0, infinity)
443 * @param conversionFlag The conversion to be applied
444 * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
445 * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
446 * @param worker Optional worker object to distribute the computation
447 * @tparam T Data type of each channel pixel value
448 * @tparam tChannels Number of data channels, with range [1, infinity)
 * @see shuffleChannels().
449 */
450 template <typename T, unsigned int tChannels>
451 static inline void reverseChannelOrder(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
452
453 /**
454 * Shuffles the channels of a frame by an arbitrary pattern.
455 * The shuffle pattern is defined in groups of four bits defining the source channels.<br>
456 * For the shuffling from e.g., an RGBA32 row to a BGRA32 row the pattern 0x3012u must be defined:
457 * <pre>
458 * source pixel R G B A
459 * 0 1 2 3
460 * target pixel B G R A
461 * 2 1 0 3
462 * pattern (with reversed order): 0x3012
463 * </pre>
464 * @param source The source frame for which the channels will be shuffled, must be valid
465 * @param target The target frame that receives the shuffled channels, must be valid
466 * @param width The width of the source frame in pixel, with range [1, infinity)
467 * @param height The height of the source frame in pixel, with range [1, infinity)
468 * @param conversionFlag The conversion to be applied
469 * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
470 * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
471 * @param worker Optional worker object to distribute the computation
472 * @tparam T Data type of each channel pixel value
473 * @tparam tSourceChannels Number of source data channels, with range [1, 8u]
474 * @tparam tTargetChannels Number of target data channels, with range [1, 8u]
475 * @tparam tShufflePattern Groups of four bits define the source channel, e.g., 0x76543210 defines the identity transformation, 0x01234567 defines the reverse transformation
 * @see shuffleChannelsAndSetLastChannelValue(), reverseChannelOrder().
476 */
477 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
478 static inline void shuffleChannels(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
479
480 /**
481 * Shuffles the channels of a source frame and sets the last channel of the target frame to a constant value.
482 * The shuffle pattern is defined in groups of four bits defining the source channels, the lowest four bits define the source channel of the first target channel.<br>
483 * For the shuffling from e.g., an RGB24 frame to a BGRA32 frame the pattern 0x012u must be defined:
484 * <pre>
485 * source pixel R G B
486 * 0 1 2
487 * target pixel B G R A
488 * 2 1 0
489 * pattern (with reversed order): 0x012
490 * </pre>
491 * @param source The source frame for which the channels will be shuffled, must be valid
492 * @param newChannelValue The constant channel value which will be added as last channel to the target frame, with range [0, infinity)
493 * @param target The target frame that receives the shuffled channels, must be valid
494 * @param width The width of the source frame in pixel, with range [1, infinity)
495 * @param height The height of the source frame in pixel, with range [1, infinity)
496 * @param conversionFlag The conversion to be applied
497 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
498 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
499 * @param worker Optional worker object to distribute the computation
500 * @tparam T Data type of each channel pixel value
501 * @tparam tSourceChannels Number of source data channels, with range [1, 8u]
502 * @tparam tTargetChannels Number of target data channels, including the additional extra target channel, with range [2, 8u]
503 * @tparam tShufflePattern Groups of four bits define the source channel, e.g., 0x76543210 defines the identity transformation, 0x01234567 defines the reverse transformation
504 */
505 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
506 static inline void shuffleChannelsAndSetLastChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
507
508 /**
509 * Narrows 16 bit channels of a frame to 8 bit channels.
510 * @param source The source frame for which the channels will be narrowed, must be valid
511 * @param target The target frame that receives the narrowed channels, must be valid
512 * @param width The width of the source frame in pixel, with range [1, infinity)
513 * @param height The height of the source frame in pixel, with range [1, infinity)
514 * @param conversionFlag The conversion to be applied
515 * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
516 * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
517 * @param worker Optional worker object to distribute the computation
518 * @tparam tChannels Number of data channels of the source frame and the target frame, with range [1, infinity)
519 */
520 template <unsigned int tChannels>
521 static inline void narrow16BitPerChannelTo8BitPerChannel(const uint16_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
522
523 /**
524 * Applies a specific modifier function on each pixel, source and target frame have the same element type and the same number of channels.
525 * @param source The source frame providing the pixel information, must be valid
526 * @param target The target frame receiving the pixel information, must be valid
527 * @param width The width of the source frame in pixel, with range [1, infinity)
528 * @param height The height of the source frame in pixel, with range [1, infinity)
529 * @param conversionFlag The conversion to be applied
530 * @param worker Optional worker object to distribute the computation
531 * @tparam T Data type of each channel pixel value
532 * @tparam tChannels Number of data channels, with range [1, infinity)
533 * @tparam tPixelFunction Pixel modification function, taking a pointer to constant input elements and a pointer to the output elements
534 */
535 template <typename T, unsigned int tChannels, void (*tPixelFunction)(const T*, T*)>
536 static void applyPixelModifier(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, Worker* worker = nullptr);
537
538 /**
539 * Applies a specific modifier function on each pixel, source and target frame can have different element types and different numbers of channels.
540 * @param source The source frame providing the pixel information, must be valid
541 * @param target The target frame receiving the pixel information, must be valid
542 * @param width The width of the source frame in pixel, with range [1, infinity)
543 * @param height The height of the source frame in pixel, with range [1, infinity)
544 * @param sourcePaddingElements The number of padding elements at the end of each row of the source frame, in elements, with range [0, infinity)
545 * @param targetPaddingElements The number of padding elements at the end of each row of the target frame, in elements, with range [0, infinity)
546 * @param conversionFlag The conversion to be applied
547 * @param worker Optional worker object to distribute the computation
548 * @tparam TSource Data type of each source channel pixel value
549 * @tparam TTarget Data type of each target channel pixel value
550 * @tparam tSourceChannels Number of source data channels, with range [1, infinity)
551 * @tparam tTargetChannels Number of target data channels, with range [1, infinity)
552 * @tparam tPixelFunction Pixel modification function, taking a pointer to constant input elements and a pointer to the output elements
553 */
554 template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tPixelFunction)(const TSource*, TTarget*)>
555 static void applyAdvancedPixelModifier(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker* worker = nullptr);
556
557 /**
558 * Generic bivariate pixel operations.<br>
559 * Applies a bivariate per-pixel operator: `C(y, x) = op(A(y, x), B(y, x))`. Input and output must have the same frame type and have a single plane.
560 * @param source0 First source frame
561 * @param source1 Second source frame
562 * @param target The target frame
563 * @param width The width of the source frame in pixel, with range [1, infinity)
564 * @param height The height of the source frame in pixel, with range [1, infinity)
565 * @param source0PaddingElements The number of padding elements at the end of each row of the first source, in elements, with range [0, infinity)
566 * @param source1PaddingElements The number of padding elements at the end of each row of the second source, in elements, with range [0, infinity)
567 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
568 * @param conversionFlag The conversion to be applied
569 * @param worker Optional worker object to distribute the computation
570 * @tparam TSource0 Type of the first data source
571 * @tparam TSource1 Type of the second data source
572 * @tparam TTarget Type of the target
573 * @tparam TIntermediate Data type that is used for the computation of intermediate results, e.g., if TSource0 and TSource1 are different
574 * @tparam tSourceChannels Number of channels of the two sources, with range [1, infinity)
575 * @tparam tTargetChannels Number of channels of the target, with range [1, infinity)
576 * @tparam tOperator The operation (function) that is applied on both sources to yield the value for the target (called per pixel)
577 */
578 template <typename TSource0, typename TSource1, typename TTarget, typename TIntermediate, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
579 static void applyBivariateOperator(const TSource0* source0, const TSource1* source1, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker* worker = nullptr);
580
581 /**
582 * Applies a row operator to all rows of a source image.
583 * The row operator is given as a function pointer and is intended to transform a source row to a target row.<br>
584 * The function allows implementing e.g., frame filters with a few lines of code; source and target frame must have the same size.
585 * @param source The source frame to which the row operator is applied, must be valid
586 * @param target The target frame receiving the result of the row operator, must be valid
587 * @param width The width of the source frame and target frame in pixel, with range [1, infinity)
588 * @param height The height of the source frame and target frame in pixel, with range [1, infinity)
589 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
590 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
591 * @param rowOperatorFunction The pointer to the row operator function, must be valid
592 * @param worker Optional worker object to distribute the computation
593 * @tparam TSource The data type of the source elements
594 * @tparam TTarget The data type of the target elements
595 * @tparam tSourceChannels The number of channels the source frame has, with range [1, infinity)
596 * @tparam tTargetChannels The number of channels the target frame has, with range [1, infinity)
597 */
598 template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels>
599 static void applyRowOperator(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels>& rowOperatorFunction, Worker* worker = nullptr);
600
601 /**
602 * Transforms a frame with generic pixel format (with zipped pixel information) like RGB24 or YUV24, to a frame with the same pixel format and channel number.
603 * This function mainly mirrors or flips an image.
604 * @param source The source frame buffer, must be valid
605 * @param target The target frame buffer, must be valid
606 * @param width The width of the frame in pixel, with range [1, infinity)
607 * @param height The height of the frame in pixel, with range [1, infinity)
608 * @param conversionFlag The conversion to be applied
609 * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
610 * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
611 * @param worker Optional worker object to distribute the computation (this parameter has no default value and needs to be provided explicitly)
612 * @tparam T Data type of each channel pixel value, e.g., 'uint8_t', 'float', ...
613 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
614 */
615 template <typename T, unsigned int tChannels>
616 static inline void transformGeneric(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker);
617
618 /**
619 * Converts an image with premultiplied alpha to a straight image (without premultiplied alpha), the conversion is applied in place.
620 * @param frame The image to convert in place, must be valid
621 * @param width The width of the image in pixel, with range [1, infinity)
622 * @param height The height of the image in pixel, with range [1, infinity)
623 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
624 * @param worker Optional worker object to distribute the computation
625 * @tparam tChannels The number of frame channels, with range [2, infinity)
626 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
627 * @see straightAlphaToPremultipliedAlpha8BitPerChannel().
628 */
629 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
630 static inline void premultipliedAlphaToStraightAlpha8BitPerChannel(uint8_t* const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker* worker = nullptr);
631
632 /**
633 * Converts a source image with premultiplied alpha to a straight image (without premultiplied alpha) and writes the result to a target image.
634 * @param source The source image to convert, must be valid
635 * @param target The resulting converted target image, must be valid
636 * @param width The width of the image in pixel, with range [1, infinity)
637 * @param height The height of the image in pixel, with range [1, infinity)
638 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
639 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
640 * @param worker Optional worker object to distribute the computation
641 * @tparam tChannels The number of frame channels, with range [2, infinity)
642 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
643 * @see straightAlphaToPremultipliedAlpha8BitPerChannel().
644 */
645 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
646 static inline void premultipliedAlphaToStraightAlpha8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
647
648 /**
649 * Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied alpha, the conversion is applied in place.
650 * @param frame The image to convert in place, must be valid
651 * @param width The width of the image in pixel, with range [1, infinity)
652 * @param height The height of the image in pixel, with range [1, infinity)
653 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
654 * @param worker Optional worker object to distribute the computation
655 * @tparam tChannels The number of frame channels, with range [2, infinity)
656 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
657 * @see premultipliedAlphaToStraightAlpha8BitPerChannel().
658 */
659 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
660 static inline void straightAlphaToPremultipliedAlpha8BitPerChannel(uint8_t* const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker* worker = nullptr);
661
662 /**
663 * Converts a source image with straight alpha (without premultiplied alpha) to an image with premultiplied alpha and writes the result to a target image.
664 * @param source The source image to convert, must be valid
665 * @param target The resulting converted target image, must be valid
666 * @param width The width of the image in pixel, with range [1, infinity)
667 * @param height The height of the image in pixel, with range [1, infinity)
668 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
669 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
670 * @param worker Optional worker object to distribute the computation
671 * @tparam tChannels The number of frame channels, with range [2, infinity)
672 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
673 * @see premultipliedAlphaToStraightAlpha8BitPerChannel().
674 */
675 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
676 static inline void straightAlphaToPremultipliedAlpha8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
677
678 /**
679 * Reverses/mirrors the order of pixels in a given row (or a memory block in general).
680 * @param source The pointer to the source pixels, must be valid
681 * @param target The pointer to the target pixels receiving the reversed/mirrored pixel data, must be valid
682 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
683 * @tparam T The data type of the pixel elements, e.g., 'uint8_t', 'int'
684 * @tparam tChannels The number of channels (the number of elements) each pixel has, with range [1, infinity)
685 */
686 template <typename T, unsigned int tChannels>
687 static void reverseRowPixelOrder(const T* source, T* target, const size_t size);
688
689 /**
690 * Reverses/mirrors the order of pixels in a given row (or a memory block in general) in place.
691 * @param data The pointer to the pixels which will be reversed/mirrored in place, must be valid
692 * @param size The number of pixels to reverse, with range [1, infinity)
693 * @tparam T The data type of the pixel elements, e.g., 'uint8_t', 'int'
694 * @tparam tChannels The number of channels (the number of elements) each pixel has, with range [1, infinity)
695 */
696 template <typename T, unsigned int tChannels>
697 static void reverseRowPixelOrderInPlace(T* data, const size_t size);
698
699 /**
700 * Reverses/mirrors the order of channels in a given row (or a memory block in general).
701 * @param source The pointer to the source pixels, must be valid
702 * @param target The pointer to the target pixels receiving the reversed/mirrored channels, must be valid
703 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
704 * @param unusedOptions An unused options parameter, must be nullptr
705 * @tparam T The data type of the pixel elements, e.g., 'uint8_t', 'int'
706 * @tparam tChannels The number of channels (the number of elements) each pixel has, with range [1, infinity)
707 */
708 template <typename T, unsigned int tChannels>
709 static void reverseRowChannelOrder(const T* source, T* target, const size_t size, const void* unusedOptions = nullptr);
710
711 /**
712 * Shuffles the channels of row pixels by application of a specified shuffle pattern.
713 * The shuffle pattern is defined in groups of four bits defining the source channels, the lowest four bits define the source channel of the first target channel.<br>
714 * For the shuffling from e.g., an RGBA32 row to a BGRA32 row the pattern 0x3012u must be defined:
715 * <pre>
716 * source pixel R G B A
717 * 0 1 2 3
718 * target pixel B G R A
719 * 2 1 0 3
720 * pattern (with reversed order): 0x3012
721 * </pre>
722 * @param source The pointer to the source pixels, must be valid
723 * @param target The pointer to the target pixels, receiving the shuffled channels, must be valid
724 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
725 * @param unusedOptions An unused options parameter, must be nullptr
726 * @tparam T Data type of each channel pixel value, e.g., 'uint8_t' or 'float'
727 * @tparam tSourceChannels Number of source data channels, with range [1, 8u]
728 * @tparam tTargetChannels Number of target data channels, with range [1, 8u]
729 * @tparam tShufflePattern Groups of four bits define the source channel, e.g., 0x76543210 defines the identity transformation, 0x01234567 defines the reverse transformation
730 */
731 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
732 static inline void shuffleRowChannels(const T* source, T* target, const size_t size, const void* unusedOptions = nullptr);
733
734 /**
735 * Shuffles the channels of row pixels by application of a specified shuffle pattern and sets the last channel of the target row to a constant value.
736 * The shuffle pattern is defined in groups of four bits defining the source channels, the lowest four bits define the source channel of the first target channel.<br>
737 * For the shuffling from e.g., an RGB24 row to a BGRA32 row the pattern 0x012u must be defined:
738 * <pre>
739 * source pixel R G B
740 * 0 1 2
741 * target pixel B G R A
742 * 2 1 0
743 * pattern (with reversed order): 0x012
744 * </pre>
745 * @param source The pointer to the source pixels, must be valid
746 * @param target The pointer to the target pixels, receiving the shuffled channels, must be valid
747 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
748 * @param options Pointer to the constant channel value which will be added to the end of the target channels, must be valid (the default value nullptr is not a valid input)
749 * @tparam T Data type of each channel pixel value, e.g., 'uint8_t' or 'float'
750 * @tparam tSourceChannels Number of source data channels, with range [1, 8u]
751 * @tparam tTargetChannels Number of target data channels, including the additional extra target channel, with range [2, 8u]
752 * @tparam tShufflePattern Groups of four bits define the source channel, e.g., 0x76543210 defines the identity transformation, 0x01234567 defines the reverse transformation
753 */
754 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
755 static inline void shuffleRowChannelsAndSetLastChannelValue(const T* source, T* target, const size_t size, const void* options = nullptr);
756
757 /**
758 * Converts a row of pixels with 3 channels to pixels with one channel by a linear combination of the three channels.
759 * This function can be used to e.g., convert RGB24 to Y8, or BGR24 to Y8.
760 * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
761 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
762 * @param source The pointer to the source pixels, must be valid
763 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
764 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
765 * @param channelMultiplicationFactors_128 The three uint32_t multiplication factors, one for each channel, with range [0, 128], while the sum of all three factors must be 128, must be valid
766 * @tparam tUseFactorChannel0 True, if the value(s) of factorChannel0 is not zero; False, if the value(s) of factorChannel0 is zero
767 * @tparam tUseFactorChannel1 True, if the value(s) of factorChannel1 is not zero; False, if the value(s) of factorChannel1 is zero
768 * @tparam tUseFactorChannel2 True, if the value(s) of factorChannel2 is not zero; False, if the value(s) of factorChannel2 is zero
769 */
770 template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2>
771 static void convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* channelMultiplicationFactors_128);
772
773 /**
774 * Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the three channels plus a translational part applied to the source data before applying the linear transformation.
775 * This function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
776 * The linear combination is defined by three integer multiplication factors for each source channel with 64 as denominator, plus one translation parameter for each source channel (with 1 as denominator).<br>
777 * Beware: As this function applies integer multiplication factors (with 6 bits precision) the conversion result has an accuracy of +/- 4 color intensities.<br>
778 * The transformation is based on the following pattern:
779 * <pre>
780 * t0 = clamp(0, f00 * (s0 - b0) + f01 * (s1 - b1) + f02 * (s2 - b2), 255)
781 * t1 = clamp(0, f10 * (s0 - b0) + f11 * (s1 - b1) + f12 * (s2 - b2), 255)
782 * t2 = clamp(0, f20 * (s0 - b0) + f21 * (s1 - b1) + f22 * (s2 - b2), 255)
783 * </pre>
784 * With t target, s source, f factor, and b bias/translation.<br>
785 * Factors must be specified in relation to a denominator of 64, bias values must be specified with a denominator of 1.
786 * @param source The pointer to the source pixels, must be valid
787 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
788 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
789 * @param parameters The 12 int32_t parameters of the column-aligned 3x3 transformation matrix, plus 3 translation parameters: f00_64, f10_64, f20_64, f01_64, f02_64, ..., f22_64, with ranges [-128, 128], b0, b1, b2, with ranges [0, 128]
790 */
791 static void convertRow3ChannelsTo3Channels8BitPerChannel6BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
792
793 /**
794 * Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the three channels plus a bias (translation) part.
795 * This function can be used to e.g., convert RGB24 to YUV24, or BGR24 to YVU24.
796 * The linear combination is defined by three integer multiplication factors for each source channel with 128 as denominator, plus one bias (translation) parameter for each target channel (with 1 as denominator).<br>
797 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
798 * The transformation is based on the following pattern:
799 * <pre>
800 * t0 = clamp(0, f00 * s0 + f01 * s1 + f02 * s2 + b0, 255)
801 * t1 = clamp(0, f10 * s0 + f11 * s1 + f12 * s2 + b1, 255)
802 * t2 = clamp(0, f20 * s0 + f21 * s1 + f22 * s2 + b2, 255)
803 * </pre>
804 * With t target, s source, f factor, and b bias.<br>
805 * Factors must be specified in relation to a denominator of 128, bias values must be specified with a denominator of 1.
806 * @param source The pointer to the source pixels, must be valid
807 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
808 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
809 * @param parameters The 12 int32_t parameters of the column-aligned 3x4 transformation matrix: f00_128, f10_128, f20_128, f01_128, f02_128, ..., f22_128, b0, b1, b2, with ranges [-127, 127]
810 */
811 static void convertRow3ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
812
813 /**
814 * Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the three channels plus a bias (translation) part.
815 * This function can be used to e.g., convert YUV24 to RGB24, or YVU24 to BGR24.
816 * The linear combination is defined by three integer multiplication factors for each source channel with 1024 as denominator, plus one bias (translation) parameter for each target channel (with 1 as denominator).<br>
817 * Beware: As this function applies integer multiplication factors (with 10 bits precision) the conversion result has an accuracy of +/- 1 color intensity.<br>
818 * The transformation is based on the following pattern:
819 * <pre>
820 * t0 = clamp(0, f00 * s0 + f01 * s1 + f02 * s2 + b0, 255)
821 * t1 = clamp(0, f10 * s0 + f11 * s1 + f12 * s2 + b1, 255)
822 * t2 = clamp(0, f20 * s0 + f21 * s1 + f22 * s2 + b2, 255)
823 * </pre>
824 * With t target, s source, f factor, and b bias.<br>
825 * Factors must be specified in relation to a denominator of 1024, bias values must be specified with a denominator of 1.
826 * @param source The pointer to the source pixels, must be valid
827 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
828 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
829 * @param parameters The 12 int32_t parameters of the column-aligned 3x4 transformation matrix: f00_1024, f10_1024, f20_1024, f01_1024, f02_1024, ..., f22_1024, b0, b1, b2, with ranges [-1024 * 16, 1024 * 16]
830 */
831 static void convertRow3ChannelsTo3Channels8BitPerChannel10BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
832
833 /**
834 * Converts a row of pixels with 3 channels to pixels with 4 channels by a linear combination of the three channels plus a translational part applied to the source data before applying the linear transformation (for the first three channels).
835 * The fourth channel is set to a constant value, e.g., for an alpha channel.<br>
836 * This function can be used to e.g., convert YUV24 to RGBA32, or YVU24 to BGRA32.<br>
837 * The linear combination is defined by three integer multiplication factors for each source channel with 64 as denominator, plus one translation parameter for each source channel (with 1 as denominator).<br>
838 * Beware: As this function applies integer multiplication factors (with 6 bits precision) the conversion result has an accuracy of +/- 4 color intensities.<br>
839 * The transformation is based on the following pattern:
840 * <pre>
841 * t0 = clamp(0, f00 * (s0 - b0) + f01 * (s1 - b1) + f02 * (s2 - b2), 255)
842 * t1 = clamp(0, f10 * (s0 - b0) + f11 * (s1 - b1) + f12 * (s2 - b2), 255)
843 * t2 = clamp(0, f20 * (s0 - b0) + f21 * (s1 - b1) + f22 * (s2 - b2), 255)
844 * t3 = valueChannel3
845 * </pre>
846 * With t target, s source, f factor, and b bias/translation.<br>
847 * Factors must be specified in relation to a denominator of 64, bias values must be specified with a denominator of 1.
848 * @param source The pointer to the source pixels, must be valid
849 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
850 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
851 * @param parameters The 13 int32_t parameters of the column-aligned 3x3 transformation matrix, plus 3 translation parameters: f00_64, f10_64, f20_64, f01_64, f02_64, ..., f22_64, with ranges [-128, 128], b0, b1, b2, with ranges [0, 128], valueChannel3, with range [0, 255]
852 */
853 static void convertRow3ChannelsTo4Channels8BitPerChannel6BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
854
855 /**
856 * Converts a row of pixels with 4 channels to pixels with one channel by a linear combination of the four channels.
857 * This function can be used to e.g., convert RGBA32 to Y8, or ARGB32 to Y8, or RGB32 to Y8.
858 * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
859 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br> The transformation is based on the following pattern:
860 * <pre>
861 * t0 = f0 * s0 + f1 * s1 + f2 * s2 + f3 * s3
862 * </pre>
863 * @param source The pointer to the source pixels, must be valid
864 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
865 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
866 * @param channelMultiplicationFactors_128 The four uint32_t multiplication factors, one for each channel, with range [0, 127], while the sum of all four factors must be 128, must be valid
867 * @tparam tUseFactorChannel0 True, if the value(s) of factorChannel0 is not zero; False, if the value(s) of factorChannel0 is zero
868 * @tparam tUseFactorChannel1 True, if the value(s) of factorChannel1 is not zero; False, if the value(s) of factorChannel1 is zero
869 * @tparam tUseFactorChannel2 True, if the value(s) of factorChannel2 is not zero; False, if the value(s) of factorChannel2 is zero
870 * @tparam tUseFactorChannel3 True, if the value(s) of factorChannel3 is not zero; False, if the value(s) of factorChannel3 is zero
871 */
872 template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2, bool tUseFactorChannel3>
873 static void convertRow4ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* channelMultiplicationFactors_128);
874
875 /**
876 * Converts a row of pixels with 4 channels to pixels with two channels by a linear combination of the four channels.
877 * This function can be used to e.g., convert RGBA32 to YA16, or ARGB32 to AY16.
878 * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
879 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
880 * The transformation is based on the following pattern:
881 * <pre>
882 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + f03 * s3
883 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + f13 * s3
884 * </pre>
885 * @param source The pointer to the source pixels, must be valid
886 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
887 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
888 * @param multiplicationFactors_128 The 8 int32_t parameters of the column-aligned 2x4 transformation matrix: f00_128, f10_128, f01_128, ..., f13_128, with range [0, 127], while the sum of the four factors of each matrix row must be 128, must be valid
889 */
890 static void convertRow4ChannelsTo2Channels8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* multiplicationFactors_128);
891
892 /**
893 * Converts a row of pixels with 4 channels to pixels with 3 channels by a linear combination of the four channels plus a bias (translation) part.
894 * This function can be used to e.g., convert RGBA32 to YUV24, or BGRA32 to YVU24.
895 * The linear combination is defined by three integer multiplication factors for each source channel with 128 as denominator, plus one bias (translation) parameter for each target channel (with 1 as denominator).<br>
896 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
897 * The transformation is based on the following pattern:
898 * <pre>
899 * t0 = clamp(0, f00 * s0 + f01 * s1 + f02 * s2 + f03 * s3 + b0, 255)
900 * t1 = clamp(0, f10 * s0 + f11 * s1 + f12 * s2 + f13 * s3 + b1, 255)
901 * t2 = clamp(0, f20 * s0 + f21 * s1 + f22 * s2 + f23 * s3 + b2, 255)
902 * </pre>
903 * With t target, s source, f factor, and b bias.<br>
904 * Factors must be specified in relation to a denominator of 128, bias values must be specified with a denominator of 1.
905 * @param source The pointer to the source pixels, must be valid
906 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
907 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
908 * @param parameters The 12 int32_t parameters of the column-aligned 3x4 transformation matrix: f00_128, f10_128, f20_128, f01_128, f02_128, ..., f23_128, b0, b1, b2, with ranges [-127, 127]
909 */
910 static void convertRow4ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
911
912 /**
913 * Narrows a row of pixels with 16 bit channels to pixels with 8 bit channels.
914 * @param source The pointer to the source pixels, must be valid
915 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
916 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
917 * @param unusedParameters Unused parameter, must be nullptr
918 * @tparam tChannels The number of channels the source (and target) frame has, with range [1, infinity)
919 */
920 template <unsigned int tChannels>
921 static void narrowRow16BitPerChannelTo8BitPerChannel(const uint16_t* source, uint8_t* target, const size_t size, const void* unusedParameters = nullptr);
922
923 /**
924 * Adds a channel to a given row with generic (zipped) pixel format and copies the information of the new channel from a one-channel image.
925 * The channel can be added as new first channel or as new last channel.
926 * @param sources The pointer to the multi-channel source frame and to the single-channel source frame, must be valid
927 * @param targets The one pointer to the target image, must be valid
928 * @param multipleRowIndex The index of the multiple-row to be handled, with range [0, height - 1]
929 * @param width The width of the frame in pixel, with range [1, infinity), must be even
930 * @param height The height of the frame in pixel, with range [1, infinity), must be even
931 * @param conversionFlag The conversion to be applied
932 * @param options The 1 options parameters: padding parameters of 1-channel source frame, must be valid
933 * @tparam T Data type of each channel pixel value, e.g., 'uint8_t' or 'float'
934 * @tparam tSourceChannels Number of channels of the source frame (without the new channel), with range [1, infinity)
935 * @tparam tAddToFront True, to add the channel to the front (as new first channel); False, to add the channel to the back (as new last channel).
936 */
937 template <typename T, unsigned int tSourceChannels, bool tAddToFront>
938 static void addChannelRow(const void** sources, void** targets, const unsigned int multipleRowIndex, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const void* options);
939
940 /**
941 * Adds a channel to a given row with generic (zipped) pixel format and sets all values to a specified value.
942 * The channel can be added as new first channel or as new last channel.
943 * @param source The pointer to the source pixels, must be valid
944 * @param target The pointer to the target pixels, receiving the additional channel, must be valid
945 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
946 * @param channelValueParameter The pointer to the value of the channel to be set (with data type 'T'), must be valid
947 * @tparam T Data type of each channel pixel value, e.g., 'uint8_t' or 'float'
948 * @tparam tSourceChannels Number of channels of the source frame (without the new channel), with range [1, infinity)
949 * @tparam tAddToFront True, to add the channel to the front (as new first channel); False, to add the channel to the back (as new last channel).
950 */
951 template <typename T, unsigned int tSourceChannels, bool tAddToFront>
952 static void addChannelValueRow(const T* source, T* target, const size_t size, const void* channelValueParameter);
953
954 /**
955 * Copies one channel from a source row to a target row with generic (zipped) pixel format.
956 * @param source The pointer to the source pixels, must be valid
957 * @param target The pointer to the target pixels, receiving the copied channel, must be valid
958 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
959 * @param unusedParameters Unused parameters, must be nullptr
960 * @tparam T Data type of each channel pixel value, e.g., 'uint8_t' or 'float'
961 * @tparam tSourceChannels Number of channels of the source frame, with range [1, infinity)
962 * @tparam tTargetChannels Number of channels of the target frame, with range [1, infinity)
963 * @tparam tSourceChannelIndex The index of the source channel to be copied, with range [0, tSourceChannels - 1]
964 * @tparam tTargetChannelIndex The index of the target channel receiving the copied channel, with range [0, tTargetChannels - 1]
965 */
966 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tSourceChannelIndex, unsigned int tTargetChannelIndex>
967 static void copyChannelRow(const T* source, T* target, const size_t size, const void* unusedParameters = nullptr);
968
969 protected:
970
971 /**
972 * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only, with the number of channels specified at runtime (as function parameter).
973 * @param sourceFrame The frame to be separated, must be valid
974 * @param targetFrames The pointers to the resulting separated frames each holding one channel of the source frame, with already allocated memory
975 * @param width The width of the source frame in pixel, with range [1, infinity)
976 * @param height The height of the source frame in pixel, with range [1, infinity)
977 * @param channels The number of channels the source frame has, with range [1, infinity)
978 * @param sourceFramePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
979 * @param targetFramesPaddingElements The array of padding elements at the end of each target row, one for each target frame, in elements, with range [0, infinity)
980 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
981 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
982 */
983 template <typename TSource, typename TTarget>
984 static void separateTo1ChannelRuntime(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements);
985
986 /**
987 * Zips/interleaves 1-channel images into one image with n-channels, with the number of channels specified at runtime (as function parameter).
988 * @param sourceFrames The pointers to the individual 1-channel frames, one for each image, must be valid
989 * @param targetFrame The pointer to the resulting zipped frame holding n-channels, must be valid
990 * @param width The width of the source frames in pixel, with range [1, infinity)
991 * @param height The height of the source frames in pixel, with range [1, infinity)
992 * @param channels The number of provided source frames (and the number of channels the target frame will have), with range [1, infinity)
993 * @param sourceFramesPaddingElements The array of padding elements at the end of each source row, one for each source frame, in elements, with range [0, infinity)
994 * @param targetFramePaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
995 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
996 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
997 */
998 template <typename TSource, typename TTarget>
999 static void zipChannelsRuntime(const TSource* const* const sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements);
1000
1001 /**
1002 * Sets one channel of a subset of the rows of a frame to one unique value.
1003 * @param frame The frame in which one channel of each pixel will be set, must be valid
1004 * @param width The width of the frame in pixel, with range [1, infinity)
1005 * @param value The value to be set
1006 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
1007 * @param firstRow First row to be handled
1008 * @param numberRows Number of rows to be handled
1009 * @tparam T Data type of each channel pixel value
1010 * @tparam tChannel Index of the channel that will be set, with range [0, tChannels)
1011 * @tparam tChannels Number of data channels of the frames, with range [1, infinity)
1012 */
1013 template <typename T, unsigned int tChannel, unsigned int tChannels>
1014 static void setChannelSubset(T* frame, const unsigned int width, const T value, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1015
1016 /**
1017 * Applies a specific modifier function on each pixel of a subset of the frame rows, with source and target sharing the same data type and number of channels.
1018 * @param source The source frame providing the pixel information, must be valid
1019 * @param target The target frame receiving the pixel information, must be valid
1020 * @param width The width of the source frame in pixel
1021 * @param height The height of the source frame in pixel
1022 * @param conversionFlag The conversion to be applied
1023 * @param firstRow First row to be handled
1024 * @param numberRows Number of rows to be handled
1025 * @tparam T Data type of each channel pixel value
1026 * @tparam tChannels Number of data channels, with range [1, infinity)
1027 * @tparam tPixelFunction Pixel modification function, receiving a pointer to the source pixel and a pointer to the target pixel
1028 */
1029 template <typename T, unsigned int tChannels, void (*tPixelFunction)(const T*, T*)>
1030 static void applyPixelModifierSubset(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows);
1031
1032 /**
1033 * Applies a specific modifier function on each pixel of a subset of the frame rows, while source and target may have individual data types and individual numbers of channels.
1034 * @param source The source frame providing the pixel information, must be valid
1035 * @param target The target frame receiving the pixel information, must be valid
1036 * @param width The width of the source frame in pixel, with range [1, infinity)
1037 * @param height The height of the source frame in pixel, with range [1, infinity)
1038 * @param sourcePaddingElements The number of padding elements at the end of each row of the source frame, in elements, with range [0, infinity)
1039 * @param targetPaddingElements The number of padding elements at the end of each row of the target frame, in elements, with range [0, infinity)
1040 * @param conversionFlag The conversion to be applied
1041 * @param firstRow First row to be handled
1042 * @param numberRows Number of rows to be handled
1043 * @tparam TSource Data type of each source channel pixel value
1044 * @tparam TTarget Data type of each target channel pixel value
1045 * @tparam tSourceChannels Number of source data channels, with range [1, infinity)
1046 * @tparam tTargetChannels Number of target data channels, with range [1, infinity)
1047 * @tparam tPixelFunction Pixel modification function, receiving a pointer to the source pixel and a pointer to the target pixel
1048 */
1049 template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tPixelFunction)(const TSource*, TTarget*)>
1050 static void applyAdvancedPixelModifierSubset(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows);
1051
1052 /**
1053 * Applies a generic bivariate pixel operator to a subset of the frame rows, combining two source frames into one target frame.
1054 * @param source0 First source frame
1055 * @param source1 Second source frame
1056 * @param target The target frame
1057 * @param width The width of the source frame in pixel, with range [1, infinity)
1058 * @param height The height of the source frame in pixel, with range [1, infinity)
1059 * @param source0PaddingElements The number of padding elements at the end of each row of the first source, in elements, with range [0, infinity)
1060 * @param source1PaddingElements The number of padding elements at the end of each row of the second source, in elements, with range [0, infinity)
1061 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1062 * @param conversionFlag The conversion to be applied
1063 * @param firstRow First row to be handled
1064 * @param numberRows Number of rows to be handled
1065 * @tparam TSource0 Type of the first data source
1066 * @tparam TSource1 Type of the second data source
1067 * @tparam TTarget Type of the target
1068 * @tparam TIntermediate Type for the computation of intermediate result, e.g. if TSource0 and TSource1 are different
1069 * @tparam tSourceChannels Number of channels of the two sources, range: [1, infinity)
1070 * @tparam tTargetChannels Number of channels of the target, range: [1, infinity)
1071 * @tparam tOperator The operation (function) that is applied on both sources to yield the value for the target (called per pixel), receiving pointers to the first source pixel, the second source pixel, and the target pixel
1072 */
1073 template <typename TSource0, typename TSource1, typename TTarget, typename TIntermediate, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
1074 static void applyBivariateOperatorSubset(const TSource0* source0, const TSource1* source1, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows);
1075
1076 /**
1077 * Applies a row operator to a subset of all rows of a source image.
1078 * The row operator is given as function pointer and is intended to transform a source row to a target row.<br>
1079 * The function allows to implement e.g., frame filters with few lines of code, source and target frame must have the same size.
1080 * @param source The source frame to which the row operator is applied, must be valid
1081 * @param target The target frame receiving the result of the row operator, must be valid
1082 * @param width The width of the source frame and target frame in pixel, with range [1, infinity)
1083 * @param height The height of the source frame and target frame in pixel, with range [1, infinity)
1084 * @param sourceStrideElements The number of elements between the start points of two successive source rows, in elements, with range [width * tSourceChannels, infinity)
1085 * @param targetStrideElements The number of elements between the start points of two successive target rows, in elements, with range [width * tTargetChannels, infinity)
1086 * @param rowOperatorFunction The pointer to the row operator function, must be valid
1087 * @param firstRow The first row to be handled, with range [0, height - 1]
1088 * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1089 * @tparam TSource The data type of the source elements
1090 * @tparam TTarget The data type of the target elements
1091 * @tparam tSourceChannels The number of channels the source frame has, with range [1, infinity)
1092 * @tparam tTargetChannels The number of channels the target frame has, with range [1, infinity)
1093 */
1094 template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels>
1095 static void applyRowOperatorSubset(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels> rowOperatorFunction, const unsigned int firstRow, const unsigned int numberRows);
1096
1097 /**
1098 * Transforms a subset of a frame with generic pixel format (with zipped pixel information) like RGB24 or YUV24, to a frame with same pixel format and channel number.
1099 * @param source The source frame buffer, must be valid
1100 * @param target The target frame buffer, must be valid
1101 * @param width The width of the frame in pixel, with range [1, infinity)
1102 * @param height The height of the frame in pixel, with range [1, infinity)
1103 * @param conversionFlag The conversion to be applied
1104 * @param rowReversePixelOrderFunction The function able to reverse the pixel order, must be valid
1105 * @param bytesPerRow The actual number of bytes each row covers, not including optional padding bytes at the end of each row, with range [width, infinity)
1106 * @param sourceStrideBytes The number of bytes between the start points of two successive rows in the source frame, with range [0, infinity)
1107 * @param targetStrideBytes The number of bytes between the start points of two successive rows in the target frame, with range [0, infinity)
1108 * @param firstRow The first row to be handled, with range [0, height - 1]
1109 * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1110 */
1111 static void transformGenericSubset(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const RowReversePixelOrderFunction<void> rowReversePixelOrderFunction, const unsigned int bytesPerRow, const unsigned int sourceStrideBytes, const unsigned int targetStrideBytes, const unsigned int firstRow, const unsigned int numberRows);
1112
1113 /**
1114 * Converts a subset of the rows of an image with premultiplied alpha to a straight image (without premultiplied alpha), operating directly on the given image buffer.
1115 * @param frame The image to convert, must be valid
1116 * @param width The width of the image in pixel, with range [1, infinity)
1117 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
1118 * @param firstRow The first row to be handled, with range [0, height - 1]
1119 * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1120 * @tparam tChannels The number of frame channels, with range [2, infinity)
1121 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
1122 */
1123 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
1124 static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(uint8_t* const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1125
1126 /**
1127 * Converts a subset of the rows of a source image with premultiplied alpha to a straight image (without premultiplied alpha), writing the result to a target image.
1128 * @param source The source image to convert, must be valid
1129 * @param target The resulting converted target image, must be valid
1130 * @param width The width of the image in pixel, with range [1, infinity)
1131 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
1132 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1133 * @param firstRow The first row to be handled, with range [0, height - 1]
1134 * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1135 * @tparam tChannels The number of frame channels, with range [2, infinity)
1136 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
1137 */
1138 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
1139 static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1140
1141 /**
1142 * Converts a subset of the rows of an image with straight alpha (without premultiplied alpha) to an image with premultiplied alpha, operating directly on the given image buffer.
1143 * @param frame The image to convert, must be valid
1144 * @param width The width of the image in pixel, with range [1, infinity)
1145 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
1146 * @param firstRow The first row to be handled, with range [0, height - 1]
1147 * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1148 * @tparam tChannels The number of frame channels, with range [2, infinity)
1149 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
1150 */
1151 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
1152 static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(uint8_t* const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1153
1154 /**
1155 * Converts a subset of the rows of a source image with straight alpha (without premultiplied alpha) to an image with premultiplied alpha, writing the result to a target image.
1156 * @param source The source image to convert, must be valid
1157 * @param target The resulting converted target image, must be valid
1158 * @param width The width of the image in pixel, with range [1, infinity)
1159 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
1160 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1161 * @param firstRow The first row to be handled, with range [0, height - 1]
1162 * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1163 * @tparam tChannels The number of frame channels, with range [2, infinity)
1164 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
1165 */
1166 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
1167 static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1168
1169#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1170
1171 /**
1172 * Converts 16 pixels with 3 channels per pixel to 16 pixels with one channel per pixel by a linear combination of the three channels.
1173 * This function can be used to e.g., convert RGB24 to Y8, or BGR24 to Y8.
1174 * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
1175 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
1176 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1177 * @param target The pointer to the 16 target pixels (with 1 channel = 16 bytes) receiving the converted pixel data, must be valid
1178 * @param multiplicationFactors0_128_u_16x8 The multiplication factor for the first channel (8 identical 16 bit values), with ranges [0, 128], while the sum of all three factors must be 128
1179 * @param multiplicationFactors1_128_u_16x8 The multiplication factor for the second channel (8 identical 16 bit values), with ranges [0, 128], while the sum of all three factors must be 128
1180 * @param multiplicationFactors2_128_u_16x8 The multiplication factor for the third channel (8 identical 16 bit values), with ranges [0, 128], while the sum of all three factors must be 128
1181 */
1182 static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactors0_128_u_16x8, const __m128i& multiplicationFactors1_128_u_16x8, const __m128i& multiplicationFactors2_128_u_16x8);
1183
1184 /**
1185 * Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1186 * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1187 * The linear combination is defined by three integer multiplication factors for each source channel with 128 as denominator, plus one bias (translation) parameter for each target channel (with 1 as denominator).<br>
1188 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
1189 * The transformation is based on the following pattern:
1190 * <pre>
1191 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1192 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1193 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1194 * </pre>
1195 * With t target, s source, f factor, and b bias.
1196 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1197 * @param target The pointer to the 16 target pixels (with 3 channels = 48 bytes) receiving the converted pixel data, must be valid
1198 * @param factorChannel00_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1199 * @param factorChannel10_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1200 * @param factorChannel20_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1201 * @param factorChannel01_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1202 * @param factorChannel11_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1203 * @param factorChannel21_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1204 * @param factorChannel02_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1205 * @param factorChannel12_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1206 * @param factorChannel22_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1207 * @param biasChannel0_s_16x8 The bias (translation) value for the first target channel, with range [-127, 127]
1208 * @param biasChannel1_s_16x8 The bias (translation) value for the second target channel, with range [-127, 127]
1209 * @param biasChannel2_s_16x8 The bias (translation) value for the third target channel, with range [-127, 127]
1210 */
1211 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& factorChannel00_128_s_16x8, const __m128i& factorChannel10_128_s_16x8, const __m128i& factorChannel20_128_s_16x8, const __m128i& factorChannel01_128_s_16x8, const __m128i& factorChannel11_128_s_16x8, const __m128i& factorChannel21_128_s_16x8, const __m128i& factorChannel02_128_s_16x8, const __m128i& factorChannel12_128_s_16x8, const __m128i& factorChannel22_128_s_16x8, const __m128i& biasChannel0_s_16x8, const __m128i& biasChannel1_s_16x8, const __m128i& biasChannel2_s_16x8);
1212
1213 /**
1214 * Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1215 * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1216 * The linear combination is defined by three integer multiplication factors for each source channel with 1024 as denominator, plus one bias (translation) parameter for each target channel (with 1 as denominator; NOTE(review): the `_1024_` suffix of the bias parameter names suggests a denominator of 1024 instead - confirm against the implementation).<br>
1217 * Beware: As this function applies integer multiplication factors (with 10 bits precision), the conversion result has an accuracy of +/- 1 color intensity.<br>
1218 * The transformation is based on the following pattern:
1219 * <pre>
1220 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1221 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1222 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1223 * </pre>
1224 * With t target, s source, f factor, and b bias.
1225 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1226 * @param target The pointer to the 16 target pixels (with 3 channels = 48 bytes) receiving the converted pixel data, must be valid
1227 * @param factorChannel00_1024_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-1024 * 16, 1024 * 16]
1228 * @param factorChannel10_1024_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-1024 * 16, 1024 * 16]
1229 * @param factorChannel20_1024_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-1024 * 16, 1024 * 16]
1230 * @param factorChannel01_1024_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-1024 * 16, 1024 * 16]
1231 * @param factorChannel11_1024_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-1024 * 16, 1024 * 16]
1232 * @param factorChannel21_1024_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-1024 * 16, 1024 * 16]
1233 * @param factorChannel02_1024_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-1024 * 16, 1024 * 16]
1234 * @param factorChannel12_1024_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-1024 * 16, 1024 * 16]
1235 * @param factorChannel22_1024_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-1024 * 16, 1024 * 16]
1236 * @param biasChannel0_1024_s_32x4 The bias (translation) value for the first target channel, with range [-1024 * 16, 1024 * 16]
1237 * @param biasChannel1_1024_s_32x4 The bias (translation) value for the second target channel, with range [-1024 * 16, 1024 * 16]
1238 * @param biasChannel2_1024_s_32x4 The bias (translation) value for the third target channel, with range [-1024 * 16, 1024 * 16]
1239 */
1240 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& factorChannel00_1024_s_16x8, const __m128i& factorChannel10_1024_s_16x8, const __m128i& factorChannel20_1024_s_16x8, const __m128i& factorChannel01_1024_s_16x8, const __m128i& factorChannel11_1024_s_16x8, const __m128i& factorChannel21_1024_s_16x8, const __m128i& factorChannel02_1024_s_16x8, const __m128i& factorChannel12_1024_s_16x8, const __m128i& factorChannel22_1024_s_16x8, const __m128i& biasChannel0_1024_s_32x4, const __m128i& biasChannel1_1024_s_32x4, const __m128i& biasChannel2_1024_s_32x4);
1241
1242 /**
1243 * Converts 16 pixels with 4 channels per pixel to 16 pixels with one channel per pixel by a linear combination of the four channels.
1244 * Thus, this function can be used to e.g., convert RGBA32 to Y8, or ARGB32 to Y8, or RGB32 to Y8.
1245 * The linear combination is defined by one integer multiplication factor for each source channel, each with 128 as denominator.<br>
1246 * Beware: As this function applies integer multiplication factors (with 7 bits precision), the conversion result has an accuracy of +/- 2 color intensities.
1247 * @param source The pointer to the 16 source pixels (with 4 channels = 64 bytes) to convert, must be valid
1248 * @param target The pointer to the 16 target pixels (with 1 channel = 16 bytes) receiving the converted pixel data, must be valid
1249 * @param multiplicationFactors0123_128_s_32x The four individual multiplication factors, one for each source channel, each with range [0, 127], while the sum of all four factors must be 128
1250 */
1251 static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactors0123_128_s_32x);
1252
1253 /**
1254 * Converts 16 pixels with 4 channels per pixel to 16 pixels with two channels per pixel by a linear combination of the four channels.
1255 * Thus, this function can be used to e.g., convert RGBA32 to YA16, or ARGB32 to AY16.
1256 * The linear combination is defined by one integer multiplication factor for each source channel and each target channel, each with 128 as denominator.<br>
1257 * Beware: As this function applies integer multiplication factors (with 7 bits precision), the conversion result has an accuracy of +/- 2 color intensities.
1258 * @param source The pointer to the 16 source pixels (with 4 channels = 64 bytes) to convert, must be valid
1259 * @param target The pointer to the 16 target pixels (with 2 channels = 32 bytes) receiving the converted pixel data, must be valid
1260 * @param multiplicationFactorsChannel0_0123_128_s_16x8 The four individual multiplication factors for the first target channel (two sets), one for each source channel, each with range [0, 128], while the sum of all four factors must be 128
1261 * @param multiplicationFactorsChannel1_0123_128_s_16x8 The four individual multiplication factors for the second target channel (two sets), one for each source channel, each with range [0, 128], while the sum of all four factors must be 128
1262 */
1263 static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactorsChannel0_0123_128_s_16x8, const __m128i& multiplicationFactorsChannel1_0123_128_s_16x8);
1264
1265#endif // OCEAN_HARDWARE_SSE_VERSION >= 41
1266
1267#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1268
1269 /**
1270 * Converts 8 pixels with 3 channels per pixel to 8 pixels with one channel per pixel by a linear combination of the three channels.
1271 * Thus, this function can be used to e.g., convert RGB24 to Y8, or BGR24 to Y8.
1272 * The linear combination is defined by one integer multiplication factor for each source channel, each with 128 as denominator.<br>
1273 * Beware: As this function applies integer multiplication factors (with 7 bits precision), the conversion result has an accuracy of +/- 2 color intensities.
1274 * @param source The pointer to the 8 source pixels (with 3 channels = 24 bytes) to convert, must be valid
1275 * @param target The pointer to the 8 target pixels (with 1 channel = 8 bytes) receiving the converted pixel data, must be valid
1276 * @param factorChannel0_128_u_8x8 The multiplication factor (8 identical factors) for the first channel, with range [0, 128]
1277 * @param factorChannel1_128_u_8x8 The multiplication factor (8 identical factors) for the second channel, with range [0, 128 - factorChannel0 - factorChannel2]
1278 * @param factorChannel2_128_u_8x8 The multiplication factor (8 identical factors) for the third channel, with range [0, 128 - factorChannel0 - factorChannel1]
1279 * @tparam tUseFactorChannel0 True, if the values of factorChannel0 are not zero; False, if the values of factorChannel0 are zero
1280 * @tparam tUseFactorChannel1 True, if the values of factorChannel1 are not zero; False, if the values of factorChannel1 are zero
1281 * @tparam tUseFactorChannel2 True, if the values of factorChannel2 are not zero; False, if the values of factorChannel2 are zero
1282 */
1283 template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2>
1284 static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel0_128_u_8x8, const uint8x8_t& factorChannel1_128_u_8x8, const uint8x8_t& factorChannel2_128_u_8x8);
1285
1286 /**
1287 * Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear combination of the three channels, with a bias (translation) parameter subtracted from each source channel in advance.
1288 * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1289 * The linear combination is defined by three integer multiplication factors for each source channel with 64 as denominator, plus one bias (translation) parameter for each source channel (with 1 as denominator).<br>
1290 * Beware: As this function applies integer multiplication factors (with 6 bits precision), the conversion result has an accuracy of +/- 4 color intensities.<br>
1291 * The transformation is based on the following pattern:
1292 * <pre>
1293 * t0 = clamp(0, f00 * (s0 - b0) + f01 * (s1 - b1) + f02 * (s2 - b2), 255)
1294 * t1 = clamp(0, f10 * (s0 - b0) + f11 * (s1 - b1) + f12 * (s2 - b2), 255)
1295 * t2 = clamp(0, f20 * (s0 - b0) + f21 * (s1 - b1) + f22 * (s2 - b2), 255)
1296 * </pre>
1297 * With t target, s source, f factor, and b bias/translation.
1298 * @param source The pointer to the 8 source pixels (with 3 channels = 24 bytes) to convert, must be valid
1299 * @param target The pointer to the 8 target pixels (with 3 channels = 24 bytes) receiving the converted pixel data, must be valid
1300 * @param factorChannel00_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1301 * @param factorChannel10_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1302 * @param factorChannel20_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1303 * @param factorChannel01_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1304 * @param factorChannel11_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1305 * @param factorChannel21_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1306 * @param factorChannel02_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1307 * @param factorChannel12_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1308 * @param factorChannel22_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1309 * @param biasChannel0_u_8x8 The bias (translation) value subtracted from the first source channel, with range [0, 128]
1310 * @param biasChannel1_u_8x8 The bias (translation) value subtracted from the second source channel, with range [0, 128]
1311 * @param biasChannel2_u_8x8 The bias (translation) value subtracted from the third source channel, with range [0, 128]
1312 */
1313 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8);
1314
1315 /**
1316 * Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear combination of the three channels, with a bias (translation) parameter subtracted from each source channel in advance.
1317 * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1318 * The linear combination is defined by three integer multiplication factors for each source channel with 64 as denominator, plus one bias (translation) parameter for each source channel (with 1 as denominator).<br>
1319 * Beware: As this function applies integer multiplication factors (with 6 bits precision), the conversion result has an accuracy of +/- 4 color intensities.<br>
1320 * The transformation is based on the following pattern:
1321 * <pre>
1322 * t0 = clamp(0, f00 * (s0 - b0) + f01 * (s1 - b1) + f02 * (s2 - b2), 255)
1323 * t1 = clamp(0, f10 * (s0 - b0) + f11 * (s1 - b1) + f12 * (s2 - b2), 255)
1324 * t2 = clamp(0, f20 * (s0 - b0) + f21 * (s1 - b1) + f22 * (s2 - b2), 255)
1325 * </pre>
1326 * With t target, s source, f factor, and b bias/translation.
1327 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1328 * @param target The pointer to the 16 target pixels (with 3 channels = 48 bytes) receiving the converted pixel data, must be valid
1329 * @param factorChannel00_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1330 * @param factorChannel10_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1331 * @param factorChannel20_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1332 * @param factorChannel01_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1333 * @param factorChannel11_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1334 * @param factorChannel21_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1335 * @param factorChannel02_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1336 * @param factorChannel12_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1337 * @param factorChannel22_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1338 * @param biasChannel0_u_8x8 The bias (translation) value subtracted from the first source channel, with range [0, 128]
1339 * @param biasChannel1_u_8x8 The bias (translation) value subtracted from the second source channel, with range [0, 128]
1340 * @param biasChannel2_u_8x8 The bias (translation) value subtracted from the third source channel, with range [0, 128]
1341 */
1342 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8);
1343
1344 /**
1345 * Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1346 * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1347 * The linear combination is defined by three integer multiplication factors for each source channel with 128 as denominator, plus one bias (translation) parameter for each target channel (also with 128 as denominator).<br>
1348 * Beware: As this function applies integer multiplication factors (with 7 bits precision), the conversion result has an accuracy of +/- 2 color intensities.<br>
1349 * The transformation is based on the following pattern:
1350 * <pre>
1351 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1352 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1353 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1354 * </pre>
1355 * With t target, s source, f factor, and b bias.
1356 * @param source The pointer to the 8 source pixels (with 3 channels = 24 bytes) to convert, must be valid
1357 * @param target The pointer to the 8 target pixels (with 3 channels = 24 bytes) receiving the converted pixel data, must be valid
1358 * @param factorChannel00_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1359 * @param factorChannel10_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1360 * @param factorChannel20_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1361 * @param factorChannel01_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1362 * @param factorChannel11_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1363 * @param factorChannel21_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1364 * @param factorChannel02_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1365 * @param factorChannel12_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1366 * @param factorChannel22_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1367 * @param biasChannel0_128_s_16x8 The bias (translation) value for the first target channel, with range [-128 * 128, 128 * 128]
1368 * @param biasChannel1_128_s_16x8 The bias (translation) value for the second target channel, with range [-128 * 128, 128 * 128]
1369 * @param biasChannel2_128_s_16x8 The bias (translation) value for the third target channel, with range [-128 * 128, 128 * 128]
1370 */
1371 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_128_s_16x8, const int16x8_t& factorChannel10_128_s_16x8, const int16x8_t& factorChannel20_128_s_16x8, const int16x8_t& factorChannel01_128_s_16x8, const int16x8_t& factorChannel11_128_s_16x8, const int16x8_t& factorChannel21_128_s_16x8, const int16x8_t& factorChannel02_128_s_16x8, const int16x8_t& factorChannel12_128_s_16x8, const int16x8_t& factorChannel22_128_s_16x8, const int16x8_t& biasChannel0_128_s_16x8, const int16x8_t& biasChannel1_128_s_16x8, const int16x8_t& biasChannel2_128_s_16x8);
1372
1373 /**
1374 * Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1375 * Thus, this function can be used to e.g., convert YUV24 to RGB24, or YVU24 to BGR24.
1376 * The linear combination is defined by three integer multiplication factors for each source channel with 1024 as denominator, plus one bias (translation) parameter for each target channel (also with 1024 as denominator).<br>
1377 * Beware: As this function applies integer multiplication factors (with 10 bits precision), the conversion result has an accuracy of +/- 1 color intensity.<br>
1378 * The transformation is based on the following pattern:
1379 * <pre>
1380 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1381 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1382 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1383 * </pre>
1384 * With t target, s source, f factor, and b bias.
1385 * @param source The pointer to the 8 source pixels (with 3 channels = 24 bytes) to convert, must be valid
1386 * @param target The pointer to the 8 target pixels (with 3 channels = 24 bytes) receiving the converted pixel data, must be valid
1387 * @param factorChannel00_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the first target channel, with range [-32767, 32767]
1388 * @param factorChannel10_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the second target channel, with range [-32767, 32767]
1389 * @param factorChannel20_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the third target channel, with range [-32767, 32767]
1390 * @param factorChannel01_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the first target channel, with range [-32767, 32767]
1391 * @param factorChannel11_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the second target channel, with range [-32767, 32767]
1392 * @param factorChannel21_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the third target channel, with range [-32767, 32767]
1393 * @param factorChannel02_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the first target channel, with range [-32767, 32767]
1394 * @param factorChannel12_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the second target channel, with range [-32767, 32767]
1395 * @param factorChannel22_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the third target channel, with range [-32767, 32767]
1396 * @param biasChannel0_1024_s_32x4 The bias (translation) value for the first target channel, with range [-32767, 32767]
1397 * @param biasChannel1_1024_s_32x4 The bias (translation) value for the second target channel, with range [-32767, 32767]
1398 * @param biasChannel2_1024_s_32x4 The bias (translation) value for the third target channel, with range [-32767, 32767]
1399 */
1400 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x4_t& factorChannel00_1024_s_16x4, const int16x4_t& factorChannel10_1024_s_16x4, const int16x4_t& factorChannel20_1024_s_16x4, const int16x4_t& factorChannel01_1024_s_16x4, const int16x4_t& factorChannel11_1024_s_16x4, const int16x4_t& factorChannel21_1024_s_16x4, const int16x4_t& factorChannel02_1024_s_16x4, const int16x4_t& factorChannel12_1024_s_16x4, const int16x4_t& factorChannel22_1024_s_16x4, const int32x4_t& biasChannel0_1024_s_32x4, const int32x4_t& biasChannel1_1024_s_32x4, const int32x4_t& biasChannel2_1024_s_32x4);
1401
1402 /**
1403 * Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1404 * Thus, this function can be used to e.g., convert YUV24 to RGB24, or YVU24 to BGR24.
1405 * The linear combination is defined by three integer multiplication factors for each source channel with 1024 as denominator, plus one bias (translation) parameter for each target channel (also with 1024 as denominator).<br>
1406 * Beware: As this function applies integer multiplication factors (with 10 bits precision), the conversion result has an accuracy of +/- 1 color intensity.<br>
1407 * The transformation is based on the following pattern:
1408 * <pre>
1409 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1410 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1411 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1412 * </pre>
1413 * With t target, s source, f factor, and b bias.
1414 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1415 * @param target The pointer to the 16 target pixels (with 3 channels = 48 bytes) receiving the converted pixel data, must be valid
1416 * @param factorChannel00_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the first target channel, with range [-32767, 32767]
1417 * @param factorChannel10_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the second target channel, with range [-32767, 32767]
1418 * @param factorChannel20_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the third target channel, with range [-32767, 32767]
1419 * @param factorChannel01_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the first target channel, with range [-32767, 32767]
1420 * @param factorChannel11_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the second target channel, with range [-32767, 32767]
1421 * @param factorChannel21_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the third target channel, with range [-32767, 32767]
1422 * @param factorChannel02_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the first target channel, with range [-32767, 32767]
1423 * @param factorChannel12_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the second target channel, with range [-32767, 32767]
1424 * @param factorChannel22_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the third target channel, with range [-32767, 32767]
1425 * @param biasChannel0_1024_s_32x4 The bias (translation) value for the first target channel, with range [-32767, 32767]
1426 * @param biasChannel1_1024_s_32x4 The bias (translation) value for the second target channel, with range [-32767, 32767]
1427 * @param biasChannel2_1024_s_32x4 The bias (translation) value for the third target channel, with range [-32767, 32767]
1428 */
1429 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x4_t& factorChannel00_1024_s_16x4, const int16x4_t& factorChannel10_1024_s_16x4, const int16x4_t& factorChannel20_1024_s_16x4, const int16x4_t& factorChannel01_1024_s_16x4, const int16x4_t& factorChannel11_1024_s_16x4, const int16x4_t& factorChannel21_1024_s_16x4, const int16x4_t& factorChannel02_1024_s_16x4, const int16x4_t& factorChannel12_1024_s_16x4, const int16x4_t& factorChannel22_1024_s_16x4, const int32x4_t& biasChannel0_1024_s_32x4, const int32x4_t& biasChannel1_1024_s_32x4, const int32x4_t& biasChannel2_1024_s_32x4);
1430
1431 /**
1432 * Converts 16 pixels with 3 channels per pixel to 16 pixels with 3 channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1433 * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1434 * The linear combination is defined by three integer multiplication factors for each source channel with 128 as denominator, plus one bias (translation) parameter for each target channel (with 128 as denominator).<br>
1435 * Beware: As this function applies integer multiplication factors (with 7 bits precision), the conversion result has an accuracy of +/- 2 color intensities.<br>
1436 * The transformation is based on the following pattern:
1437 * <pre>
1438 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1439 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1440 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1441 * </pre>
1442 * With t target, s source, f factor, and b bias.
1443 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1444 * @param target The pointer to the 16 target pixels (with 3 channels = 48 bytes) receiving the converted pixel data, must be valid
1445 * @param factorChannel00_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1446 * @param factorChannel10_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1447 * @param factorChannel20_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1448 * @param factorChannel01_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1449 * @param factorChannel11_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1450 * @param factorChannel21_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1451 * @param factorChannel02_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1452 * @param factorChannel12_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1453 * @param factorChannel22_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1454 * @param biasChannel0_128_s_16x8 The bias (translation) value for the first target channel, with range [-128 * 128, 128 * 128]
1455 * @param biasChannel1_128_s_16x8 The bias (translation) value for the second target channel, with range [-128 * 128, 128 * 128]
1456 * @param biasChannel2_128_s_16x8 The bias (translation) value for the third target channel, with range [-128 * 128, 128 * 128]
1457 */
1458 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_128_s_16x8, const int16x8_t& factorChannel10_128_s_16x8, const int16x8_t& factorChannel20_128_s_16x8, const int16x8_t& factorChannel01_128_s_16x8, const int16x8_t& factorChannel11_128_s_16x8, const int16x8_t& factorChannel21_128_s_16x8, const int16x8_t& factorChannel02_128_s_16x8, const int16x8_t& factorChannel12_128_s_16x8, const int16x8_t& factorChannel22_128_s_16x8, const int16x8_t& biasChannel0_128_s_16x8, const int16x8_t& biasChannel1_128_s_16x8, const int16x8_t& biasChannel2_128_s_16x8);
1459
1460 /**
1461 * Converts 16 pixels with 3 channels per pixel to 16 pixels with 4 channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1462 * The fourth channel is set to a constant value, e.g., for an alpha channel.<br>
1463 * Thus, this function can be used to e.g., convert YUV24 to RGBA32, or YVU24 to BGRA32.<br>
1464 * The linear combination is defined by three integer multiplication factor for each source channel with 128 as denominator. plus one bias (translation) parameter for each source channel (with 128 as denominator).<br>
1465 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
1466 * The transformation is based on the following pattern:
1467 * <pre>
1468 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1469 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1470 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1471 * t3 = valueChannel3
1472 * </pre>
1473 * With t target, s source, f factor, and b bias.
1474 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1475 * @param target The pointer to the 16 target pixels (with 4 channels = 64 bytes) receiving the converted pixel data, must be valid
1476 * @param factorChannel00_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1477 * @param factorChannel10_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1478 * @param factorChannel20_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1479 * @param factorChannel01_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1480 * @param factorChannel11_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1481 * @param factorChannel21_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1482 * @param factorChannel02_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1483 * @param factorChannel12_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1484 * @param factorChannel22_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1485 * @param biasChannel0_u_8x8 The bias (translation) value for the first target channel, with range [0, 128]
1486 * @param biasChannel1_u_8x8 The bias (translation) value for the second target channel, with range [0, 128]
1487 * @param biasChannel2_u_8x8 The bias (translation) value for the third target channel, with range [0, 128]
1488 * @param channelValue3_u_8x16 The constant value for the fourth target channel, with range [0, 255]
1489 */
1490 static OCEAN_FORCE_INLINE void convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8, const uint8x16_t& channelValue3_u_8x16);
1491
1492 /**
1493 * Converts 8 pixels with 4 channels per pixel to 8 pixels with one channel per pixel by a linear combination of the four channels.
1494 * Thus, this function can be used to e.g., convert RGBA32 to Y8, or ARGB32 to Y8, or RGB32 to Y8.
1495 * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
1496 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
1497 * @param source The pointer to the 8 source pixels (with 4 channels = 32 bytes) to convert, must be valid
1498 * @param target The pointer to the 8 target pixels (with 1 channel = 8 bytes) receiving the converted pixel data, must be valid
1499 * @param factorChannel0_128_u_8x8 The multiplication factor (8 identical factors) for the first channel, with range [0, 127]
1500 * @param factorChannel1_128_u_8x8 The multiplication factor (8 identical factors) for the second channel, with range [0, 127 - factorChannel0 - factorChannel2 - factorChannel3]
1501 * @param factorChannel2_128_u_8x8 The multiplication factor (8 identical factors) for the third channel, with range [0, 127 - factorChannel0 - factorChannel1 - factorChannel3]
1502 * @param factorChannel3_128_u_8x8 The multiplication factor (8 identical factors) for the fourth channel, with range [0, 127 - factorChannel0 - factorChannel1 - factorChannel2]
1503 * @tparam tUseFactorChannel0 True, if the value(s) of factorChannel0 is not zero; False, if the value(s) of factorChannel0 is zero
1504 * @tparam tUseFactorChannel1 True, if the value(s) of factorChannel1 is not zero; False, if the value(s) of factorChannel1 is zero
1505 * @tparam tUseFactorChannel2 True, if the value(s) of factorChannel2 is not zero; False, if the value(s) of factorChannel2 is zero
1506 * @tparam tUseFactorChannel3 True, if the value(s) of factorChannel3 is not zero; False, if the value(s) of factorChannel3 is zero
1507 */
1508 template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2, bool tUseFactorChannel3>
1509 static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel0_128_u_8x8, const uint8x8_t& factorChannel1_128_u_8x8, const uint8x8_t& factorChannel2_128_u_8x8, const uint8x8_t& factorChannel3_128_u_8x8);
1510
1511 /**
1512 * Converts 8 pixels with 4 channels per pixel to 8 pixels with two channels per pixel by a linear combination of the four channels.
1513 * Thus, this function can be used to e.g., convert RGBA32 to YA16, or ARGB32 to AY16.
1514 * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
1515 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
1516 * @param source The pointer to the 8 source pixels (with 4 channels = 32 bytes) to convert, must be valid
1517 * @param target The pointer to the 8 target pixels (with 2 channels = 16 bytes) receiving the converted pixel data, must be valid
1518 * @param factorChannel00_128_u_8x8 The multiplication factor (8 identical factors) for the first target and first source channel, with range [0, 127]
1519 * @param factorChannel10_128_u_8x8 The multiplication factor (8 identical factors) for the second target and first source channel, with range [0, 127]
1520 * @param factorChannel01_128_u_8x8 The multiplication factor (8 identical factors) for the first target and second source channel, with range [0, 127 - factorChannel00 - factorChannel02 - factorChannel03]
1521 * @param factorChannel11_128_u_8x8 The multiplication factor (8 identical factors) for the second target and second source channel, with range [0, 127 - factorChannel10 - factorChannel12 - factorChannel13]
1522 * @param factorChannel02_128_u_8x8 The multiplication factor (8 identical factors) for the first target and third source channel, with range [0, 127 - factorChannel00 - factorChannel01 - factorChannel03]
1523 * @param factorChannel12_128_u_8x8 The multiplication factor (8 identical factors) for the second target and third source channel, with range [0, 127 - factorChannel10 - factorChannel11 - factorChannel13]
1524 * @param factorChannel03_128_u_8x8 The multiplication factor (8 identical factors) for the first target and fourth source channel, with range [0, 127 - factorChannel00 - factorChannel01 - factorChannel02]
1525 * @param factorChannel13_128_u_8x8 The multiplication factor (8 identical factors) for the second target and fourth source channel, with range [0, 127 - factorChannel10 - factorChannel11 - factorChannel12]
1526 */
1527 static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel00_128_u_8x8, const uint8x8_t& factorChannel10_128_u_8x8, const uint8x8_t& factorChannel01_128_u_8x8, const uint8x8_t& factorChannel11_128_u_8x8, const uint8x8_t& factorChannel02_128_u_8x8, const uint8x8_t& factorChannel12_128_u_8x8, const uint8x8_t& factorChannel03_128_u_8x8, const uint8x8_t& factorChannel13_128_u_8x8);
1528
1529#endif // OCEAN_HARDWARE_NEON_VERSION >= 10
1530
1531};
1532
1533#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1534
1535template <>
1536inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 2u>(const uint8_t* const sourceFrame, uint8_t* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
1537{
1538 ocean_assert(sourceFrame != nullptr);
1539 ocean_assert(targetFrames != nullptr);
1540
1541 ocean_assert(width != 0u && height != 0u);
1542 ocean_assert(channels == 2u);
1543
1544 constexpr unsigned int tChannels = 2u;
1545
1546 bool allTargetFramesContinuous = true;
1547
1548 if (targetFramesPaddingElements != nullptr)
1549 {
1550 for (unsigned int n = 0u; n < tChannels; ++n)
1551 {
1552 if (targetFramesPaddingElements[n] != 0u)
1553 {
1554 allTargetFramesContinuous = false;
1555 break;
1556 }
1557 }
1558 }
1559
1560 const uint8_t* source = sourceFrame;
1561 uint8_t* target0 = targetFrames[0];
1562 uint8_t* target1 = targetFrames[1];
1563
1564 constexpr unsigned int tBlockSize = 16u;
1565
1566 uint8x16x2_t source_8x16x2;
1567
1568 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1569 {
1570 const unsigned int pixels = width * height;
1571 const unsigned int blocks = pixels / tBlockSize;
1572 const unsigned int remaining = pixels % tBlockSize;
1573
1574 for (unsigned int n = 0u; n < blocks; ++n)
1575 {
1576 source_8x16x2 = vld2q_u8(source);
1577
1578 vst1q_u8(target0, source_8x16x2.val[0]);
1579 vst1q_u8(target1, source_8x16x2.val[1]);
1580
1581 source += tBlockSize * tChannels;
1582
1583 target0 += tBlockSize;
1584 target1 += tBlockSize;
1585 }
1586
1587 for (unsigned int n = 0u; n < remaining; ++n)
1588 {
1589 target0[n] = source[n * tChannels + 0u];
1590 target1[n] = source[n * tChannels + 1u];
1591 }
1592 }
1593 else
1594 {
1595 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[0];
1596 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[1];
1597
1598 const unsigned int blocks = width / tBlockSize;
1599 const unsigned int remaining = width % tBlockSize;
1600
1601 for (unsigned int y = 0u; y < height; ++y)
1602 {
1603 for (unsigned int n = 0u; n < blocks; ++n)
1604 {
1605 source_8x16x2 = vld2q_u8(source);
1606
1607 vst1q_u8(target0, source_8x16x2.val[0]);
1608 vst1q_u8(target1, source_8x16x2.val[1]);
1609
1610 source += tBlockSize * tChannels;
1611
1612 target0 += tBlockSize;
1613 target1 += tBlockSize;
1614 }
1615
1616 for (unsigned int n = 0u; n < remaining; ++n)
1617 {
1618 target0[n] = source[n * tChannels + 0u];
1619 target1[n] = source[n * tChannels + 1u];
1620 }
1621
1622 source += remaining * tChannels + sourceFramePaddingElements;
1623 target0 += remaining + targetFrame0PaddingElements;
1624 target1 += remaining + targetFrame1PaddingElements;
1625 }
1626 }
1627}
1628
1629template <>
1630inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 3u>(const uint8_t* const sourceFrame, uint8_t* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
1631{
1632 ocean_assert(sourceFrame != nullptr);
1633 ocean_assert(targetFrames != nullptr);
1634
1635 ocean_assert(width != 0u && height != 0u);
1636 ocean_assert(channels == 3u);
1637
1638 constexpr unsigned int tChannels = 3u;
1639
1640 bool allTargetFramesContinuous = true;
1641
1642 if (targetFramesPaddingElements != nullptr)
1643 {
1644 for (unsigned int n = 0u; n < tChannels; ++n)
1645 {
1646 if (targetFramesPaddingElements[n] != 0u)
1647 {
1648 allTargetFramesContinuous = false;
1649 break;
1650 }
1651 }
1652 }
1653
1654 const uint8_t* source = sourceFrame;
1655 uint8_t* target0 = targetFrames[0];
1656 uint8_t* target1 = targetFrames[1];
1657 uint8_t* target2 = targetFrames[2];
1658
1659 constexpr unsigned int tBlockSize = 16u;
1660
1661 uint8x16x3_t source_8x16x3;
1662
1663 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1664 {
1665 const unsigned int pixels = width * height;
1666 const unsigned int blocks = pixels / tBlockSize;
1667 const unsigned int remaining = pixels % tBlockSize;
1668
1669 for (unsigned int n = 0u; n < blocks; ++n)
1670 {
1671 source_8x16x3 = vld3q_u8(source);
1672
1673 vst1q_u8(target0, source_8x16x3.val[0]);
1674 vst1q_u8(target1, source_8x16x3.val[1]);
1675 vst1q_u8(target2, source_8x16x3.val[2]);
1676
1677 source += tBlockSize * tChannels;
1678
1679 target0 += tBlockSize;
1680 target1 += tBlockSize;
1681 target2 += tBlockSize;
1682 }
1683
1684 for (unsigned int n = 0u; n < remaining; ++n)
1685 {
1686 target0[n] = source[n * tChannels + 0u];
1687 target1[n] = source[n * tChannels + 1u];
1688 target2[n] = source[n * tChannels + 2u];
1689 }
1690 }
1691 else
1692 {
1693 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[0];
1694 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[1];
1695 const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[2];
1696
1697 const unsigned int blocks = width / tBlockSize;
1698 const unsigned int remaining = width % tBlockSize;
1699
1700 for (unsigned int y = 0u; y < height; ++y)
1701 {
1702 for (unsigned int n = 0u; n < blocks; ++n)
1703 {
1704 source_8x16x3 = vld3q_u8(source);
1705
1706 vst1q_u8(target0, source_8x16x3.val[0]);
1707 vst1q_u8(target1, source_8x16x3.val[1]);
1708 vst1q_u8(target2, source_8x16x3.val[2]);
1709
1710 source += tBlockSize * tChannels;
1711
1712 target0 += tBlockSize;
1713 target1 += tBlockSize;
1714 target2 += tBlockSize;
1715 }
1716
1717 for (unsigned int n = 0u; n < remaining; ++n)
1718 {
1719 target0[n] = source[n * tChannels + 0u];
1720 target1[n] = source[n * tChannels + 1u];
1721 target2[n] = source[n * tChannels + 2u];
1722 }
1723
1724 source += remaining * tChannels + sourceFramePaddingElements;
1725 target0 += remaining + targetFrame0PaddingElements;
1726 target1 += remaining + targetFrame1PaddingElements;
1727 target2 += remaining + targetFrame2PaddingElements;
1728 }
1729 }
1730}
1731
1732template <>
1733inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 4u>(const uint8_t* const sourceFrame, uint8_t* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
1734{
1735 ocean_assert(sourceFrame != nullptr);
1736 ocean_assert(targetFrames != nullptr);
1737
1738 ocean_assert(width != 0u && height != 0u);
1739 ocean_assert(channels == 4u);
1740
1741 constexpr unsigned int tChannels = 4u;
1742
1743 bool allTargetFramesContinuous = true;
1744
1745 if (targetFramesPaddingElements != nullptr)
1746 {
1747 for (unsigned int n = 0u; n < tChannels; ++n)
1748 {
1749 if (targetFramesPaddingElements[n] != 0u)
1750 {
1751 allTargetFramesContinuous = false;
1752 break;
1753 }
1754 }
1755 }
1756
1757 const uint8_t* source = sourceFrame;
1758 uint8_t* target0 = targetFrames[0];
1759 uint8_t* target1 = targetFrames[1];
1760 uint8_t* target2 = targetFrames[2];
1761 uint8_t* target3 = targetFrames[3];
1762
1763 constexpr unsigned int tBlockSize = 16u;
1764
1765 uint8x16x4_t source_8x16x4;
1766
1767 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1768 {
1769 const unsigned int pixels = width * height;
1770 const unsigned int blocks = pixels / tBlockSize;
1771 const unsigned int remaining = pixels % tBlockSize;
1772
1773 for (unsigned int n = 0u; n < blocks; ++n)
1774 {
1775 source_8x16x4 = vld4q_u8(source);
1776
1777 vst1q_u8(target0, source_8x16x4.val[0]);
1778 vst1q_u8(target1, source_8x16x4.val[1]);
1779 vst1q_u8(target2, source_8x16x4.val[2]);
1780 vst1q_u8(target3, source_8x16x4.val[3]);
1781
1782 source += tBlockSize * tChannels;
1783
1784 target0 += tBlockSize;
1785 target1 += tBlockSize;
1786 target2 += tBlockSize;
1787 target3 += tBlockSize;
1788 }
1789
1790 for (unsigned int n = 0u; n < remaining; ++n)
1791 {
1792 target0[n] = source[n * tChannels + 0u];
1793 target1[n] = source[n * tChannels + 1u];
1794 target2[n] = source[n * tChannels + 2u];
1795 target3[n] = source[n * tChannels + 3u];
1796 }
1797 }
1798 else
1799 {
1800 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[0];
1801 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[1];
1802 const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[2];
1803 const unsigned int targetFrame3PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[3];
1804
1805 const unsigned int blocks = width / tBlockSize;
1806 const unsigned int remaining = width % tBlockSize;
1807
1808 for (unsigned int y = 0u; y < height; ++y)
1809 {
1810 for (unsigned int n = 0u; n < blocks; ++n)
1811 {
1812 source_8x16x4 = vld4q_u8(source);
1813
1814 vst1q_u8(target0, source_8x16x4.val[0]);
1815 vst1q_u8(target1, source_8x16x4.val[1]);
1816 vst1q_u8(target2, source_8x16x4.val[2]);
1817 vst1q_u8(target3, source_8x16x4.val[3]);
1818
1819 source += tBlockSize * tChannels;
1820
1821 target0 += tBlockSize;
1822 target1 += tBlockSize;
1823 target2 += tBlockSize;
1824 target3 += tBlockSize;
1825 }
1826
1827 for (unsigned int n = 0u; n < remaining; ++n)
1828 {
1829 target0[n] = source[n * tChannels + 0u];
1830 target1[n] = source[n * tChannels + 1u];
1831 target2[n] = source[n * tChannels + 2u];
1832 target3[n] = source[n * tChannels + 3u];
1833 }
1834
1835 source += remaining * tChannels + sourceFramePaddingElements;
1836 target0 += remaining + targetFrame0PaddingElements;
1837 target1 += remaining + targetFrame1PaddingElements;
1838 target2 += remaining + targetFrame2PaddingElements;
1839 target3 += remaining + targetFrame3PaddingElements;
1840 }
1841 }
1842}
1843
1844#endif // OCEAN_HARDWARE_NEON_VERSION
1845
1846template <typename TSource, typename TTarget, unsigned int tChannels>
1847void FrameChannels::separateTo1Channel(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
1848{
1849 ocean_assert(sourceFrame != nullptr);
1850 ocean_assert(targetFrames != nullptr);
1851
1852 ocean_assert(width != 0u && height != 0u);
1853
1854 ocean_assert(tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME || tChannels == channels);
1855
1856 if constexpr (tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME)
1857 {
1858 separateTo1ChannelRuntime<TSource, TTarget>(sourceFrame, targetFrames, width, height, channels, sourceFramePaddingElements, targetFramesPaddingElements);
1859 return;
1860 }
1861
1862#ifdef OCEAN_DEBUG
1863 for (unsigned int c = 0u; c < tChannels; ++c)
1864 {
1865 ocean_assert(targetFrames[c] != nullptr);
1866 }
1867#endif
1868
1869 if (sourceFramePaddingElements == 0u && targetFramesPaddingElements == nullptr)
1870 {
1871 for (unsigned int n = 0u; n < width * height; ++n)
1872 {
1873 for (unsigned int c = 0u; c < tChannels; ++c)
1874 {
1875 targetFrames[c][n] = TTarget(sourceFrame[n * tChannels + c]);
1876 }
1877 }
1878 }
1879 else if (targetFramesPaddingElements == nullptr)
1880 {
1881 ocean_assert(sourceFramePaddingElements != 0u);
1882
1883 const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
1884
1885 for (unsigned int y = 0u; y < height; ++y)
1886 {
1887 const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
1888
1889 const unsigned int targetRowOffset = y * width;
1890
1891 for (unsigned int x = 0u; x < width; ++x)
1892 {
1893 for (unsigned int c = 0u; c < tChannels; ++c)
1894 {
1895 *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * tChannels + c));
1896 }
1897 }
1898 }
1899 }
1900 else
1901 {
1902 const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
1903
1904 Indices32 targetFrameStrideElements(tChannels);
1905
1906 for (unsigned int c = 0u; c < tChannels; ++c)
1907 {
1908 targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
1909 }
1910
1911 for (unsigned int y = 0u; y < height; ++y)
1912 {
1913 const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
1914
1915 for (unsigned int x = 0u; x < width; ++x)
1916 {
1917 for (unsigned int c = 0u; c < tChannels; ++c)
1918 {
1919 *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * tChannels + c));
1920 }
1921 }
1922 }
1923 }
1924}
1925
1926template <typename TSource, typename TTarget>
1927void FrameChannels::separateTo1Channel(const TSource* const sourceFrame, const std::initializer_list<TTarget*>& targetFrames, const unsigned int width, const unsigned int height, const unsigned int sourceFramePaddingElements, const std::initializer_list<const unsigned int>& targetFramesPaddingElements)
1928{
1929 ocean_assert(targetFrames.size() >= 1);
1930 ocean_assert(targetFramesPaddingElements.size() == 0 || targetFrames.size() == targetFramesPaddingElements.size());
1931
1932 if (targetFrames.size() == 2)
1933 {
1934 separateTo1Channel<TSource, TTarget, 2u>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
1935 }
1936 else if (targetFrames.size() == 3)
1937 {
1938 separateTo1Channel<TSource, TTarget, 3u>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
1939 }
1940 else if (targetFrames.size() == 4)
1941 {
1942 separateTo1Channel<TSource, TTarget, 4u>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
1943 }
1944 else
1945 {
1946 separateTo1Channel<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
1947 }
1948}
1949
1950#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1951
1952template <>
1953inline void FrameChannels::zipChannels<uint8_t, uint8_t, 2u>(const uint8_t* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
1954{
1955 ocean_assert(sourceFrames != nullptr);
1956 ocean_assert(targetFrame != nullptr);
1957
1958 ocean_assert(width != 0u && height != 0u);
1959 ocean_assert(channels == 2u);
1960
1961 constexpr unsigned int tChannels = 2u;
1962
1963 bool allSourceFramesContinuous = true;
1964
1965 if (sourceFramesPaddingElements != nullptr)
1966 {
1967 for (unsigned int n = 0u; n < tChannels; ++n)
1968 {
1969 if (sourceFramesPaddingElements[n] != 0u)
1970 {
1971 allSourceFramesContinuous = false;
1972 break;
1973 }
1974 }
1975 }
1976
1977 const uint8_t* source0 = sourceFrames[0];
1978 const uint8_t* source1 = sourceFrames[1];
1979 uint8_t* target = targetFrame;
1980
1981 constexpr unsigned int tBlockSize = 16u;
1982
1983 uint8x16x2_t source_8x16x2;
1984
1985 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
1986 {
1987 const unsigned int pixels = width * height;
1988 const unsigned int blocks = pixels / tBlockSize;
1989 const unsigned int remaining = pixels % tBlockSize;
1990
1991 for (unsigned int n = 0u; n < blocks; ++n)
1992 {
1993 source_8x16x2.val[0] = vld1q_u8(source0);
1994 source_8x16x2.val[1] = vld1q_u8(source1);
1995
1996 vst2q_u8(target, source_8x16x2);
1997
1998 source0 += tBlockSize;
1999 source1 += tBlockSize;
2000
2001 target += tBlockSize * tChannels;
2002 }
2003
2004 for (unsigned int n = 0u; n < remaining; ++n)
2005 {
2006 target[n * tChannels + 0u] = source0[n];
2007 target[n * tChannels + 1u] = source1[n];
2008 }
2009 }
2010 else
2011 {
2012 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2013 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2014
2015 const unsigned int blocks = width / tBlockSize;
2016 const unsigned int remaining = width % tBlockSize;
2017
2018 for (unsigned int y = 0u; y < height; ++y)
2019 {
2020 for (unsigned int n = 0u; n < blocks; ++n)
2021 {
2022 source_8x16x2.val[0] = vld1q_u8(source0);
2023 source_8x16x2.val[1] = vld1q_u8(source1);
2024
2025 vst2q_u8(target, source_8x16x2);
2026
2027 source0 += tBlockSize;
2028 source1 += tBlockSize;
2029
2030 target += tBlockSize * tChannels;
2031 }
2032
2033 for (unsigned int n = 0u; n < remaining; ++n)
2034 {
2035 target[n * tChannels + 0u] = source0[n];
2036 target[n * tChannels + 1u] = source1[n];
2037 }
2038
2039 source0 += remaining + sourceFrame0PaddingElements;
2040 source1 += remaining + sourceFrame1PaddingElements;
2041 target += remaining * tChannels + targetFramePaddingElements;
2042 }
2043 }
2044}
2045
2046template <>
2047inline void FrameChannels::zipChannels<uint8_t, uint8_t, 3u>(const uint8_t* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2048{
2049 ocean_assert(sourceFrames != nullptr);
2050 ocean_assert(targetFrame != nullptr);
2051
2052 ocean_assert(width != 0u && height != 0u);
2053 ocean_assert(channels == 3u);
2054
2055 constexpr unsigned int tChannels = 3u;
2056
2057 bool allSourceFramesContinuous = true;
2058
2059 if (sourceFramesPaddingElements != nullptr)
2060 {
2061 for (unsigned int n = 0u; n < tChannels; ++n)
2062 {
2063 if (sourceFramesPaddingElements[n] != 0u)
2064 {
2065 allSourceFramesContinuous = false;
2066 break;
2067 }
2068 }
2069 }
2070
2071 const uint8_t* source0 = sourceFrames[0];
2072 const uint8_t* source1 = sourceFrames[1];
2073 const uint8_t* source2 = sourceFrames[2];
2074 uint8_t* target = targetFrame;
2075
2076 constexpr unsigned int tBlockSize = 16u;
2077
2078 uint8x16x3_t source_8x16x3;
2079
2080 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2081 {
2082 const unsigned int pixels = width * height;
2083 const unsigned int blocks = pixels / tBlockSize;
2084 const unsigned int remaining = pixels % tBlockSize;
2085
2086 for (unsigned int n = 0u; n < blocks; ++n)
2087 {
2088 source_8x16x3.val[0] = vld1q_u8(source0);
2089 source_8x16x3.val[1] = vld1q_u8(source1);
2090 source_8x16x3.val[2] = vld1q_u8(source2);
2091
2092 vst3q_u8(target, source_8x16x3);
2093
2094 source0 += tBlockSize;
2095 source1 += tBlockSize;
2096 source2 += tBlockSize;
2097
2098 target += tBlockSize * tChannels;
2099 }
2100
2101 for (unsigned int n = 0u; n < remaining; ++n)
2102 {
2103 target[n * tChannels + 0u] = source0[n];
2104 target[n * tChannels + 1u] = source1[n];
2105 target[n * tChannels + 2u] = source2[n];
2106 }
2107 }
2108 else
2109 {
2110 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2111 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2112 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[2];
2113
2114 const unsigned int blocks = width / tBlockSize;
2115 const unsigned int remaining = width % tBlockSize;
2116
2117 for (unsigned int y = 0u; y < height; ++y)
2118 {
2119 for (unsigned int n = 0u; n < blocks; ++n)
2120 {
2121 source_8x16x3.val[0] = vld1q_u8(source0);
2122 source_8x16x3.val[1] = vld1q_u8(source1);
2123 source_8x16x3.val[2] = vld1q_u8(source2);
2124
2125 vst3q_u8(target, source_8x16x3);
2126
2127 source0 += tBlockSize;
2128 source1 += tBlockSize;
2129 source2 += tBlockSize;
2130
2131 target += tBlockSize * tChannels;
2132 }
2133
2134 for (unsigned int n = 0u; n < remaining; ++n)
2135 {
2136 target[n * tChannels + 0u] = source0[n];
2137 target[n * tChannels + 1u] = source1[n];
2138 target[n * tChannels + 2u] = source2[n];
2139 }
2140
2141 source0 += remaining + sourceFrame0PaddingElements;
2142 source1 += remaining + sourceFrame1PaddingElements;
2143 source2 += remaining + sourceFrame2PaddingElements;
2144 target += remaining * tChannels + targetFramePaddingElements;
2145 }
2146 }
2147}
2148
2149template <>
2150inline void FrameChannels::zipChannels<uint8_t, uint8_t, 4u>(const uint8_t* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2151{
2152 ocean_assert(sourceFrames != nullptr);
2153 ocean_assert(targetFrame != nullptr);
2154
2155 ocean_assert(width != 0u && height != 0u);
2156 ocean_assert(channels == 4u);
2157
2158 constexpr unsigned int tChannels = 4u;
2159
2160 bool allSourceFramesContinuous = true;
2161
2162 if (sourceFramesPaddingElements != nullptr)
2163 {
2164 for (unsigned int n = 0u; n < tChannels; ++n)
2165 {
2166 if (sourceFramesPaddingElements[n] != 0u)
2167 {
2168 allSourceFramesContinuous = false;
2169 break;
2170 }
2171 }
2172 }
2173
2174 const uint8_t* source0 = sourceFrames[0];
2175 const uint8_t* source1 = sourceFrames[1];
2176 const uint8_t* source2 = sourceFrames[2];
2177 const uint8_t* source3 = sourceFrames[3];
2178 uint8_t* target = targetFrame;
2179
2180 constexpr unsigned int tBlockSize = 16u;
2181
2182 uint8x16x4_t source_8x16x4;
2183
2184 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2185 {
2186 const unsigned int pixels = width * height;
2187 const unsigned int blocks = pixels / tBlockSize;
2188 const unsigned int remaining = pixels % tBlockSize;
2189
2190 for (unsigned int n = 0u; n < blocks; ++n)
2191 {
2192 source_8x16x4.val[0] = vld1q_u8(source0);
2193 source_8x16x4.val[1] = vld1q_u8(source1);
2194 source_8x16x4.val[2] = vld1q_u8(source2);
2195 source_8x16x4.val[3] = vld1q_u8(source3);
2196
2197 vst4q_u8(target, source_8x16x4);
2198
2199 source0 += tBlockSize;
2200 source1 += tBlockSize;
2201 source2 += tBlockSize;
2202 source3 += tBlockSize;
2203
2204 target += tBlockSize * tChannels;
2205 }
2206
2207 for (unsigned int n = 0u; n < remaining; ++n)
2208 {
2209 target[n * tChannels + 0u] = source0[n];
2210 target[n * tChannels + 1u] = source1[n];
2211 target[n * tChannels + 2u] = source2[n];
2212 target[n * tChannels + 3u] = source3[n];
2213 }
2214 }
2215 else
2216 {
2217 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2218 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2219 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[2];
2220 const unsigned int sourceFrame3PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[3];
2221
2222 const unsigned int blocks = width / tBlockSize;
2223 const unsigned int remaining = width % tBlockSize;
2224
2225 for (unsigned int y = 0u; y < height; ++y)
2226 {
2227 for (unsigned int n = 0u; n < blocks; ++n)
2228 {
2229 source_8x16x4.val[0] = vld1q_u8(source0);
2230 source_8x16x4.val[1] = vld1q_u8(source1);
2231 source_8x16x4.val[2] = vld1q_u8(source2);
2232 source_8x16x4.val[3] = vld1q_u8(source3);
2233
2234 vst4q_u8(target, source_8x16x4);
2235
2236 source0 += tBlockSize;
2237 source1 += tBlockSize;
2238 source2 += tBlockSize;
2239 source3 += tBlockSize;
2240
2241 target += tBlockSize * tChannels;
2242 }
2243
2244 for (unsigned int n = 0u; n < remaining; ++n)
2245 {
2246 target[n * tChannels + 0u] = source0[n];
2247 target[n * tChannels + 1u] = source1[n];
2248 target[n * tChannels + 2u] = source2[n];
2249 target[n * tChannels + 3u] = source3[n];
2250 }
2251
2252 source0 += remaining + sourceFrame0PaddingElements;
2253 source1 += remaining + sourceFrame1PaddingElements;
2254 source2 += remaining + sourceFrame2PaddingElements;
2255 source3 += remaining + sourceFrame3PaddingElements;
2256 target += remaining * tChannels + targetFramePaddingElements;
2257 }
2258 }
2259}
2260
2261template <>
2262inline void FrameChannels::zipChannels<float, uint8_t, 2u>(const float* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2263{
2264 ocean_assert(sourceFrames != nullptr);
2265 ocean_assert(targetFrame != nullptr);
2266
2267 ocean_assert(width != 0u && height != 0u);
2268 ocean_assert(channels == 2u);
2269
2270 constexpr unsigned int tChannels = 2u;
2271
2272 bool allSourceFramesContinuous = true;
2273
2274 if (sourceFramesPaddingElements != nullptr)
2275 {
2276 for (unsigned int n = 0u; n < tChannels; ++n)
2277 {
2278 if (sourceFramesPaddingElements[n] != 0u)
2279 {
2280 allSourceFramesContinuous = false;
2281 break;
2282 }
2283 }
2284 }
2285
2286 const float* source0 = sourceFrames[0];
2287 const float* source1 = sourceFrames[1];
2288 uint8_t* target = targetFrame;
2289
2290 constexpr unsigned int tBlockSize = 16u;
2291
2292 uint8x16x2_t target_8x16x2;
2293
2294 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2295 {
2296 const unsigned int pixels = width * height;
2297 const unsigned int blocks = pixels / tBlockSize;
2298 const unsigned int remaining = pixels % tBlockSize;
2299
2300 for (unsigned int n = 0u; n < blocks; ++n)
2301 {
2302 target_8x16x2.val[0] = NEON::cast16ElementsNEON(source0);
2303 target_8x16x2.val[1] = NEON::cast16ElementsNEON(source1);
2304
2305 vst2q_u8(target, target_8x16x2);
2306
2307 source0 += tBlockSize;
2308 source1 += tBlockSize;
2309
2310 target += tBlockSize * tChannels;
2311 }
2312
2313 for (unsigned int n = 0u; n < remaining; ++n)
2314 {
2315 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2316 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2317
2318 target[n * tChannels + 0u] = uint8_t(source0[n]);
2319 target[n * tChannels + 1u] = uint8_t(source1[n]);
2320 }
2321 }
2322 else
2323 {
2324 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2325 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2326
2327 const unsigned int blocks = width / tBlockSize;
2328 const unsigned int remaining = width % tBlockSize;
2329
2330 for (unsigned int y = 0u; y < height; ++y)
2331 {
2332 for (unsigned int n = 0u; n < blocks; ++n)
2333 {
2334 target_8x16x2.val[0] = NEON::cast16ElementsNEON(source0);
2335 target_8x16x2.val[1] = NEON::cast16ElementsNEON(source1);
2336
2337 vst2q_u8(target, target_8x16x2);
2338
2339 source0 += tBlockSize;
2340 source1 += tBlockSize;
2341
2342 target += tBlockSize * tChannels;
2343 }
2344
2345 for (unsigned int n = 0u; n < remaining; ++n)
2346 {
2347 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2348 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2349
2350 target[n * tChannels + 0u] = uint8_t(source0[n]);
2351 target[n * tChannels + 1u] = uint8_t(source1[n]);
2352 }
2353
2354 source0 += remaining + sourceFrame0PaddingElements;
2355 source1 += remaining + sourceFrame1PaddingElements;
2356 target += remaining * tChannels + targetFramePaddingElements;
2357 }
2358 }
2359}
2360
2361template <>
2362inline void FrameChannels::zipChannels<float, uint8_t, 3u>(const float* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2363{
2364 ocean_assert(sourceFrames != nullptr);
2365 ocean_assert(targetFrame != nullptr);
2366
2367 ocean_assert(width != 0u && height != 0u);
2368 ocean_assert(channels == 3u);
2369
2370 constexpr unsigned int tChannels = 3u;
2371
2372 bool allSourceFramesContinuous = true;
2373
2374 if (sourceFramesPaddingElements != nullptr)
2375 {
2376 for (unsigned int n = 0u; n < tChannels; ++n)
2377 {
2378 if (sourceFramesPaddingElements[n] != 0u)
2379 {
2380 allSourceFramesContinuous = false;
2381 break;
2382 }
2383 }
2384 }
2385
2386 const float* source0 = sourceFrames[0];
2387 const float* source1 = sourceFrames[1];
2388 const float* source2 = sourceFrames[2];
2389 uint8_t* target = targetFrame;
2390
2391 constexpr unsigned int tBlockSize = 16u;
2392
2393 uint8x16x3_t target_8x16x3;
2394
2395 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2396 {
2397 const unsigned int pixels = width * height;
2398 const unsigned int blocks = pixels / tBlockSize;
2399 const unsigned int remaining = pixels % tBlockSize;
2400
2401 for (unsigned int n = 0u; n < blocks; ++n)
2402 {
2403 target_8x16x3.val[0] = NEON::cast16ElementsNEON(source0);
2404 target_8x16x3.val[1] = NEON::cast16ElementsNEON(source1);
2405 target_8x16x3.val[2] = NEON::cast16ElementsNEON(source2);
2406
2407 vst3q_u8(target, target_8x16x3);
2408
2409 source0 += tBlockSize;
2410 source1 += tBlockSize;
2411 source2 += tBlockSize;
2412
2413 target += tBlockSize * tChannels;
2414 }
2415
2416 for (unsigned int n = 0u; n < remaining; ++n)
2417 {
2418 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2419 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2420 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2421
2422 target[n * tChannels + 0u] = uint8_t(source0[n]);
2423 target[n * tChannels + 1u] = uint8_t(source1[n]);
2424 target[n * tChannels + 2u] = uint8_t(source2[n]);
2425 }
2426 }
2427 else
2428 {
2429 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2430 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2431 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[2];
2432
2433 const unsigned int blocks = width / tBlockSize;
2434 const unsigned int remaining = width % tBlockSize;
2435
2436 for (unsigned int y = 0u; y < height; ++y)
2437 {
2438 for (unsigned int n = 0u; n < blocks; ++n)
2439 {
2440 target_8x16x3.val[0] = NEON::cast16ElementsNEON(source0);
2441 target_8x16x3.val[1] = NEON::cast16ElementsNEON(source1);
2442 target_8x16x3.val[2] = NEON::cast16ElementsNEON(source2);
2443
2444
2445 vst3q_u8(target, target_8x16x3);
2446
2447 source0 += tBlockSize;
2448 source1 += tBlockSize;
2449 source2 += tBlockSize;
2450
2451 target += tBlockSize * tChannels;
2452 }
2453
2454 for (unsigned int n = 0u; n < remaining; ++n)
2455 {
2456 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2457 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2458 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2459
2460 target[n * tChannels + 0u] = uint8_t(source0[n]);
2461 target[n * tChannels + 1u] = uint8_t(source1[n]);
2462 target[n * tChannels + 2u] = uint8_t(source2[n]);
2463 }
2464
2465 source0 += remaining + sourceFrame0PaddingElements;
2466 source1 += remaining + sourceFrame1PaddingElements;
2467 source2 += remaining + sourceFrame2PaddingElements;
2468 target += remaining * tChannels + targetFramePaddingElements;
2469 }
2470 }
2471}
2472
2473template <>
2474inline void FrameChannels::zipChannels<float, uint8_t, 4u>(const float* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2475{
2476 ocean_assert(sourceFrames != nullptr);
2477 ocean_assert(targetFrame != nullptr);
2478
2479 ocean_assert(width != 0u && height != 0u);
2480 ocean_assert(channels == 4u);
2481
2482 constexpr unsigned int tChannels = 4u;
2483
2484 bool allSourceFramesContinuous = true;
2485
2486 if (sourceFramesPaddingElements != nullptr)
2487 {
2488 for (unsigned int n = 0u; n < tChannels; ++n)
2489 {
2490 if (sourceFramesPaddingElements[n] != 0u)
2491 {
2492 allSourceFramesContinuous = false;
2493 break;
2494 }
2495 }
2496 }
2497
2498 const float* source0 = sourceFrames[0];
2499 const float* source1 = sourceFrames[1];
2500 const float* source2 = sourceFrames[2];
2501 const float* source3 = sourceFrames[3];
2502 uint8_t* target = targetFrame;
2503
2504 constexpr unsigned int tBlockSize = 16u;
2505
2506 uint8x16x4_t target_8x16x4;
2507
2508 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2509 {
2510 const unsigned int pixels = width * height;
2511 const unsigned int blocks = pixels / tBlockSize;
2512 const unsigned int remaining = pixels % tBlockSize;
2513
2514 for (unsigned int n = 0u; n < blocks; ++n)
2515 {
2516 target_8x16x4.val[0] = NEON::cast16ElementsNEON(source0);
2517 target_8x16x4.val[1] = NEON::cast16ElementsNEON(source1);
2518 target_8x16x4.val[2] = NEON::cast16ElementsNEON(source2);
2519 target_8x16x4.val[3] = NEON::cast16ElementsNEON(source3);
2520
2521 vst4q_u8(target, target_8x16x4);
2522
2523 source0 += tBlockSize;
2524 source1 += tBlockSize;
2525 source2 += tBlockSize;
2526 source3 += tBlockSize;
2527
2528 target += tBlockSize * tChannels;
2529 }
2530
2531 for (unsigned int n = 0u; n < remaining; ++n)
2532 {
2533 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2534 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2535 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2536 ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
2537
2538 target[n * tChannels + 0u] = uint8_t(source0[n]);
2539 target[n * tChannels + 1u] = uint8_t(source1[n]);
2540 target[n * tChannels + 2u] = uint8_t(source2[n]);
2541 target[n * tChannels + 3u] = uint8_t(source3[n]);
2542 }
2543 }
2544 else
2545 {
2546 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2547 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2548 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[2];
2549 const unsigned int sourceFrame3PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[3];
2550
2551 const unsigned int blocks = width / tBlockSize;
2552 const unsigned int remaining = width % tBlockSize;
2553
2554 for (unsigned int y = 0u; y < height; ++y)
2555 {
2556 for (unsigned int n = 0u; n < blocks; ++n)
2557 {
2558 target_8x16x4.val[0] = NEON::cast16ElementsNEON(source0);
2559 target_8x16x4.val[1] = NEON::cast16ElementsNEON(source1);
2560 target_8x16x4.val[2] = NEON::cast16ElementsNEON(source2);
2561 target_8x16x4.val[3] = NEON::cast16ElementsNEON(source3);
2562
2563 vst4q_u8(target, target_8x16x4);
2564
2565 source0 += tBlockSize;
2566 source1 += tBlockSize;
2567 source2 += tBlockSize;
2568 source3 += tBlockSize;
2569
2570 target += tBlockSize * tChannels;
2571 }
2572
2573 for (unsigned int n = 0u; n < remaining; ++n)
2574 {
2575 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2576 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2577 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2578 ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
2579
2580 target[n * tChannels + 0u] = uint8_t(source0[n]);
2581 target[n * tChannels + 1u] = uint8_t(source1[n]);
2582 target[n * tChannels + 2u] = uint8_t(source2[n]);
2583 target[n * tChannels + 3u] = uint8_t(source3[n]);
2584 }
2585
2586 source0 += remaining + sourceFrame0PaddingElements;
2587 source1 += remaining + sourceFrame1PaddingElements;
2588 source2 += remaining + sourceFrame2PaddingElements;
2589 source3 += remaining + sourceFrame3PaddingElements;
2590 target += remaining * tChannels + targetFramePaddingElements;
2591 }
2592 }
2593}
2594
2595#endif // defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
2596
2597template <typename TSource, typename TTarget, unsigned int tChannels>
2598void FrameChannels::zipChannels(const TSource* const* sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2599{
2600 ocean_assert(sourceFrames != nullptr);
2601 ocean_assert(targetFrame != nullptr);
2602
2603 ocean_assert(width != 0u && height != 0u);
2604
2605 ocean_assert(tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME || tChannels == channels);
2606
2607 if constexpr (tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME)
2608 {
2609 zipChannelsRuntime<TSource, TTarget>(sourceFrames, targetFrame, width, height, channels, sourceFramesPaddingElements, targetFramePaddingElements);
2610 return;
2611 }
2612
2613 bool allSourceFramesContinuous = true;
2614
2615 if (sourceFramesPaddingElements != nullptr)
2616 {
2617 for (unsigned int n = 0u; n < tChannels; ++n)
2618 {
2619 if (sourceFramesPaddingElements[n] != 0u)
2620 {
2621 allSourceFramesContinuous = false;
2622 break;
2623 }
2624 }
2625 }
2626
2627 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2628 {
2629 for (unsigned int n = 0u; n < width * height; ++n)
2630 {
2631 for (unsigned int c = 0u; c < tChannels; ++c)
2632 {
2633 targetFrame[n * tChannels + c] = TTarget(sourceFrames[c][n]);
2634 }
2635 }
2636 }
2637 else
2638 {
2639 const unsigned int targetFrameStrideElements = width * tChannels + targetFramePaddingElements;
2640
2641 Indices32 sourceFrameStrideElements(tChannels);
2642
2643 for (unsigned int c = 0u; c < tChannels; ++c)
2644 {
2645 if (sourceFramesPaddingElements == nullptr)
2646 {
2647 sourceFrameStrideElements[c] = width;
2648 }
2649 else
2650 {
2651 sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
2652 }
2653 }
2654
2655 for (unsigned int y = 0u; y < height; ++y)
2656 {
2657 TTarget* const targetRow = targetFrame + y * targetFrameStrideElements;
2658
2659 for (unsigned int x = 0u; x < width; ++x)
2660 {
2661 for (unsigned int c = 0u; c < tChannels; ++c)
2662 {
2663 *(targetRow + x * tChannels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x));
2664 }
2665 }
2666 }
2667 }
2668}
2669
2670template <typename TSource, typename TTarget>
2671void FrameChannels::zipChannels(const std::initializer_list<const TSource*>& sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const std::initializer_list<unsigned int>& sourceFramePaddingElements, const unsigned int targetFramePaddingElements)
2672{
2673 ocean_assert(sourceFrames.size() >= 1);
2674 ocean_assert(sourceFramePaddingElements.size() == 0 || sourceFrames.size() == sourceFramePaddingElements.size());
2675
2676 if (sourceFrames.size() == 2)
2677 {
2678 zipChannels<TSource, TTarget, 2u>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2679 }
2680 else if (sourceFrames.size() == 3)
2681 {
2682 zipChannels<TSource, TTarget, 3u>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2683 }
2684 else if (sourceFrames.size() == 4)
2685 {
2686 zipChannels<TSource, TTarget, 4u>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2687 }
2688 else
2689 {
2690 zipChannels<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2691 }
2692}
2693
2694template <typename T, unsigned int tSourceChannels>
2695inline void FrameChannels::addFirstChannel(const T* source, const T* sourceNewChannel, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2696{
2697 static_assert(tSourceChannels != 0u, "Invalid channel number!");
2698
2699 ocean_assert(source != nullptr && sourceNewChannel != nullptr && target != nullptr);
2700 ocean_assert(source != target);
2701 ocean_assert(width >= 1u && height >= 1u);
2702
2703 const unsigned int options[3] = {sourcePaddingElements, sourceNewChannelPaddingElements, targetPaddingElements};
2704
2705 const void* sources[2] = {source, sourceNewChannel};
2706
2707 FrameConverter::convertArbitraryPixelFormat(sources, (void**)&target, width, height, conversionFlag, 1u, FrameChannels::addChannelRow<T, tSourceChannels, true>, options, worker);
2708}
2709
2710template <typename T, unsigned int tSourceChannels>
2711inline void FrameChannels::addFirstChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2712{
2713 static_assert(tSourceChannels >= 1u, "Invalid channel number!");
2714
2715 ocean_assert(source != nullptr && target != nullptr);
2716 ocean_assert(width >= 1u && height >= 1u);
2717
2718 const unsigned int targetChannels = tSourceChannels + 1u;
2719
2720 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2721 const unsigned int targetStrideElements = width * targetChannels + targetPaddingElements;
2722
2723 const void* channelValueParameter = (const void*)(&newChannelValue);
2724
2725 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
2726
2727 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::addChannelValueRow<T, tSourceChannels, true>, FrameChannels::reverseRowPixelOrderInPlace<T, targetChannels>, areContinuous, channelValueParameter, worker);
2728}
2729
2730template <typename T, unsigned int tSourceChannels>
2731inline void FrameChannels::addLastChannel(const T* source, const T* sourceNewChannel, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2732{
2733 static_assert(tSourceChannels != 0u, "Invalid channel number!");
2734
2735 ocean_assert(source != nullptr && sourceNewChannel != nullptr && target != nullptr);
2736 ocean_assert(source != target);
2737 ocean_assert(width >= 1u && height >= 1u);
2738
2739 const unsigned int options[3] = {sourcePaddingElements, sourceNewChannelPaddingElements, targetPaddingElements};
2740
2741 const void* sources[2] = {source, sourceNewChannel};
2742
2743 FrameConverter::convertArbitraryPixelFormat(sources, (void**)&target, width, height, conversionFlag, 1u, FrameChannels::addChannelRow<T, tSourceChannels, false>, options, worker);
2744}
2745
2746template <typename T, unsigned int tSourceChannels>
2747inline void FrameChannels::addLastChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2748{
2749 static_assert(tSourceChannels >= 1u, "Invalid channel number!");
2750
2751 ocean_assert(source != nullptr && target != nullptr);
2752 ocean_assert(width >= 1u && height >= 1u);
2753
2754 const unsigned int targetChannels = tSourceChannels + 1u;
2755
2756 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2757 const unsigned int targetStrideElements = width * targetChannels + targetPaddingElements;
2758
2759 const void* channelValueParameter = (const void*)(&newChannelValue);
2760
2761 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
2762
2763 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::addChannelValueRow<T, tSourceChannels, false>, FrameChannels::reverseRowPixelOrderInPlace<T, targetChannels>, areContinuous, channelValueParameter, worker);
2764}
2765
2766template <typename T, unsigned int tSourceChannels>
2767inline void FrameChannels::removeFirstChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2768{
2769 static_assert(tSourceChannels >= 2u && tSourceChannels <= 8u, "Invalid channel number!");
2770
2771 ocean_assert(source != nullptr && target != nullptr);
2772 ocean_assert(width >= 1u && height >= 1u);
2773
2774 const unsigned int shufflePatternMax = 0x07654321u;
2775 const unsigned int mask = 0xFFFFFFFFu >> ((8u - tSourceChannels + 1u) * 4u); // e.g., 0xFF for tChannels == 3u, 0xFFF for tChannels == 4u
2776
2777 const unsigned int shufflePattern = shufflePatternMax & mask;
2778
2779 FrameChannels::shuffleChannels<T, tSourceChannels, tSourceChannels - 1u, shufflePattern>(source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements, worker);
2780}
2781
2782template <typename T, unsigned int tSourceChannels>
2783inline void FrameChannels::removeLastChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2784{
2785 static_assert(tSourceChannels >= 2u && tSourceChannels <= 8u, "Invalid channel number!");
2786
2787 ocean_assert(source != nullptr && target != nullptr);
2788 ocean_assert(width >= 1u && height >= 1u);
2789
2790 const unsigned int shufflePatternMax = 0x76543210u;
2791 const unsigned int mask = 0xFFFFFFFFu >> ((8u - tSourceChannels + 1u) * 4u); // e.g., 0xFF for tChannels == 3u, 0xFFF for tChannels == 4u
2792
2793 const unsigned int shufflePattern = shufflePatternMax & mask;
2794
2795 FrameChannels::shuffleChannels<T, tSourceChannels, tSourceChannels - 1u, shufflePattern>(source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements, worker);
2796}
2797
2798template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tSourceChannelIndex, unsigned int tTargetChannelIndex>
2799inline void FrameChannels::copyChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2800{
2801 static_assert(tSourceChannels >= 1u, "Invalid number of channels!");
2802 static_assert(tTargetChannels >= 1u, "Invalid number of channels!");
2803
2804 static_assert(tSourceChannelIndex < tSourceChannels, "Invalid channel index!");
2805 static_assert(tTargetChannelIndex < tTargetChannels, "Invalid channel index!");
2806
2807 ocean_assert(source != nullptr && target != nullptr);
2808 ocean_assert(width >= 1u && height >= 1u);
2809
2810 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2811 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
2812
2813 constexpr RowReversePixelOrderInPlaceFunction<T> reversePixelOrderRowInPlaceFunction = nullptr;
2814
2815 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
2816
2817 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, CONVERT_NORMAL, FrameChannels::copyChannelRow<T, tSourceChannels, tTargetChannels, tSourceChannelIndex, tTargetChannelIndex>, reversePixelOrderRowInPlaceFunction, areContinuous, nullptr, worker);
2818}
2819
2820template <typename T, unsigned int tChannel, unsigned int tChannels>
2821inline void FrameChannels::setChannel(T* frame, const unsigned int width, const unsigned int height, const T value, const unsigned int framePaddingElements, Worker* worker)
2822{
2823 static_assert(tChannels >= 1u, "Invalid channel number!");
2824 static_assert(tChannel < tChannels, "Invalid channel index!");
2825
2826 ocean_assert(frame != nullptr);
2827 ocean_assert(width >= 1u && height >= 1u);
2828
2829 if (worker)
2830 {
2831 worker->executeFunction(Worker::Function::createStatic(&setChannelSubset<T, tChannel, tChannels>, frame, width, value, framePaddingElements, 0u, 0u), 0u, height);
2832 }
2833 else
2834 {
2835 setChannelSubset<T, tChannel, tChannels>(frame, width, value, framePaddingElements, 0u, height);
2836 }
2837}
2838
2839template <typename T, unsigned int tChannels>
2840inline void FrameChannels::reverseChannelOrder(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2841{
2842 static_assert(tChannels >= 1u, "Invalid channel number!");
2843
2844 ocean_assert(source != nullptr && target != nullptr);
2845 ocean_assert(width >= 1u && height >= 1u);
2846
2847 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
2848 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
2849
2850 constexpr bool areContinuous = false; // even if both images are continuous, we must reverse each line by another
2851
2852 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::reverseRowChannelOrder<T, tChannels>, FrameChannels::reverseRowPixelOrderInPlace<T, tChannels>, areContinuous, nullptr, worker);
2853}
2854
2855template <typename T, unsigned int tChannels>
2856void FrameChannels::reverseRowPixelOrder(const T* source, T* target, const size_t size)
2857{
2858 static_assert(tChannels >= 1u, "Invalid channel number!");
2859
2860 ocean_assert(source != nullptr && target != nullptr);
2861 ocean_assert(size >= 1);
2862
2863#ifdef OCEAN_DEBUG
2864 const T* const debugSourceStart = source;
2865 const T* const debugSourceEnd = debugSourceStart + size * tChannels;
2866
2867 const T* const debugTargetStart = target;
2868 const T* const debugTargetEnd = debugTargetStart + size * tChannels;
2869#endif
2870
2871 // moving target to the end of the memory block
2872 target += size * tChannels;
2873
2874 const T* const sourceEnd = source + size * tChannels;
2875
2876#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
2877
2878 if constexpr (std::is_same<typename TypeMapper<T>::Type, uint8_t>::value)
2879 {
2880 const size_t blocks16 = size / size_t(16);
2881
2882 switch (tChannels)
2883 {
2884 case 1u:
2885 {
2886 for (size_t n = 0; n < blocks16; ++n)
2887 {
2888 target -= 16u * tChannels;
2889
2890 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2891 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2892
2893 const uint8x16_t source_u_8x16 = vld1q_u8((const uint8_t*)(source));
2894 uint8x16_t revSource_u_8x16 = vrev64q_u8(source_u_8x16);
2895 revSource_u_8x16 = vcombine_u8(vget_high_u8(revSource_u_8x16), vget_low_u8(revSource_u_8x16));
2896
2897 vst1q_u8((uint8_t*)(target), revSource_u_8x16);
2898
2899 source += 16u * tChannels;
2900 }
2901
2902 break;
2903 }
2904
2905 case 2u:
2906 {
2907 for (size_t n = 0; n < blocks16; ++n)
2908 {
2909 target -= 16u * tChannels;
2910
2911 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2912 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2913
2914 const uint8x16_t sourceA_u_8x16 = vld1q_u8((const uint8_t*)(source) + 0);
2915 const uint8x16_t sourceB_u_8x16 = vld1q_u8((const uint8_t*)(source) + 16);
2916
2917 const uint8x16_t revSourceA_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u8(sourceA_u_8x16)));
2918 const uint8x16_t revSourceB_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u8(sourceB_u_8x16)));
2919
2920 const uint8x16_t targetA_u_8x16 = vcombine_u8(vget_high_u8(revSourceA_u_8x16), vget_low_u8(revSourceA_u_8x16));
2921 const uint8x16_t targetB_u_8x16 = vcombine_u8(vget_high_u8(revSourceB_u_8x16), vget_low_u8(revSourceB_u_8x16));
2922
2923 vst1q_u8((uint8_t*)(target) + 0, targetB_u_8x16);
2924 vst1q_u8((uint8_t*)(target) + 16, targetA_u_8x16);
2925
2926 source += 16u * tChannels;
2927 }
2928
2929 break;
2930 }
2931
2932 case 3u:
2933 {
2934 for (size_t n = 0; n < blocks16; ++n)
2935 {
2936 target -= 16u * tChannels;
2937
2938 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2939 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2940
2941 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)(source));
2942
2943 uint8x16x3_t revSource_u_8x16x3;
2944 revSource_u_8x16x3.val[0] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[0])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[0])));
2945 revSource_u_8x16x3.val[1] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[1])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[1])));
2946 revSource_u_8x16x3.val[2] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[2])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[2])));
2947
2948 vst3q_u8((uint8_t*)(target), revSource_u_8x16x3);
2949
2950 source += 16u * tChannels;
2951 }
2952
2953 break;
2954 }
2955
2956 case 4u:
2957 {
2958 for (size_t n = 0; n < blocks16; ++n)
2959 {
2960 target -= 16u * tChannels;
2961
2962 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2963 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2964
2965 const uint8x16_t sourceA_u_8x16 = vld1q_u8((const uint8_t*)(source) + 0);
2966 const uint8x16_t sourceB_u_8x16 = vld1q_u8((const uint8_t*)(source) + 16);
2967 const uint8x16_t sourceC_u_8x16 = vld1q_u8((const uint8_t*)(source) + 32);
2968 const uint8x16_t sourceD_u_8x16 = vld1q_u8((const uint8_t*)(source) + 48);
2969
2970 const uint8x16_t revSourceA_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceA_u_8x16)));
2971 const uint8x16_t revSourceB_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceB_u_8x16)));
2972 const uint8x16_t revSourceC_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceC_u_8x16)));
2973 const uint8x16_t revSourceD_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceD_u_8x16)));
2974
2975 const uint8x16_t targetA_u_8x16 = vcombine_u8(vget_high_u8(revSourceA_u_8x16), vget_low_u8(revSourceA_u_8x16));
2976 const uint8x16_t targetB_u_8x16 = vcombine_u8(vget_high_u8(revSourceB_u_8x16), vget_low_u8(revSourceB_u_8x16));
2977 const uint8x16_t targetC_u_8x16 = vcombine_u8(vget_high_u8(revSourceC_u_8x16), vget_low_u8(revSourceC_u_8x16));
2978 const uint8x16_t targetD_u_8x16 = vcombine_u8(vget_high_u8(revSourceD_u_8x16), vget_low_u8(revSourceD_u_8x16));
2979
2980 vst1q_u8((uint8_t*)(target) + 0, targetD_u_8x16);
2981 vst1q_u8((uint8_t*)(target) + 16, targetC_u_8x16);
2982 vst1q_u8((uint8_t*)(target) + 32, targetB_u_8x16);
2983 vst1q_u8((uint8_t*)(target) + 48, targetA_u_8x16);
2984
2985 source += 16u * tChannels;
2986 }
2987
2988 break;
2989 }
2990
2991 default:
2992 break;
2993 }
2994 }
2995
2996#endif // OCEAN_HARDWARE_NEON_VERSION
2997
2998 while (source != sourceEnd)
2999 {
3000 ocean_assert(source < sourceEnd);
3001
3002 for (unsigned int n = 0u; n < tChannels; ++n)
3003 {
3004 ocean_assert(source + tChannels - n - 1u >= debugSourceStart);
3005 ocean_assert(source + tChannels - n - 1u < debugSourceEnd);
3006
3007 ocean_assert(target > debugTargetStart && target <= debugTargetEnd);
3008
3009 *--target = source[tChannels - n - 1u];
3010 }
3011
3012 source += tChannels;
3013 }
3014}
3015
3016template <typename T, unsigned int tChannels>
3017void FrameChannels::reverseRowPixelOrderInPlace(T* data, const size_t size)
3018{
3019 static_assert(tChannels >= 1u, "Invalid channel number!");
3020
3021 ocean_assert(data != nullptr);
3022 ocean_assert(size >= 1);
3023
3024 using PixelType = typename DataType<T, tChannels>::Type;
3025
3026 size_t n = 0;
3027
3028#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3029
3030 if constexpr (std::is_same<typename TypeMapper<T>::Type, uint8_t>::value)
3031 {
3032 if (size >= 32)
3033 {
3034 const size_t blocks32 = size / size_t(32);
3035
3036 uint8_t* left = (uint8_t*)(data);
3037 uint8_t* right = (uint8_t*)(data) + (size - 16u) * tChannels;
3038
3039 switch (tChannels)
3040 {
3041 case 1u:
3042 {
3043 for (size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3044 {
3045 const uint8x16_t left_u_8x16 = vld1q_u8(left);
3046 const uint8x16_t right_u_8x16 = vld1q_u8(right);
3047
3048 uint8x16_t revLeft_u_8x16 = vrev64q_u8(left_u_8x16);
3049 revLeft_u_8x16 = vcombine_u8(vget_high_u8(revLeft_u_8x16), vget_low_u8(revLeft_u_8x16));
3050
3051 uint8x16_t revRight_u_8x16 = vrev64q_u8(right_u_8x16);
3052 revRight_u_8x16 = vcombine_u8(vget_high_u8(revRight_u_8x16), vget_low_u8(revRight_u_8x16));
3053
3054 vst1q_u8(left, revRight_u_8x16);
3055 vst1q_u8(right, revLeft_u_8x16);
3056
3057 left += 16u * tChannels;
3058 right -= 16u * tChannels;
3059 }
3060
3061 n += blocks32 * 16u;
3062
3063 break;
3064 }
3065
3066 case 2u:
3067 {
3068 for (size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3069 {
3070 const uint8x16x2_t left_u_8x16x2 = vld2q_u8(left);
3071 const uint8x16x2_t right_u_8x16x2 = vld2q_u8(right);
3072
3073 uint8x16x2_t revLeft_u_8x16x2;
3074 revLeft_u_8x16x2.val[0] = vrev64q_u8(left_u_8x16x2.val[0]);
3075 revLeft_u_8x16x2.val[1] = vrev64q_u8(left_u_8x16x2.val[1]);
3076 revLeft_u_8x16x2.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x2.val[0]), vget_low_u8(revLeft_u_8x16x2.val[0]));
3077 revLeft_u_8x16x2.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x2.val[1]), vget_low_u8(revLeft_u_8x16x2.val[1]));
3078
3079 uint8x16x2_t revRight_u_8x16x2;
3080 revRight_u_8x16x2.val[0] = vrev64q_u8(right_u_8x16x2.val[0]);
3081 revRight_u_8x16x2.val[1] = vrev64q_u8(right_u_8x16x2.val[1]);
3082 revRight_u_8x16x2.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x2.val[0]), vget_low_u8(revRight_u_8x16x2.val[0]));
3083 revRight_u_8x16x2.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x2.val[1]), vget_low_u8(revRight_u_8x16x2.val[1]));
3084
3085 vst2q_u8(left, revRight_u_8x16x2);
3086 vst2q_u8(right, revLeft_u_8x16x2);
3087
3088 left += 16u * tChannels;
3089 right -= 16u * tChannels;
3090 }
3091
3092 n += blocks32 * 16u;
3093
3094 break;
3095 }
3096
3097 case 3u:
3098 {
3099 for (size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3100 {
3101 const uint8x16x3_t left_u_8x16x3 = vld3q_u8(left);
3102 const uint8x16x3_t right_u_8x16x3 = vld3q_u8(right);
3103
3104 uint8x16x3_t revLeft_u_8x16x3;
3105 revLeft_u_8x16x3.val[0] = vrev64q_u8(left_u_8x16x3.val[0]);
3106 revLeft_u_8x16x3.val[1] = vrev64q_u8(left_u_8x16x3.val[1]);
3107 revLeft_u_8x16x3.val[2] = vrev64q_u8(left_u_8x16x3.val[2]);
3108 revLeft_u_8x16x3.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[0]), vget_low_u8(revLeft_u_8x16x3.val[0]));
3109 revLeft_u_8x16x3.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[1]), vget_low_u8(revLeft_u_8x16x3.val[1]));
3110 revLeft_u_8x16x3.val[2] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[2]), vget_low_u8(revLeft_u_8x16x3.val[2]));
3111
3112 uint8x16x3_t revRight_u_8x16x3;
3113 revRight_u_8x16x3.val[0] = vrev64q_u8(right_u_8x16x3.val[0]);
3114 revRight_u_8x16x3.val[1] = vrev64q_u8(right_u_8x16x3.val[1]);
3115 revRight_u_8x16x3.val[2] = vrev64q_u8(right_u_8x16x3.val[2]);
3116 revRight_u_8x16x3.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[0]), vget_low_u8(revRight_u_8x16x3.val[0]));
3117 revRight_u_8x16x3.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[1]), vget_low_u8(revRight_u_8x16x3.val[1]));
3118 revRight_u_8x16x3.val[2] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[2]), vget_low_u8(revRight_u_8x16x3.val[2]));
3119
3120 vst3q_u8(left, revRight_u_8x16x3);
3121 vst3q_u8(right, revLeft_u_8x16x3);
3122
3123 left += 16u * tChannels;
3124 right -= 16u * tChannels;
3125 }
3126
3127 n += blocks32 * 16u;
3128
3129 break;
3130 }
3131
3132 case 4u:
3133 {
3134 for (size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3135 {
3136 const uint8x16x4_t left_u_8x16x4 = vld4q_u8(left);
3137 const uint8x16x4_t right_u_8x16x4 = vld4q_u8(right);
3138
3139 uint8x16x4_t revLeft_u_8x16x4;
3140 revLeft_u_8x16x4.val[0] = vrev64q_u8(left_u_8x16x4.val[0]);
3141 revLeft_u_8x16x4.val[1] = vrev64q_u8(left_u_8x16x4.val[1]);
3142 revLeft_u_8x16x4.val[2] = vrev64q_u8(left_u_8x16x4.val[2]);
3143 revLeft_u_8x16x4.val[3] = vrev64q_u8(left_u_8x16x4.val[3]);
3144 revLeft_u_8x16x4.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[0]), vget_low_u8(revLeft_u_8x16x4.val[0]));
3145 revLeft_u_8x16x4.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[1]), vget_low_u8(revLeft_u_8x16x4.val[1]));
3146 revLeft_u_8x16x4.val[2] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[2]), vget_low_u8(revLeft_u_8x16x4.val[2]));
3147 revLeft_u_8x16x4.val[3] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[3]), vget_low_u8(revLeft_u_8x16x4.val[3]));
3148
3149 uint8x16x4_t revRight_u_8x16x4;
3150 revRight_u_8x16x4.val[0] = vrev64q_u8(right_u_8x16x4.val[0]);
3151 revRight_u_8x16x4.val[1] = vrev64q_u8(right_u_8x16x4.val[1]);
3152 revRight_u_8x16x4.val[2] = vrev64q_u8(right_u_8x16x4.val[2]);
3153 revRight_u_8x16x4.val[3] = vrev64q_u8(right_u_8x16x4.val[3]);
3154 revRight_u_8x16x4.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[0]), vget_low_u8(revRight_u_8x16x4.val[0]));
3155 revRight_u_8x16x4.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[1]), vget_low_u8(revRight_u_8x16x4.val[1]));
3156 revRight_u_8x16x4.val[2] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[2]), vget_low_u8(revRight_u_8x16x4.val[2]));
3157 revRight_u_8x16x4.val[3] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[3]), vget_low_u8(revRight_u_8x16x4.val[3]));
3158
3159 vst4q_u8(left, revRight_u_8x16x4);
3160 vst4q_u8(right, revLeft_u_8x16x4);
3161
3162 left += 16u * tChannels;
3163 right -= 16u * tChannels;
3164 }
3165
3166 n += blocks32 * 16u;
3167
3168 break;
3169 }
3170
3171 default:
3172 break;
3173 }
3174 }
3175 }
3176
3177#endif
3178
3179 PixelType intermediate;
3180
3181 PixelType* const pixels = (PixelType*)(data);
3182
3183 while (n < size / 2)
3184 {
3185 intermediate = pixels[n];
3186
3187 pixels[n] = pixels[size - n - 1];
3188 pixels[size - n - 1] = intermediate;
3189
3190 ++n;
3191 }
3192}
3193
3194template <typename T, unsigned int tChannels>
3195void FrameChannels::reverseRowChannelOrder(const T* source, T* target, const size_t size, const void* /*options*/)
3196{
3197 ocean_assert(source != nullptr && target != nullptr);
3198 ocean_assert(source != target);
3199 ocean_assert(size >= 1);
3200
3201#ifdef OCEAN_DEBUG
3202 const T* const debugSourceStart = source;
3203 const T* const debugSourceEnd = debugSourceStart + size * tChannels;
3204
3205 const T* const debugTargetStart = target;
3206 const T* const debugTargetEnd = debugTargetStart + size * tChannels;
3207#endif
3208
3209 if constexpr (tChannels == 1)
3210 {
3211 // we actually copy the one channel
3212
3213 memcpy(target, source, sizeof(T) * size);
3214 return;
3215 }
3216
3217 const T* const sourceEnd = source + size * tChannels;
3218
3219#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
3220
3221 if ((std::is_same<typename TypeMapper<T>::Type, uint8_t>::value))
3222 {
3223 const size_t blocks16 = size / size_t(16);
3224
3225 switch (tChannels)
3226 {
3227 case 1u:
3228 ocean_assert(false && "This should have been handled above!");
3229 break;
3230
3231 case 2u:
3232 {
3233 for (size_t n = 0; n < blocks16; ++n)
3234 {
3235 SSE::reverseChannelOrder2Channel8Bit32Elements((const uint8_t*)source, (uint8_t*)target);
3236
3237 source += 16u * tChannels;
3238 target += 16u * tChannels;
3239 }
3240
3241 break;
3242 }
3243
3244 case 3u:
3245 {
3246 for (size_t n = 0; n < blocks16; ++n)
3247 {
3248 SSE::reverseChannelOrder3Channel8Bit48Elements((const uint8_t*)source, (uint8_t*)target);
3249
3250 source += 16u * tChannels;
3251 target += 16u * tChannels;
3252 }
3253
3254 break;
3255 }
3256
3257 case 4u:
3258 {
3259 for (size_t n = 0; n < blocks16; ++n)
3260 {
3261 SSE::reverseChannelOrder4Channel8Bit64Elements((const uint8_t*)source, (uint8_t*)target);
3262
3263 source += 16u * tChannels;
3264 target += 16u * tChannels;
3265 }
3266
3267 break;
3268 }
3269
3270 default:
3271 break;
3272 }
3273 }
3274
3275#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3276
3277 if ((std::is_same<typename TypeMapper<T>::Type, uint8_t>::value))
3278 {
3279 const size_t blocks16 = size / size_t(16);
3280
3281 switch (tChannels)
3282 {
3283 case 1u:
3284 ocean_assert(false && "This should have been handled above!");
3285 break;
3286
3287 case 2u:
3288 {
3289 for (size_t n = 0; n < blocks16; ++n)
3290 {
3291 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3292 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3293
3294 const uint8x16_t sourceA_u_8x16 = vld1q_u8((const uint8_t*)source + 0);
3295 const uint8x16_t sourceB_u_8x16 = vld1q_u8((const uint8_t*)source + 16);
3296
3297 const uint8x16_t revSourceA_u_8x16 = vrev16q_u8(sourceA_u_8x16);
3298 const uint8x16_t revSourceB_u_8x16 = vrev16q_u8(sourceB_u_8x16);
3299
3300 vst1q_u8((uint8_t*)target + 0, revSourceA_u_8x16);
3301 vst1q_u8((uint8_t*)target + 16, revSourceB_u_8x16);
3302
3303 source += 16u * tChannels;
3304 target += 16u * tChannels;
3305 }
3306
3307 break;
3308 }
3309
3310 case 3u:
3311 {
3312 for (size_t n = 0; n < blocks16; ++n)
3313 {
3314 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3315 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3316
3317 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)source);
3318
3319 uint8x16x3_t revSource_u_8x16x3;
3320 revSource_u_8x16x3.val[0] = source_u_8x16x3.val[2];
3321 revSource_u_8x16x3.val[1] = source_u_8x16x3.val[1];
3322 revSource_u_8x16x3.val[2] = source_u_8x16x3.val[0];
3323
3324 vst3q_u8((uint8_t*)target, revSource_u_8x16x3);
3325
3326 source += 16u * tChannels;
3327 target += 16u * tChannels;
3328 }
3329
3330 break;
3331 }
3332
3333 case 4u:
3334 {
3335 for (size_t n = 0; n < blocks16; ++n)
3336 {
3337 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3338 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3339
3340 const uint8x16_t sourceA_u_8x16 = vld1q_u8((const uint8_t*)source + 0);
3341 const uint8x16_t sourceB_u_8x16 = vld1q_u8((const uint8_t*)source + 16);
3342 const uint8x16_t sourceC_u_8x16 = vld1q_u8((const uint8_t*)source + 32);
3343 const uint8x16_t sourceD_u_8x16 = vld1q_u8((const uint8_t*)source + 48);
3344
3345 const uint8x16_t revSourceA_u_8x16 = vrev32q_u8(sourceA_u_8x16);
3346 const uint8x16_t revSourceB_u_8x16 = vrev32q_u8(sourceB_u_8x16);
3347 const uint8x16_t revSourceC_u_8x16 = vrev32q_u8(sourceC_u_8x16);
3348 const uint8x16_t revSourceD_u_8x16 = vrev32q_u8(sourceD_u_8x16);
3349
3350 vst1q_u8((uint8_t*)target + 0, revSourceA_u_8x16);
3351 vst1q_u8((uint8_t*)target + 16, revSourceB_u_8x16);
3352 vst1q_u8((uint8_t*)target + 32, revSourceC_u_8x16);
3353 vst1q_u8((uint8_t*)target + 48, revSourceD_u_8x16);
3354
3355 source += 16u * tChannels;
3356 target += 16u * tChannels;
3357 }
3358
3359 break;
3360 }
3361
3362 default:
3363 break;
3364 }
3365 }
3366
3367#endif // OCEAN_HARDWARE_NEON_VERSION
3368
3369 while (source != sourceEnd)
3370 {
3371 ocean_assert(source < sourceEnd);
3372
3373 ocean_assert(source >= debugSourceStart && source + tChannels <= debugSourceEnd);
3374 ocean_assert(target >= debugTargetStart && target + tChannels <= debugTargetEnd);
3375
3376 for (unsigned int n = 0u; n < tChannels; ++n)
3377 {
3378 target[n] = source[tChannels - n - 1u];
3379 }
3380
3381 source += tChannels;
3382 target += tChannels;
3383 }
3384}
3385
3386template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
3387inline void FrameChannels::shuffleRowChannels(const T* source, T* target, const size_t size, const void* /*unusedOptions*/)
3388{
3389 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u, "Invalid channel number!");
3390 static_assert(tTargetChannels >= 1u && tTargetChannels <= 8u, "Invalid channel number!");
3391
3392 static_assert(tSourceChannels != 1u || tTargetChannels != 1u, "Invalid channel number!");
3393
3394 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels, "Invalid shuffle pattern!");
3395 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels, "Invalid shuffle pattern!");
3396 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels, "Invalid shuffle pattern!");
3397 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels, "Invalid shuffle pattern!");
3398 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels, "Invalid shuffle pattern!");
3399 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels, "Invalid shuffle pattern!");
3400 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels, "Invalid shuffle pattern!");
3401 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels, "Invalid shuffle pattern!");
3402
3403 ocean_assert(source != nullptr && target != nullptr);
3404 ocean_assert(size != 0);
3405
3406 const T* const sourceEnd = source + size * tSourceChannels;
3407
3408#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
3409
3410 if ((std::is_same<typename TypeMapper<T>::Type, uint8_t>::value))
3411 {
3412 const size_t blocks16 = size / size_t(16);
3413
3414 switch (tSourceChannels | ((tTargetChannels) << 4u))
3415 {
3416 // 4 -> 4
3417 case (4u | (4u << 4u)):
3418 {
3419 // the following shuffle patterns are known during compile time
3420
3421 constexpr unsigned int offset1 = 0x04040404u;
3422 constexpr unsigned int offset2 = 0x08080808u;
3423 constexpr unsigned int offset3 = 0x0C0C0C0Cu;
3424
3425 // converting shufflePattern16 to shufflePattern16
3426 const unsigned int shufflePattern0 = ((tShufflePattern & 0xF000u) << 12u) | ((tShufflePattern & 0x0F00u) << 8u) | ((tShufflePattern & 0x00F0u) << 4u) | ((tShufflePattern & 0x000Fu) << 0u);
3427
3428 const unsigned int shufflePattern1 = shufflePattern0 + offset1;
3429 const unsigned int shufflePattern2 = shufflePattern0 + offset2;
3430 const unsigned int shufflePattern3 = shufflePattern0 + offset3;
3431
3432 const __m128i shufflePattern128 = SSE::set128i((((unsigned long long)shufflePattern3) << 32ull) | (unsigned long long)shufflePattern2, (((unsigned long long)shufflePattern1) << 32ull) | (unsigned long long)shufflePattern0);
3433
3434 for (size_t n = 0; n < blocks16; ++n)
3435 {
3436 SSE::store128i(_mm_shuffle_epi8(SSE::load128i((const uint8_t*)source + 0), shufflePattern128), (uint8_t*)target + 0);
3437 SSE::store128i(_mm_shuffle_epi8(SSE::load128i((const uint8_t*)source + 16), shufflePattern128), (uint8_t*)target + 16);
3438 SSE::store128i(_mm_shuffle_epi8(SSE::load128i((const uint8_t*)source + 32), shufflePattern128), (uint8_t*)target + 32);
3439 SSE::store128i(_mm_shuffle_epi8(SSE::load128i((const uint8_t*)source + 48), shufflePattern128), (uint8_t*)target + 48);
3440
3441 source += 16u * tSourceChannels;
3442 target += 16u * tTargetChannels;
3443 }
3444
3445 break;
3446 }
3447
3448 default:
3449 // we do not have a NEON-based optimization
3450 break;
3451 }
3452 }
3453
3454#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3455
3456 if ((std::is_same<typename TypeMapper<T>::Type, uint8_t>::value))
3457 {
3458 const size_t blocks16 = size / size_t(16);
3459
3460 switch (tSourceChannels | ((tTargetChannels) << 4u))
3461 {
3462 // 1 -> 3
3463 case (1u | (3u << 4u)):
3464 {
3465 static_assert(tSourceChannels != 1u || tShufflePattern == 0u, "Invalid shuffle patter!");
3466
3467 for (size_t n = 0; n < blocks16; ++n)
3468 {
3469 const uint8x16_t source_u_8x16 = vld1q_u8((const uint8_t*)source);
3470
3471 uint8x16x3_t target_u_8x16x3;
3472
3473 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3474 {
3475 target_u_8x16x3.val[nT] = source_u_8x16;
3476 }
3477
3478 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3479
3480 source += 16u * tSourceChannels;
3481 target += 16u * tTargetChannels;
3482 }
3483
3484 break;
3485 }
3486
3487 // 2 -> 1
3488 case (2u | (1u << 4u)):
3489 {
3490 for (size_t n = 0; n < blocks16; ++n)
3491 {
3492 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((const uint8_t*)source);
3493
3494 constexpr unsigned int sourceChannel = tShufflePattern & 0x00000001u; // possible index values {0, 1}
3495 static_assert(sourceChannel <= 1u, "Invalid shuffle pattern!");
3496 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3497
3498 const uint8x16_t target_u_8x16 = source_u_8x16x2.val[sourceChannel];
3499
3500 vst1q_u8((uint8_t*)target, target_u_8x16);
3501
3502 source += 16u * tSourceChannels;
3503 target += 16u * tTargetChannels;
3504 }
3505
3506 break;
3507 }
3508
3509 // 2 -> 3
3510 case (2u | (3u << 4u)):
3511 {
3512 for (size_t n = 0; n < blocks16; ++n)
3513 {
3514 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((const uint8_t*)source);
3515
3516 uint8x16x3_t target_u_8x16x3;
3517
3518 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3519 {
3520 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000001u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3521
3522 target_u_8x16x3.val[nT] = source_u_8x16x2.val[(tShufflePattern >> (nT * 4u)) & 0x00000001u]; // possible index values {0, 1}
3523 }
3524
3525 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3526
3527 source += 16u * tSourceChannels;
3528 target += 16u * tTargetChannels;
3529 }
3530
3531 break;
3532 }
3533
3534 // 2 -> 4
3535 case (2u | (4u << 4u)):
3536 {
3537 for (size_t n = 0; n < blocks16; ++n)
3538 {
3539 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((const uint8_t*)source);
3540
3541 uint8x16x4_t target_u_8x16x4;
3542
3543 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3544 {
3545 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000001u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3546
3547 target_u_8x16x4.val[nT] = source_u_8x16x2.val[(tShufflePattern >> (nT * 4u)) & 0x00000001u]; // possible index values {0, 1}
3548 }
3549
3550 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3551
3552 source += 16u * tSourceChannels;
3553 target += 16u * tTargetChannels;
3554 }
3555
3556 break;
3557 }
3558
3559 // 3 -> 1
3560 case (3u | (1u << 4u)):
3561 {
3562 constexpr unsigned int sourceChannel = (tShufflePattern & 0x0000000Fu) <= 2u ? (tShufflePattern & 0x0000000Fu) : 2u; // possible index values {0, 1, 2}
3563 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3564
3565 for (size_t n = 0; n < blocks16; ++n)
3566 {
3567 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)source);
3568
3569 const uint8x16_t target_u_8x16 = source_u_8x16x3.val[sourceChannel];
3570
3571 vst1q_u8((uint8_t*)target, target_u_8x16);
3572
3573 source += 16u * tSourceChannels;
3574 target += 16u * tTargetChannels;
3575 }
3576
3577 break;
3578 }
3579
3580 // 3 -> 2
3581 case (3u | (2u << 4u)):
3582 {
3583 for (size_t n = 0; n < blocks16; ++n)
3584 {
3585 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)source);
3586
3587 uint8x16x2_t target_u_8x16x2;
3588
3589 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3590 {
3591 target_u_8x16x2.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)]; // possible index values {0, 1, 2}
3592 }
3593
3594 vst2q_u8((uint8_t*)target, target_u_8x16x2);
3595
3596 source += 16u * tSourceChannels;
3597 target += 16u * tTargetChannels;
3598 }
3599
3600 break;
3601 }
3602
3603 // 3 -> 3
3604 case (3u | (3u << 4u)):
3605 {
3606 for (size_t n = 0; n < blocks16; ++n)
3607 {
3608 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)source);
3609
3610 uint8x16x3_t target_u_8x16x3;
3611
3612 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3613 {
3614 target_u_8x16x3.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)]; // possible index values {0, 1, 2}
3615 }
3616
3617 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3618
3619 source += 16u * tSourceChannels;
3620 target += 16u * tTargetChannels;
3621 }
3622
3623 break;
3624 }
3625
3626 // 4 -> 1
3627 case (4u | (1u << 4u)):
3628 {
3629 for (size_t n = 0; n < blocks16; ++n)
3630 {
3631 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((const uint8_t*)source);
3632
3633 constexpr unsigned int sourceChannel = tShufflePattern & 0x00000003u; // possible index values {0, 1, 2, 3}
3634 static_assert(sourceChannel <= 3u, "Invalid shuffle pattern!");
3635
3636 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3637
3638 const uint8x16_t target_u_8x16 = source_u_8x16x4.val[sourceChannel];
3639
3640 vst1q_u8((uint8_t*)target, target_u_8x16);
3641
3642 source += 16u * tSourceChannels;
3643 target += 16u * tTargetChannels;
3644 }
3645
3646 break;
3647 }
3648
3649 // 4 -> 2
3650 case (4u | (2u << 4u)):
3651 {
3652 for (size_t n = 0; n < blocks16; ++n)
3653 {
3654 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((const uint8_t*)source);
3655
3656 uint8x16x2_t target_u_8x16x2;
3657
3658 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3659 {
3660 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3661
3662 target_u_8x16x2.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u]; // possible index values {0, 1, 2, 3}
3663 }
3664
3665 vst2q_u8((uint8_t*)target, target_u_8x16x2);
3666
3667 source += 16u * tSourceChannels;
3668 target += 16u * tTargetChannels;
3669 }
3670
3671 break;
3672 }
3673
3674 // 4 -> 3
3675 case (4u | (3u << 4u)):
3676 {
3677 for (size_t n = 0; n < blocks16; ++n)
3678 {
3679 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((const uint8_t*)source);
3680
3681 uint8x16x3_t target_u_8x16x3;
3682
3683 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3684 {
3685 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3686
3687 target_u_8x16x3.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u]; // possible index values {0, 1, 2, 3}
3688 }
3689
3690 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3691
3692 source += 16u * tSourceChannels;
3693 target += 16u * tTargetChannels;
3694 }
3695
3696 break;
3697 }
3698
3699 // 4 -> 4
3700 case (4u | (4u << 4u)):
3701 {
3702 for (size_t n = 0; n < blocks16; ++n)
3703 {
3704 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((const uint8_t*)source);
3705
3706 uint8x16x4_t target_u_8x16x4;
3707
3708 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3709 {
3710 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3711
3712 target_u_8x16x4.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u]; // possible index values {0, 1, 2, 3}
3713 }
3714
3715 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3716
3717 source += 16u * tSourceChannels;
3718 target += 16u * tTargetChannels;
3719 }
3720
3721 break;
3722 }
3723
3724 default:
3725 // we do not have a NEON-based optimization
3726 break;
3727 }
3728 }
3729
3730#endif
3731
3732 while (source != sourceEnd)
3733 {
3734 ocean_assert(source < sourceEnd);
3735
3736 for (unsigned int n = 0u; n < tTargetChannels; ++n)
3737 {
3738 target[n] = source[(tShufflePattern >> (n * 4u)) & 0x0000000Fu];
3739 }
3740
3741 source += tSourceChannels;
3742 target += tTargetChannels;
3743 }
3744}
3745
3746template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
3747inline void FrameChannels::shuffleRowChannelsAndSetLastChannelValue(const T* source, T* target, const size_t size, const void* options)
3748{
3749 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u, "Invalid channel number!");
3750 static_assert(tTargetChannels >= 2u && tTargetChannels <= 8u, "Invalid channel number!");
3751
3752 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels, "Invalid shuffle pattern!");
3753 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels, "Invalid shuffle pattern!");
3754 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels, "Invalid shuffle pattern!");
3755 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels, "Invalid shuffle pattern!");
3756 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels, "Invalid shuffle pattern!");
3757 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels, "Invalid shuffle pattern!");
3758 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels, "Invalid shuffle pattern!");
3759 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels, "Invalid shuffle pattern!");
3760
3761 ocean_assert(source != nullptr && target != nullptr);
3762 ocean_assert(size != 0);
3763
3764 ocean_assert(options != nullptr);
3765
3766 const T lastChannelValue = *(const T*)(options);
3767
3768 const T* const sourceEnd = source + size * tSourceChannels;
3769
3770#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3771
3772 if ((std::is_same<typename TypeMapper<T>::Type, uint8_t>::value))
3773 {
3774 const size_t blocks16 = size / size_t(16);
3775
3776 switch (tSourceChannels | ((tTargetChannels) << 4u))
3777 {
3778 // 1 -> 4
3779 case (1u | (4u << 4u)):
3780 {
3781 ocean_assert(tShufflePattern == 0u);
3782
3783 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3784
3785 uint8x16x4_t target_u_8x16x4;
3786 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3787
3788 for (size_t n = 0; n < blocks16; ++n)
3789 {
3790 const uint8x16_t source_u_8x16 = vld1q_u8((const uint8_t*)source);
3791
3792 for (unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
3793 {
3794 target_u_8x16x4.val[nT] = source_u_8x16;
3795 }
3796
3797 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3798
3799 source += 16u * tSourceChannels;
3800 target += 16u * tTargetChannels;
3801 }
3802
3803 break;
3804 }
3805
3806 // 3 -> 4
3807 case (3u | (4u << 4u)):
3808 {
3809 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3810
3811 uint8x16x4_t target_u_8x16x4;
3812 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3813
3814 for (size_t n = 0; n < blocks16; ++n)
3815 {
3816 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)source);
3817
3818 for (unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
3819 {
3820 target_u_8x16x4.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)]; // possible index values {0, 1, 2}
3821 }
3822
3823 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3824
3825 source += 16u * tSourceChannels;
3826 target += 16u * tTargetChannels;
3827 }
3828
3829 break;
3830 }
3831
3832 // 4 -> 4
3833 case (4u | (4u << 4u)):
3834 {
3835 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3836
3837 uint8x16x4_t target_u_8x16x4;
3838 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3839
3840 for (size_t n = 0; n < blocks16; ++n)
3841 {
3842 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((const uint8_t*)source);
3843
3844 for (unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
3845 {
3846 target_u_8x16x4.val[nT] = source_u_8x16x4.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 3u)]; // possible index values {0, 1, 2, 3}
3847 }
3848
3849 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3850
3851 source += 16u * tSourceChannels;
3852 target += 16u * tTargetChannels;
3853 }
3854
3855 break;
3856 }
3857
3858 default:
3859 // we do not have a NEON-based optimization
3860 break;
3861 }
3862 }
3863
3864#endif
3865
3866 while (source != sourceEnd)
3867 {
3868 ocean_assert(source < sourceEnd);
3869
3870 for (unsigned int n = 0u; n < tTargetChannels - 1u; ++n)
3871 {
3872 target[n] = source[(tShufflePattern >> (n * 4u)) & 0x0000000Fu];
3873 target[tTargetChannels - 1u] = lastChannelValue;
3874 }
3875
3876 source += tSourceChannels;
3877 target += tTargetChannels;
3878 }
3879}
3880
3881template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
3882inline void FrameChannels::shuffleChannels(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
3883{
3884 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u, "Invalid channel number!");
3885 static_assert(tTargetChannels >= 1u && tTargetChannels <= 8u, "Invalid channel number!");
3886
3887 static_assert(tSourceChannels != 1u || tTargetChannels != 1u, "Invalid channel number!");
3888
3889 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels, "Invalid shuffle pattern!");
3890 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels, "Invalid shuffle pattern!");
3891 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels, "Invalid shuffle pattern!");
3892 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels, "Invalid shuffle pattern!");
3893 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels, "Invalid shuffle pattern!");
3894 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels, "Invalid shuffle pattern!");
3895 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels, "Invalid shuffle pattern!");
3896 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels, "Invalid shuffle pattern!");
3897
3898 ocean_assert(source != nullptr && target != nullptr);
3899 ocean_assert(width >= 1u && height >= 1u);
3900
3901 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
3902 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
3903
3904 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
3905
3906 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::shuffleRowChannels<T, tSourceChannels, tTargetChannels, tShufflePattern>, FrameChannels::reverseRowPixelOrderInPlace<T, tTargetChannels>, areContinuous, nullptr, worker);
3907}
3908
3909template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
3910inline void FrameChannels::shuffleChannelsAndSetLastChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
3911{
3912 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u, "Invalid channel number!");
3913 static_assert(tTargetChannels >= 2u && tTargetChannels <= 8u, "Invalid channel number!");
3914
3915 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels, "Invalid shuffle pattern!");
3916 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels, "Invalid shuffle pattern!");
3917 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels, "Invalid shuffle pattern!");
3918 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels, "Invalid shuffle pattern!");
3919 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels, "Invalid shuffle pattern!");
3920 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels, "Invalid shuffle pattern!");
3921 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels, "Invalid shuffle pattern!");
3922 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels, "Invalid shuffle pattern!");
3923
3924 ocean_assert(source != nullptr && target != nullptr);
3925 ocean_assert(width >= 1u && height >= 1u);
3926
3927 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
3928 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
3929
3930 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
3931
3932 const T options = newChannelValue;
3933
3934 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::shuffleRowChannelsAndSetLastChannelValue<T, tSourceChannels, tTargetChannels, tShufflePattern>, FrameChannels::reverseRowPixelOrderInPlace<T, tTargetChannels>, areContinuous, &options, worker);
3935}
3936
3937template <unsigned int tChannels>
3938inline void FrameChannels::narrow16BitPerChannelTo8BitPerChannel(const uint16_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
3939{
3940 static_assert(tChannels >= 1u, "Invalid channel number!");
3941
3942 ocean_assert(source != nullptr && target != nullptr);
3943 ocean_assert(width >= 1u && height >= 1u);
3944
3945 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
3946 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
3947
3948 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
3949
3950 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::narrowRow16BitPerChannelTo8BitPerChannel<tChannels>, FrameChannels::reverseRowPixelOrderInPlace<uint8_t, tChannels>, areContinuous, nullptr, worker);
3951}
3952
3953template <typename T, unsigned int tChannels, void (*tPixelFunction)(const T*, T*)>
3954void FrameChannels::applyPixelModifier(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, Worker* worker)
3955{
3956 static_assert(tChannels > 0u, "Invalid channel number!");
3957
3958 ocean_assert(source && target);
3959 ocean_assert(width != 0u && height != 0u);
3960
3961 if (worker)
3962 {
3963 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::applyPixelModifierSubset<T, tChannels, tPixelFunction>, source, target, width, height, conversionFlag, 0u, 0u), 0u, height);
3964 }
3965 else
3966 {
3967 applyPixelModifierSubset<T, tChannels, tPixelFunction>(source, target, width, height, conversionFlag, 0u, height);
3968 }
3969}
3970
3971template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tPixelFunction)(const TSource*, TTarget*)>
3972void FrameChannels::applyAdvancedPixelModifier(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker* worker)
3973{
3974 static_assert(tSourceChannels > 0u, "Invalid source channel number!");
3975 static_assert(tTargetChannels > 0u, "Invalid target channel number!");
3976
3977 ocean_assert(source && target);
3978 ocean_assert(width != 0u && height != 0u);
3979
3980 if (worker)
3981 {
3982 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::applyAdvancedPixelModifierSubset<TSource, TTarget, tSourceChannels, tTargetChannels, tPixelFunction>, source, target, width, height, sourcePaddingElements, targetPaddingElements, conversionFlag, 0u, 0u), 0u, height);
3983 }
3984 else
3985 {
3986 applyAdvancedPixelModifierSubset<TSource, TTarget, tSourceChannels, tTargetChannels, tPixelFunction>(source, target, width, height, sourcePaddingElements, targetPaddingElements, conversionFlag, 0u, height);
3987 }
3988}
3989
3990template <typename TSource0, typename TSource1, typename TTarget, typename TIntermediate, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
3991void FrameChannels::applyBivariateOperator(const TSource0* source0, const TSource1* source1, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker* worker)
3992{
3993 static_assert(tSourceChannels > 0u, "Invalid source channel number!");
3994 static_assert(tTargetChannels > 0u, "Invalid target channel number!");
3995
3996 ocean_assert(source0 && source1 && target);
3997 ocean_assert(width != 0u && height != 0u);
3998
3999 if (worker)
4000 {
4001 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::applyBivariateOperatorSubset<TSource0, TSource1, TTarget, TIntermediate, tSourceChannels, tTargetChannels, tOperator>, source0, source1, target, width, height, source0PaddingElements, source1PaddingElements, targetPaddingElements, conversionFlag, 0u, 0u), 0u, height);
4002 }
4003 else
4004 {
4005 FrameChannels::applyBivariateOperatorSubset<TSource0, TSource1, TTarget, TIntermediate, tSourceChannels, tTargetChannels, tOperator>(source0, source1, target, width, height, source0PaddingElements, source1PaddingElements, targetPaddingElements, conversionFlag, 0u, height);
4006 }
4007}
4008
4009template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels>
4010void FrameChannels::applyRowOperator(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels>& rowOperatorFunction, Worker* worker)
4011{
4012 static_assert(tSourceChannels > 0u, "Invalid source channel number!");
4013 static_assert(tTargetChannels > 0u, "Invalid target channel number!");
4014
4015 ocean_assert(source != nullptr && target != nullptr);
4016 ocean_assert(width != 0u && height != 0u);
4017
4018 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
4019 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
4020
4021 if (worker)
4022 {
4023 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::applyRowOperatorSubset<TSource, TTarget, tSourceChannels, tTargetChannels>, source, target, width, height, sourceStrideElements, targetStrideElements, rowOperatorFunction, 0u, 0u), 0u, height);
4024 }
4025 else
4026 {
4027 applyRowOperatorSubset<TSource, TTarget, tSourceChannels, tTargetChannels>(source, target, width, height, sourceStrideElements, targetStrideElements, rowOperatorFunction, 0u, height);
4028 }
4029}
4030
4031template <typename T, unsigned int tChannels>
4032inline void FrameChannels::transformGeneric(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
4033{
4034 ocean_assert(source != nullptr && target != nullptr);
4035 ocean_assert(width >= 1u && height >= 1u);
4036
4037 const unsigned int bytesPerRow = width * sizeof(T) * tChannels;
4038
4039 const unsigned int sourceStrideBytes = width * sizeof(T) * tChannels + sizeof(T) * sourcePaddingElements;
4040 const unsigned int targetStrideBytes = width * sizeof(T) * tChannels + sizeof(T) * targetPaddingElements;
4041
4042 using MappedType = typename TypeMapper<T>::Type;
4043
4044 const RowReversePixelOrderFunction<void> rowReversePixelOrderFunction = (const RowReversePixelOrderFunction<void>)(FrameChannels::reverseRowPixelOrder<MappedType, tChannels>);
4045
4046 if (worker && height > 200u)
4047 {
4048 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::transformGenericSubset, (const uint8_t*)(source), (uint8_t*)(target), width, height, conversionFlag, rowReversePixelOrderFunction, bytesPerRow, sourceStrideBytes, targetStrideBytes, 0u, 0u), 0u, height, 9u, 10u, 20u);
4049 }
4050 else
4051 {
4052 transformGenericSubset((const uint8_t*)(source), (uint8_t*)(target), width, height, conversionFlag, rowReversePixelOrderFunction, bytesPerRow, sourceStrideBytes, targetStrideBytes, 0u, height);
4053 }
4054}
4055
4056template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
4057void FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannel(uint8_t* const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker* worker)
4058{
4059 static_assert(tChannels >= 2u, "Invalid channel number!");
4060 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
4061
4062 ocean_assert(frame != nullptr);
4063 ocean_assert(width >= 1u && height >= 1u);
4064
4065 if (worker && height > 200u)
4066 {
4067 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, frame, width, framePaddingElements, 0u, 0u), 0u, height, 3u, 4u, 20u);
4068 }
4069 else
4070 {
4071 premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(frame, width, framePaddingElements, 0u, height);
4072 }
4073}
4074
4075template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
4076void FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
4077{
4078 static_assert(tChannels >= 2u, "Invalid channel number!");
4079 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
4080
4081 ocean_assert(source != nullptr && target != nullptr);
4082 ocean_assert(width >= 1u && height >= 1u);
4083
4084 if (worker && height > 200u)
4085 {
4086 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, source, target, width, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height, 5u, 6u, 20u);
4087 }
4088 else
4089 {
4090 premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(source, target, width, sourcePaddingElements, targetPaddingElements, 0u, height);
4091 }
4092}
4093
4094template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
4095void FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannel(uint8_t* const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker* worker)
4096{
4097 static_assert(tChannels >= 2u, "Invalid channel number!");
4098 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
4099
4100 ocean_assert(frame != nullptr);
4101 ocean_assert(width >= 1u && height >= 1u);
4102
4103 if (worker && height > 200u)
4104 {
4105 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, frame, width, framePaddingElements, 0u, 0u), 0u, height, 3u, 4u, 20u);
4106 }
4107 else
4108 {
4109 straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(frame, width, framePaddingElements, 0u, height);
4110 }
4111}
4112
4113template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
4114void FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
4115{
4116 static_assert(tChannels >= 2u, "Invalid channel number!");
4117 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
4118
4119 ocean_assert(source != nullptr && target != nullptr);
4120 ocean_assert(width >= 1u && height >= 1u);
4121
4122 if (worker && height > 200u)
4123 {
4124 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, source, target, width, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height, 5u, 6u, 20u);
4125 }
4126 else
4127 {
4128 straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(source, target, width, sourcePaddingElements, targetPaddingElements, 0u, height);
4129 }
4130}
4131
4132template <unsigned int tChannels>
4133void FrameChannels::narrowRow16BitPerChannelTo8BitPerChannel(const uint16_t* source, uint8_t* target, const size_t size, const void* /* unusedParameters */)
4134{
4135 static_assert(tChannels >= 1u, "Invalid channel number!");
4136
4137 ocean_assert(source != nullptr && target != nullptr);
4138 ocean_assert(size > 0);
4139
4140 const uint16_t* const sourceEnd = source + size * tChannels;
4141
4142#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
4143
4144 const size_t blocks8 = size / size_t(8);
4145
4146 switch (tChannels)
4147 {
4148 case 4u:
4149 {
4150 for (size_t n = 0; n < blocks8; ++n)
4151 {
4152 const uint16x8_t sourceA_u_16x8 = vld1q_u16(source + 0);
4153 const uint16x8_t sourceB_u_16x8 = vld1q_u16(source + 8);
4154 const uint16x8_t sourceC_u_16x8 = vld1q_u16(source + 16);
4155 const uint16x8_t sourceD_u_16x8 = vld1q_u16(source + 24);
4156
4157 const uint8x16_t targetAB_u_8x16 = vcombine_u8(vqrshrn_n_u16(sourceA_u_16x8, 8), vqrshrn_n_u16(sourceB_u_16x8, 8)); // narrowing rounded right shift: target = (source + 128) / 256
4158 const uint8x16_t targetCD_u_8x16 = vcombine_u8(vqrshrn_n_u16(sourceC_u_16x8, 8), vqrshrn_n_u16(sourceD_u_16x8, 8));
4159
4160 vst1q_u8(target + 0, targetAB_u_8x16);
4161 vst1q_u8(target + 16, targetCD_u_8x16);
4162
4163 source += 8u * tChannels;
4164 target += 8u * tChannels;
4165 }
4166
4167 break;
4168 }
4169
4170 default:
4171 break;
4172 }
4173
4174#endif
4175
4176 while (source != sourceEnd)
4177 {
4178 ocean_assert(source < sourceEnd);
4179
4180 for (unsigned int n = 0u; n < tChannels; ++n)
4181 {
4182 ocean_assert((uint16_t)(source[n] >> 8u) <= 255u);
4183 target[n] = (uint8_t)(source[n] >> 8u);
4184 }
4185
4186 source += tChannels;
4187 target += tChannels;
4188 }
4189}
4190
4191template <typename T, unsigned int tSourceChannels, bool tAddToFront>
4192void FrameChannels::addChannelRow(const void** sources, void** targets, const unsigned int multipleRowIndex, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const void* options)
4193{
4194 static_assert(tSourceChannels != 0u, "Invalid channel number!");
4195 static_assert(sizeof(size_t) == sizeof(const T*), "Invalid pointer size!");
4196
4197 ocean_assert(sources != nullptr && targets != nullptr);
4198 ocean_assert(width != 0u && height != 0u);
4199 ocean_assert(multipleRowIndex < height);
4200 ocean_assert(options != nullptr);
4201
4202 const T* source = (const T*)(sources[0]);
4203 const T* sourceOneChannel = (const T*)(sources[1]);
4204 ocean_assert(source != nullptr && sourceOneChannel != nullptr);
4205
4206 T* target = (T*)(targets[0]);
4207 ocean_assert(target != nullptr);
4208
4209 const unsigned int* uintOptions = (const unsigned int*)options;
4210 ocean_assert(uintOptions != nullptr);
4211
4212 const unsigned int sourcePaddingElements = uintOptions[0];
4213 const unsigned int sourceOneChannelPaddingElements = uintOptions[1];
4214 const unsigned int targetPaddingElements = uintOptions[2];
4215
4216 const unsigned int targetChannels = tSourceChannels + 1u;
4217
4218 const unsigned int sourceStrideElements = tSourceChannels * width + sourcePaddingElements;
4219 const unsigned int sourceOneChannelStrideElements = width + sourceOneChannelPaddingElements;
4220 const unsigned int targetStrideElements = targetChannels * width + targetPaddingElements;
4221
4222 const bool flipTarget = conversionFlag == CONVERT_FLIPPED || conversionFlag == CONVERT_FLIPPED_AND_MIRRORED;
4223 const bool mirrorTarget = conversionFlag == CONVERT_MIRRORED || conversionFlag == CONVERT_FLIPPED_AND_MIRRORED;
4224
4225 const T* sourceRow = source + sourceStrideElements * multipleRowIndex;
4226 const T* sourceOneChannelRow = sourceOneChannel + sourceOneChannelStrideElements * multipleRowIndex;
4227 T* targetRow = flipTarget ? target + targetStrideElements * (height - multipleRowIndex - 1u) : target + targetStrideElements * multipleRowIndex;
4228
4229 if (mirrorTarget == false)
4230 {
4231 for (unsigned int n = 0u; n < width; ++n)
4232 {
4233 if constexpr (tAddToFront)
4234 {
4235 targetRow[0] = sourceOneChannelRow[0];
4236
4237 for (unsigned int c = 0u; c < tSourceChannels; ++c)
4238 {
4239 targetRow[c + 1u] = sourceRow[c];
4240 }
4241 }
4242 else
4243 {
4244 for (unsigned int c = 0u; c < tSourceChannels; ++c)
4245 {
4246 targetRow[c] = sourceRow[c];
4247 }
4248
4249 targetRow[tSourceChannels] = sourceOneChannelRow[0];
4250 }
4251
4252 sourceRow += tSourceChannels;
4253 sourceOneChannelRow++;
4254
4255 targetRow += targetChannels;
4256 }
4257 }
4258 else
4259 {
4260 targetRow += targetChannels * (width - 1u);
4261
4262 for (unsigned int n = 0u; n < width; ++n)
4263 {
4264 if constexpr (tAddToFront)
4265 {
4266 targetRow[0] = sourceOneChannelRow[0];
4267
4268 for (unsigned int c = 0u; c < tSourceChannels; ++c)
4269 {
4270 targetRow[c + 1u] = sourceRow[c];
4271 }
4272 }
4273 else
4274 {
4275 for (unsigned int c = 0u; c < tSourceChannels; ++c)
4276 {
4277 targetRow[c] = sourceRow[c];
4278 }
4279
4280 targetRow[tSourceChannels] = sourceOneChannelRow[0];
4281 }
4282
4283 sourceRow += tSourceChannels;
4284 sourceOneChannelRow++;
4285
4286 targetRow -= targetChannels;
4287 }
4288 }
4289}
4290
4291template <typename T, unsigned int tSourceChannels, bool tAddToFront>
4292void FrameChannels::addChannelValueRow(const T* source, T* target, const size_t size, const void* channelValueParameter)
4293{
4294 static_assert(tSourceChannels != 0u, "Invalid channel number!");
4295
4296 ocean_assert(source != nullptr && target != nullptr);
4297 ocean_assert(size > 0);
4298 ocean_assert(channelValueParameter != nullptr);
4299
4300 const T& channelValue = *((const T*)channelValueParameter);
4301
4302 const unsigned int targetChannels = tSourceChannels + 1u;
4303
4304 for (size_t n = 0; n < size; ++n)
4305 {
4306 if constexpr (tAddToFront)
4307 {
4308 target[0] = channelValue;
4309
4310 for (unsigned int c = 0u; c < tSourceChannels; ++c)
4311 {
4312 target[c + 1u] = source[c];
4313 }
4314 }
4315 else
4316 {
4317 for (unsigned int c = 0u; c < tSourceChannels; ++c)
4318 {
4319 target[c] = source[c];
4320 }
4321
4322 target[tSourceChannels] = channelValue;
4323 }
4324
4325 source += tSourceChannels;
4326 target += targetChannels;
4327 }
4328}
4329
4330template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tSourceChannelIndex, unsigned int tTargetChannelIndex>
4331void FrameChannels::copyChannelRow(const T* source, T* target, const size_t size, const void* /*unusedParameters*/)
4332{
4333 static_assert(tSourceChannels != 0u, "Invalid channel number!");
4334 static_assert(tTargetChannels != 0u, "Invalid channel number!");
4335
4336 static_assert(tSourceChannelIndex < tSourceChannels, "Invalid channel number!");
4337 static_assert(tTargetChannelIndex < tTargetChannels, "Invalid channel number!");
4338
4339 ocean_assert(source != nullptr && target != nullptr);
4340 ocean_assert(size > 0);
4341
4342 for (size_t n = 0; n < size; ++n)
4343 {
4344 target[tTargetChannelIndex] = source[tSourceChannelIndex];
4345
4346 source += tSourceChannels;
4347 target += tTargetChannels;
4348 }
4349}
4350
4351template <typename TSource, typename TTarget>
4352void FrameChannels::separateTo1ChannelRuntime(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
4353{
4354 ocean_assert(sourceFrame != nullptr);
4355 ocean_assert(targetFrames != nullptr);
4356
4357 ocean_assert(width != 0u && height != 0u);
4358 ocean_assert(channels != 0u);
4359
4360#ifdef OCEAN_DEBUG
4361 for (unsigned int c = 0u; c < channels; ++c)
4362 {
4363 ocean_assert(targetFrames[c] != nullptr);
4364 }
4365#endif
4366
4367 if (sourceFramePaddingElements == 0u && targetFramesPaddingElements == nullptr)
4368 {
4369 for (unsigned int n = 0u; n < width * height; ++n)
4370 {
4371 for (unsigned int c = 0u; c < channels; ++c)
4372 {
4373 targetFrames[c][n] = TTarget(sourceFrame[n * channels + c]);
4374 }
4375 }
4376 }
4377 else if (targetFramesPaddingElements == nullptr)
4378 {
4379 ocean_assert(sourceFramePaddingElements != 0u);
4380
4381 const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
4382
4383 for (unsigned int y = 0u; y < height; ++y)
4384 {
4385 const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
4386
4387 const unsigned int targetRowOffset = y * width;
4388
4389 for (unsigned int x = 0u; x < width; ++x)
4390 {
4391 for (unsigned int c = 0u; c < channels; ++c)
4392 {
4393 *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * channels + c));
4394 }
4395 }
4396 }
4397 }
4398 else
4399 {
4400 const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
4401
4402 Indices32 targetFrameStrideElements(channels);
4403
4404 for (unsigned int c = 0u; c < channels; ++c)
4405 {
4406 targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
4407 }
4408
4409 for (unsigned int y = 0u; y < height; ++y)
4410 {
4411 const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
4412
4413 for (unsigned int x = 0u; x < width; ++x)
4414 {
4415 for (unsigned int c = 0u; c < channels; ++c)
4416 {
4417 *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * channels + c));
4418 }
4419 }
4420 }
4421 }
4422}
4423
4424template <typename TSource, typename TTarget>
4425void FrameChannels::zipChannelsRuntime(const TSource* const* sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
4426{
4427 ocean_assert(sourceFrames != nullptr);
4428 ocean_assert(targetFrame != nullptr);
4429
4430 ocean_assert(width != 0u && height != 0u);
4431 ocean_assert(channels != 0u);
4432
4433 bool allSourceFramesContinuous = true;
4434
4435 if (sourceFramesPaddingElements != nullptr)
4436 {
4437 for (unsigned int n = 0u; n < channels; ++n)
4438 {
4439 if (sourceFramesPaddingElements[n] != 0u)
4440 {
4441 allSourceFramesContinuous = false;
4442 break;
4443 }
4444 }
4445 }
4446
4447 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
4448 {
4449 for (unsigned int n = 0u; n < width * height; ++n)
4450 {
4451 for (unsigned int c = 0u; c < channels; ++c)
4452 {
4453 targetFrame[n * channels + c] = TTarget(sourceFrames[c][n]);
4454 }
4455 }
4456 }
4457 else
4458 {
4459 const unsigned int targetFrameStrideElements = width * channels + targetFramePaddingElements;
4460
4461 Indices32 sourceFrameStrideElements(channels);
4462
4463 for (unsigned int c = 0u; c < channels; ++c)
4464 {
4465 if (sourceFramesPaddingElements == nullptr)
4466 {
4467 sourceFrameStrideElements[c] = width;
4468 }
4469 else
4470 {
4471 sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
4472 }
4473 }
4474
4475 for (unsigned int y = 0u; y < height; ++y)
4476 {
4477 TTarget* const targetRow = targetFrame + y * targetFrameStrideElements;
4478
4479 for (unsigned int x = 0u; x < width; ++x)
4480 {
4481 for (unsigned int c = 0u; c < channels; ++c)
4482 {
4483 *(targetRow + x * channels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x));
4484 }
4485 }
4486 }
4487 }
4488}
4489
4490template <typename T, unsigned int tChannel, unsigned int tChannels>
4491void FrameChannels::setChannelSubset(T* frame, const unsigned int width, const T value, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4492{
4493 static_assert(tChannels >= 1u, "Invalid channel number!");
4494 static_assert(tChannel < tChannels, "Invalid channel index!");
4495
4496 ocean_assert(frame != nullptr);
4497
4498 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
4499
4500 frame += firstRow * frameStrideElements + tChannel;
4501
4502 for (unsigned int n = 0u; n < numberRows; ++n)
4503 {
4504 for (unsigned int x = 0u; x < width; ++x)
4505 {
4506 frame[x * tChannels] = value;
4507 }
4508
4509 frame += frameStrideElements;
4510 }
4511}
4512
4513template <typename T, unsigned int tChannels, void (*tPixelFunction)(const T*, T*)>
4514void FrameChannels::applyPixelModifierSubset(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
4515{
4516 static_assert(tChannels >= 1u, "Invalid channel number");
4517
4518 ocean_assert(source && target);
4519 ocean_assert(source != target);
4520
4521 ocean_assert(numberRows > 0u);
4522 ocean_assert(firstRow + numberRows <= height);
4523
4524 const unsigned int widthElements = width * tChannels;
4525 const unsigned int targetBlockSize = widthElements * numberRows;
4526
4527 switch (conversionFlag)
4528 {
4529 case CONVERT_NORMAL:
4530 {
4531 source += firstRow * widthElements;
4532 target += firstRow * widthElements;
4533
4534 const T* const targetEnd = target + targetBlockSize;
4535
4536 while (target != targetEnd)
4537 {
4538 tPixelFunction(source, target);
4539
4540 source += tChannels;
4541 target += tChannels;
4542 }
4543
4544 break;
4545 }
4546
4547 case CONVERT_FLIPPED:
4548 {
4549 source += firstRow * widthElements;
4550 target += width * height * tChannels - (firstRow + 1u) * widthElements;
4551
4552 const T* const targetEnd = target - targetBlockSize;
4553
4554 while (target != targetEnd)
4555 {
4556 const T* const targetRowEnd = target + widthElements;
4557
4558 while (target != targetRowEnd)
4559 {
4560 tPixelFunction(source, target);
4561
4562 source += tChannels;
4563 target += tChannels;
4564 }
4565
4566 target -= (widthElements << 1); // width * tChannels * 2
4567 }
4568
4569 break;
4570 }
4571
4572 case CONVERT_MIRRORED:
4573 {
4574 source += firstRow * widthElements;
4575 target += (firstRow + 1u) * widthElements;
4576
4577 const T* const targetEnd = target + targetBlockSize;
4578
4579 while (target != targetEnd)
4580 {
4581 const T* const targetRowEnd = target - widthElements;
4582
4583 while (target != targetRowEnd)
4584 {
4585 tPixelFunction(source, target -= tChannels);
4586
4587 source += tChannels;
4588 }
4589
4590 target += widthElements << 1; // width * tChannels * 2;
4591 }
4592
4593 break;
4594 }
4595
4597 {
4598 source += firstRow * widthElements;
4599 target += width * height * tChannels - firstRow * widthElements;
4600
4601 const T* const targetEnd = target - targetBlockSize;
4602
4603 while (target != targetEnd)
4604 {
4605 tPixelFunction(source, target -= tChannels);
4606
4607 source += tChannels;
4608 }
4609
4610 break;
4611 }
4612
4613 default: // this case is not handled
4614 break;
4615 }
4616}
4617
4618template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tPixelFunction)(const TSource*, TTarget*)>
4619void FrameChannels::applyAdvancedPixelModifierSubset(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
4620{
4621 static_assert(tSourceChannels >= 1u, "Invalid source channel number");
4622 static_assert(tTargetChannels >= 1u, "Invalid target channel number");
4623
4624 ocean_assert(source && target);
4625 ocean_assert((void*)source != (void*)target);
4626
4627 ocean_assert(numberRows != 0u);
4628 ocean_assert(firstRow + numberRows <= height);
4629
4630 const unsigned int sourceWidthElements = width * tSourceChannels;
4631 const unsigned int targetWidthElements = width * tTargetChannels;
4632
4633 const unsigned int sourceStrideElements = sourceWidthElements + sourcePaddingElements;
4634 const unsigned int targetStrideElements = targetWidthElements + targetPaddingElements;
4635
4636 switch (conversionFlag)
4637 {
4638 case CONVERT_NORMAL:
4639 {
4640 for (unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4641 {
4642 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4643 TTarget* targetPixel = target + rowIndex * targetStrideElements;
4644
4645 for (unsigned int x = 0u; x < width; ++x)
4646 {
4647 tPixelFunction(sourcePixel, targetPixel);
4648
4649 sourcePixel += tSourceChannels;
4650 targetPixel += tTargetChannels;
4651 }
4652 }
4653
4654 break;
4655 }
4656
4657 case CONVERT_FLIPPED:
4658 {
4659 for (unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4660 {
4661 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4662 TTarget* targetPixel = target + (height - rowIndex - 1u) * targetStrideElements;
4663
4664 for (unsigned int x = 0u; x < width; ++x)
4665 {
4666 tPixelFunction(sourcePixel, targetPixel);
4667
4668 sourcePixel += tSourceChannels;
4669 targetPixel += tTargetChannels;
4670 }
4671 }
4672
4673 break;
4674 }
4675
4676 case CONVERT_MIRRORED:
4677 {
4678 for (unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4679 {
4680 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4681
4682 TTarget* const targetRowBegin = target + rowIndex * targetStrideElements;
4683 TTarget* targetPixel = targetRowBegin + targetWidthElements - tTargetChannels;
4684
4685 for (unsigned int x = 0u; x < width; ++x)
4686 {
4687 ocean_assert(targetPixel >= targetRowBegin);
4688 tPixelFunction(sourcePixel, targetPixel);
4689
4690 sourcePixel += tSourceChannels;
4691 targetPixel -= tTargetChannels;
4692 }
4693 }
4694
4695 break;
4696 }
4697
4699 {
4700 for (unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4701 {
4702 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4703
4704 TTarget* const targetRowBegin = target + (height - rowIndex - 1u) * targetStrideElements;
4705 TTarget* targetPixel = targetRowBegin + targetWidthElements - tTargetChannels;
4706
4707 for (unsigned int x = 0u; x < width; ++x)
4708 {
4709 ocean_assert(targetPixel >= targetRowBegin);
4710 tPixelFunction(sourcePixel, targetPixel);
4711
4712 sourcePixel += tSourceChannels;
4713 targetPixel -= tTargetChannels;
4714 }
4715 }
4716
4717 break;
4718 }
4719
4720 default: // this case is not handled
4721 break;
4722 }
4723}
4724
4725template <typename TSource0, typename TSource1, typename TTarget, typename TIntermediate, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
4726void FrameChannels::applyBivariateOperatorSubset(const TSource0* source0, const TSource1* source1, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
4727{
4728 static_assert(tSourceChannels >= 1u, "Invalid source channel number");
4729 static_assert(tTargetChannels >= 1u, "Invalid target channel number");
4730 static_assert(tOperator, "Invalid operator function");
4731
4732 ocean_assert(source0 != nullptr && source1 != nullptr && target != nullptr);
4733 ocean_assert((const void*)(source0) != (const void*)(target));
4734 ocean_assert((const void*)(source1) != (const void*)(target));
4735
4736 ocean_assert(numberRows != 0u);
4737 ocean_assert(firstRow + numberRows <= height);
4738
4739 const unsigned int source0StrideElements = width * tSourceChannels + source0PaddingElements;
4740 const unsigned int source1StrideElements = width * tSourceChannels + source1PaddingElements;
4741
4742 const unsigned int targetWidthElements = width * tTargetChannels;
4743
4744 const unsigned int targetStrideElements = targetWidthElements + targetPaddingElements;
4745
4746 switch (conversionFlag)
4747 {
4748 case CONVERT_NORMAL:
4749 {
4750 for (unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4751 {
4752 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4753 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4754
4755 TTarget* rowTarget = target + rowIndex * targetStrideElements;
4756 const TTarget* const rowTargetEnd = rowTarget + targetWidthElements;
4757
4758 while (rowTarget != rowTargetEnd)
4759 {
4760 ocean_assert(rowTarget < rowTargetEnd);
4761
4762 tOperator(rowSource0, rowSource1, rowTarget);
4763
4764 rowSource0 += tSourceChannels;
4765 rowSource1 += tSourceChannels;
4766
4767 rowTarget += tTargetChannels;
4768 }
4769 }
4770
4771 return;
4772 }
4773
4774 case CONVERT_FLIPPED:
4775 {
4776 for (unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4777 {
4778 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4779 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4780
4781 TTarget* rowTarget = target + (height - rowIndex - 1u) * targetStrideElements;
4782 const TTarget* const rowTargetEnd = rowTarget + targetWidthElements;
4783
4784 while (rowTarget != rowTargetEnd)
4785 {
4786 ocean_assert(rowTarget < rowTargetEnd);
4787
4788 tOperator(rowSource0, rowSource1, rowTarget);
4789
4790 rowSource0 += tSourceChannels;
4791 rowSource1 += tSourceChannels;
4792
4793 rowTarget += tTargetChannels;
4794 }
4795 }
4796
4797 return;
4798 }
4799
4800 case CONVERT_MIRRORED:
4801 {
4802 for (unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4803 {
4804 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4805 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4806
4807 TTarget* rowTarget = target + rowIndex * targetStrideElements + targetWidthElements - tTargetChannels;
4808 const TTarget* const rowTargetEnd = rowTarget - targetWidthElements;
4809
4810 while (rowTarget != rowTargetEnd)
4811 {
4812 ocean_assert(rowTarget > rowTargetEnd);
4813
4814 tOperator(rowSource0, rowSource1, rowTarget);
4815
4816 rowSource0 += tSourceChannels;
4817 rowSource1 += tSourceChannels;
4818
4819 rowTarget -= tTargetChannels;
4820 }
4821 }
4822
4823 return;
4824 }
4825
4827 {
4828 for (unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4829 {
4830 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4831 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4832
4833 TTarget* rowTarget = target + (height - rowIndex - 1u) * targetStrideElements + targetWidthElements - tTargetChannels;
4834 const TTarget* const rowTargetEnd = rowTarget - targetWidthElements;
4835
4836 while (rowTarget != rowTargetEnd)
4837 {
4838 ocean_assert(rowTarget > rowTargetEnd);
4839
4840 tOperator(rowSource0, rowSource1, rowTarget);
4841
4842 rowSource0 += tSourceChannels;
4843 rowSource1 += tSourceChannels;
4844
4845 rowTarget -= tTargetChannels;
4846 }
4847 }
4848
4849 return;
4850 }
4851
4852 default:
4853 ocean_assert(false && "This should never happen!");
4854 break;
4855 }
4856}
4857
4858template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels>
4859void FrameChannels::applyRowOperatorSubset(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels> rowOperatorFunction, const unsigned int firstRow, const unsigned int numberRows)
4860{
4861 static_assert(tSourceChannels >= 1u, "Invalid source channel number");
4862 static_assert(tTargetChannels >= 1u, "Invalid target channel number");
4863
4864 ocean_assert(source != nullptr && target != nullptr);
4865 ocean_assert((const void*)source != (const void*)target);
4866
4867 ocean_assert(width * tSourceChannels <= sourceStrideElements);
4868 ocean_assert(width * tTargetChannels <= targetStrideElements);
4869
4870 ocean_assert(rowOperatorFunction != nullptr);
4871
4872 ocean_assert(numberRows != 0u);
4873 ocean_assert(firstRow + numberRows <= height);
4874
4875 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4876 {
4877 rowOperatorFunction(source + y * sourceStrideElements, target + y * targetStrideElements, width, height, y, sourceStrideElements, targetStrideElements);
4878 }
4879}
4880
4881template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2>
4882void FrameChannels::convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* channelMultiplicationFactors_128)
4883{
4884 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2, "Invalid channel factors!");
4885
4886 ocean_assert(channelMultiplicationFactors_128 != nullptr);
4887 const unsigned int* channelFactors_128 = reinterpret_cast<const unsigned int*>(channelMultiplicationFactors_128);
4888 ocean_assert(channelFactors_128 != nullptr);
4889
4890 const unsigned int factorChannel0_128 = channelFactors_128[0];
4891 const unsigned int factorChannel1_128 = channelFactors_128[1];
4892 const unsigned int factorChannel2_128 = channelFactors_128[2];
4893
4894 ocean_assert(factorChannel0_128 <= 128u && factorChannel1_128 <= 128u && factorChannel2_128 <= 128u);
4895 ocean_assert(factorChannel0_128 + factorChannel1_128 + factorChannel2_128 == 128u);
4896
4897 ocean_assert(tUseFactorChannel0 == (factorChannel0_128 != 0u));
4898 ocean_assert(tUseFactorChannel1 == (factorChannel1_128 != 0u));
4899 ocean_assert(tUseFactorChannel2 == (factorChannel2_128 != 0u));
4900
4901 ocean_assert(source != nullptr && target != nullptr && size >= 1);
4902
4903 const uint8_t* const targetEnd = target + size;
4904
4905#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
4906
4907 constexpr size_t blockSize = 16;
4908 const size_t blocks = size / blockSize;
4909
4910 const __m128i multiplicationFactors0_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel0_128));
4911 const __m128i multiplicationFactors1_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel1_128));
4912 const __m128i multiplicationFactors2_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel2_128));
4913
4914 for (size_t n = 0; n < blocks; ++n)
4915 {
4916 convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(source, target, multiplicationFactors0_128_u_16x8, multiplicationFactors1_128_u_16x8, multiplicationFactors2_128_u_16x8);
4917
4918 source += blockSize * size_t(3);
4919 target += blockSize;
4920 }
4921
4922#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
4923
4924 constexpr size_t blockSize = 8;
4925 const size_t blocks = size / blockSize;
4926
4927 const uint8x8_t factorChannel0_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel0_128);
4928 const uint8x8_t factorChannel1_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel1_128);
4929 const uint8x8_t factorChannel2_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel2_128);
4930
4931 for (size_t n = 0; n < blocks; ++n)
4932 {
4933 convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON<tUseFactorChannel0, tUseFactorChannel1, tUseFactorChannel2>(source, target, factorChannel0_128_u_8x8, factorChannel1_128_u_8x8, factorChannel2_128_u_8x8);
4934
4935 source += blockSize * size_t(3);
4936 target += blockSize;
4937 }
4938
4939#endif
4940
4941 while (target != targetEnd)
4942 {
4943 ocean_assert(target < targetEnd);
4944
4945 const unsigned int channel0 = tUseFactorChannel0 ? (source[0] * factorChannel0_128) : 0u;
4946 const unsigned int channel1 = tUseFactorChannel1 ? (source[1] * factorChannel1_128) : 0u;
4947 const unsigned int channel2 = tUseFactorChannel2 ? (source[2] * factorChannel2_128) : 0u;
4948
4949 *target++ = (uint8_t)((channel0 + channel1 + channel2 + 64u) >> 7u);
4950 source += 3;
4951 }
4952}
4953
4954template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2, bool tUseFactorChannel3>
4955void FrameChannels::convertRow4ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* channelMultiplicationFactors_128)
4956{
4957 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2 || tUseFactorChannel3, "Invalid channel factors!");
4958
4959 ocean_assert(channelMultiplicationFactors_128 != nullptr);
4960 const unsigned int* channelFactors_128 = reinterpret_cast<const unsigned int*>(channelMultiplicationFactors_128);
4961 ocean_assert(channelFactors_128 != nullptr);
4962
4963 const unsigned int factorChannel0_128 = channelFactors_128[0];
4964 const unsigned int factorChannel1_128 = channelFactors_128[1];
4965 const unsigned int factorChannel2_128 = channelFactors_128[2];
4966 const unsigned int factorChannel3_128 = channelFactors_128[3];
4967
4968 ocean_assert(factorChannel0_128 <= 127u && factorChannel1_128 <= 127u && factorChannel2_128 <= 127u && factorChannel3_128 <= 127u);
4969 ocean_assert(factorChannel0_128 + factorChannel1_128 + factorChannel2_128 + factorChannel3_128 == 128u);
4970
4971 ocean_assert(tUseFactorChannel0 == (factorChannel0_128 != 0u));
4972 ocean_assert(tUseFactorChannel1 == (factorChannel1_128 != 0u));
4973 ocean_assert(tUseFactorChannel2 == (factorChannel2_128 != 0u));
4974 ocean_assert(tUseFactorChannel3 == (factorChannel3_128 != 0u));
4975
4976 ocean_assert(source != nullptr && target != nullptr && size >= 1);
4977
4978 const uint8_t* const targetEnd = target + size;
4979
4980#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
4981
4982 constexpr size_t blockSize = 16;
4983 const size_t blocks = size / blockSize;
4984
4985 const __m128i m128_multiplicationFactors = _mm_set1_epi32(int(factorChannel0_128 | (factorChannel1_128 << 8u) | (factorChannel2_128 << 16u) | (factorChannel3_128 << 24u)));
4986
4987 for (size_t n = 0; n < blocks; ++n)
4988 {
4989 convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(source, target, m128_multiplicationFactors);
4990
4991 source += blockSize * size_t(4);
4992 target += blockSize;
4993 }
4994
4995#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
4996
4997 constexpr size_t blockSize = 8;
4998 const size_t blocks = size / blockSize;
4999
5000 const uint8x8_t factorChannel0_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel0_128);
5001 const uint8x8_t factorChannel1_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel1_128);
5002 const uint8x8_t factorChannel2_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel2_128);
5003 const uint8x8_t factorChannel3_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel3_128);
5004
5005 for (size_t n = 0; n < blocks; ++n)
5006 {
5007 convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON<tUseFactorChannel0, tUseFactorChannel1, tUseFactorChannel2, tUseFactorChannel3>(source, target, factorChannel0_128_u_8x8, factorChannel1_128_u_8x8, factorChannel2_128_u_8x8, factorChannel3_128_u_8x8);
5008
5009 source += blockSize * size_t(4);
5010 target += blockSize;
5011 }
5012
5013#endif
5014
5015 while (target != targetEnd)
5016 {
5017 ocean_assert(target < targetEnd);
5018
5019 const unsigned int channel0 = tUseFactorChannel0 ? (source[0] * factorChannel0_128) : 0u;
5020 const unsigned int channel1 = tUseFactorChannel1 ? (source[1] * factorChannel1_128) : 0u;
5021 const unsigned int channel2 = tUseFactorChannel2 ? (source[2] * factorChannel2_128) : 0u;
5022 const unsigned int channel3 = tUseFactorChannel3 ? (source[3] * factorChannel3_128) : 0u;
5023
5024 *target++ = (uint8_t)((channel0 + channel1 + channel2 + channel3 + 64u) >> 7u);
5025 source += 4;
5026 }
5027}
5028
5029template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
5030void FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset(uint8_t* const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5031{
5032 static_assert(tChannels >= 2u, "Invalid channel number!");
5033 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
5034
5035 ocean_assert(frame != nullptr);
5036 ocean_assert(width >= 1u);
5037
5038 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
5039
5040 uint8_t* frameRow = frame + frameStrideElements * firstRow;
5041
5042 for (unsigned int y = 0u; y < numberRows; ++y)
5043 {
5044 for (unsigned int x = 0u; x < width; ++x)
5045 {
5046 if (frameRow[tAlphaChannelIndex])
5047 {
5048 const uint8_t alpha_2 = frameRow[tAlphaChannelIndex] / 2u;
5049
5050 for (unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5051 {
5052 if (channelIndex != tAlphaChannelIndex)
5053 {
5054 frameRow[channelIndex] = uint8_t(std::min((frameRow[channelIndex] * 255u + alpha_2) / frameRow[tAlphaChannelIndex], 255u));
5055 }
5056 }
5057 }
5058
5059 frameRow += tChannels;
5060 }
5061
5062 frameRow += framePaddingElements;
5063 }
5064}
5065
5066template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
5067void FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset(const uint8_t* const source, uint8_t* target, const unsigned int width, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5068{
5069 static_assert(tChannels >= 2u, "Invalid channel number!");
5070 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
5071
5072 ocean_assert(source != nullptr && target != nullptr);
5073 ocean_assert(width >= 1u);
5074
5075 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
5076 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
5077
5078 const uint8_t* sourceRow = source + sourceStrideElements * firstRow;
5079 uint8_t* targetRow = target + targetStrideElements * firstRow;
5080
5081 for (unsigned int y = 0u; y < numberRows; ++y)
5082 {
5083 for (unsigned int x = 0u; x < width; ++x)
5084 {
5085 if (sourceRow[tAlphaChannelIndex])
5086 {
5087 const uint8_t alpha_2 = sourceRow[tAlphaChannelIndex] / 2u;
5088
5089 for (unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5090 {
5091 if (channelIndex != tAlphaChannelIndex)
5092 {
5093 targetRow[channelIndex] = uint8_t(std::max((sourceRow[channelIndex] * 255u + alpha_2) / sourceRow[tAlphaChannelIndex], 255u));
5094 }
5095 else
5096 {
5097 targetRow[channelIndex] = sourceRow[channelIndex];
5098 }
5099 }
5100 }
5101 else
5102 {
5103 for (unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5104 {
5105 targetRow[channelIndex] = sourceRow[channelIndex];
5106 }
5107 }
5108
5109 sourceRow += tChannels;
5110 targetRow += tChannels;
5111 }
5112
5113 sourceRow += sourcePaddingElements;
5114 targetRow += targetPaddingElements;
5115 }
5116}
5117
5118template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
5119void FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset(uint8_t* const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5120{
5121 static_assert(tChannels >= 2u, "Invalid channel number!");
5122 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
5123
5124 ocean_assert(frame != nullptr);
5125 ocean_assert(width >= 1u);
5126
5127 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
5128
5129 uint8_t* frameRow = frame + frameStrideElements * firstRow;
5130
5131 for (unsigned int y = 0u; y < numberRows; ++y)
5132 {
5133 for (unsigned int x = 0u; x < width; ++x)
5134 {
5135 for (unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5136 {
5137 if (channelIndex != tAlphaChannelIndex)
5138 {
5139 frameRow[channelIndex] = (frameRow[channelIndex] * frameRow[tAlphaChannelIndex] + 127u) / 255u;
5140 }
5141 }
5142
5143 frameRow += tChannels;
5144 }
5145
5146 frameRow += framePaddingElements;
5147 }
5148}
5149
5150template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
5151void FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5152{
5153 static_assert(tChannels >= 2u, "Invalid channel number!");
5154 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
5155
5156 ocean_assert(source != nullptr && target != nullptr);
5157 ocean_assert(width >= 1u);
5158
5159 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
5160 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
5161
5162 const uint8_t* sourceRow = source + sourceStrideElements * firstRow;
5163 uint8_t* targetRow = target + targetStrideElements * firstRow;
5164
5165 for (unsigned int y = 0u; y < numberRows; ++y)
5166 {
5167 for (unsigned int x = 0u; x < width; ++x)
5168 {
5169 for (unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5170 {
5171 if (channelIndex != tAlphaChannelIndex)
5172 {
5173 targetRow[channelIndex] = (sourceRow[channelIndex] * sourceRow[tAlphaChannelIndex] + 127u) / 255u;
5174 }
5175 else
5176 {
5177 targetRow[channelIndex] = sourceRow[channelIndex];
5178 }
5179 }
5180
5181 sourceRow += tChannels;
5182 targetRow += tChannels;
5183 }
5184
5185 sourceRow += sourcePaddingElements;
5186 targetRow += targetPaddingElements;
5187 }
5188}
5189
5190#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
5191
5192OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactors0_128_u_16x8, const __m128i& multiplicationFactors1_128_u_16x8, const __m128i& multiplicationFactors2_128_u_16x8)
5193{
5194 ocean_assert(source != nullptr && target != nullptr);
5195
5196 // the documentation of this function is designed for RGB24 to Y8 conversion
5197 // however, in general this function can be used to apply a linear combination on the four source channels
5198 // to create one output channel
5199
5200 // precise color space conversion:
5201 // Y = 0.299 * R + 0.587 * G + 0.114 * B
5202
5203 // approximation:
5204 // Y = (38 * R + 75 * G + 15 * B) / 128
5205
5206 // we expect the following input pattern (for here RGB24):
5207 // FEDC BA98 7654 3210 FEDC BA98 7654 3210 FEDC BA98 7654 3210
5208 // BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR
5209
5210 // we store eight 16 bit values holding 64 for rounding purpose:
5211 const __m128i constant64_u_16x8 = _mm_set1_epi32(0x00400040);
5212
5213 const __m128i sourceA_u_8x16 = _mm_loadu_si128((const __m128i*)source + 0);
5214 const __m128i sourceB_u_8x16 = _mm_loadu_si128((const __m128i*)source + 1);
5215 const __m128i sourceC_u_8x16 = _mm_loadu_si128((const __m128i*)source + 2);
5216
5217 __m128i channel0_u_8x16;
5218 __m128i channel1_u_8x16;
5219 __m128i channel2_u_8x16;
5220 SSE::deInterleave3Channel8Bit48Elements(sourceA_u_8x16, sourceB_u_8x16, sourceC_u_8x16, channel0_u_8x16, channel1_u_8x16, channel2_u_8x16);
5221
5222 // now we need 16 bit values instead of 8 bit values
5223
5224 const __m128i channel0_low_u_8x16 = SSE::removeHighBits16_8(channel0_u_8x16);
5225 const __m128i channel1_low_u_8x16 = SSE::removeHighBits16_8(channel1_u_8x16);
5226 const __m128i channel2_low_u_8x16 = SSE::removeHighBits16_8(channel2_u_8x16);
5227
5228 const __m128i channel0_high_u_8x16 = _mm_srli_epi16(channel0_u_8x16, 8);
5229 const __m128i channel1_high_u_8x16 = _mm_srli_epi16(channel1_u_8x16, 8);
5230 const __m128i channel2_high_u_8x16 = _mm_srli_epi16(channel2_u_8x16, 8);
5231
5232 // we multiply each channel with the corresponding multiplication factors
5233
5234 const __m128i result0_low_u_8x16 = _mm_mullo_epi16(channel0_low_u_8x16, multiplicationFactors0_128_u_16x8);
5235 const __m128i result0_high_u_8x16 = _mm_mullo_epi16(channel0_high_u_8x16, multiplicationFactors0_128_u_16x8);
5236
5237 const __m128i result1_low_u_8x16 = _mm_mullo_epi16(channel1_low_u_8x16, multiplicationFactors1_128_u_16x8);
5238 const __m128i result1_high_u_8x16 = _mm_mullo_epi16(channel1_high_u_8x16, multiplicationFactors1_128_u_16x8);
5239
5240 const __m128i result2_low_u_8x16 = _mm_mullo_epi16(channel2_low_u_8x16, multiplicationFactors2_128_u_16x8);
5241 const __m128i result2_high_u_8x16 = _mm_mullo_epi16(channel2_high_u_8x16, multiplicationFactors2_128_u_16x8);
5242
5243 // we sum up all results and add 64 for rounding purpose
5244 const __m128i result128_low_u_8x16 = _mm_adds_epu16(_mm_adds_epu16(result0_low_u_8x16, result1_low_u_8x16), _mm_adds_epu16(result2_low_u_8x16, constant64_u_16x8));
5245 const __m128i result128_high_u_8x16 = _mm_adds_epu16(_mm_adds_epu16(result0_high_u_8x16, result1_high_u_8x16), _mm_adds_epu16(result2_high_u_8x16, constant64_u_16x8));
5246
5247 // we shift the multiplication results by 7 bits (= 128)
5248 const __m128i result_low_u_8x16 = _mm_srli_epi16(result128_low_u_8x16, 7);
5249 const __m128i result_high_u_8x16 = _mm_srli_epi16(result128_high_u_8x16, 7);
5250
5251 // finally, we have to get rid of the upper zero bits by combining two 128 bit registers to one:
5252 const __m128i result_u_8x16 = _mm_or_si128(result_low_u_8x16, _mm_slli_epi16(result_high_u_8x16, 8));
5253
5254 // and we can store the result
5255 _mm_storeu_si128((__m128i*)target, result_u_8x16);
5256}
5257
5258OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& factorChannel00_128_s_16x8, const __m128i& factorChannel10_128_s_16x8, const __m128i& factorChannel20_128_s_16x8, const __m128i& factorChannel01_128_s_16x8, const __m128i& factorChannel11_128_s_16x8, const __m128i& factorChannel21_128_s_16x8, const __m128i& factorChannel02_128_s_16x8, const __m128i& factorChannel12_128_s_16x8, const __m128i& factorChannel22_128_s_16x8, const __m128i& biasChannel0_s_16x8, const __m128i& biasChannel1_s_16x8, const __m128i& biasChannel2_s_16x8)
5259{
5260 ocean_assert(source != nullptr && target != nullptr);
5261
5262 // the documentation of this function designed for RGB24 to YUV24 conversion
5263
5264 // precise color space conversion:
5265 // | Y | | 0.2578125 0.5039063 0.09765625 16.0 | | R |
5266 // | U | = | -0.1484375 -0.2890625 0.4375 128.0 | * | G |
5267 // | V | | 0.4375 -0.3671875 -0.0703125 128.0 | | B |
5268 // | 1 |
5269
5270 // approximation:
5271 // Y = ( 33 * R + 64 * G + 13 * B) / 128 + 16
5272 // U = (-19 * R - 37 * G + 56 * B) / 128 + 128
5273 // V = ( 56 * R - 47 * G - 9 * B) / 128 + 128
5274
5275 // we expect the following input pattern (for here RGB24):
5276 // FEDC BA98 7654 3210 FEDC BA98 7654 3210 FEDC BA98 7654 3210
5277 // BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR
5278
5279 const __m128i sourceA_u_8x16 = _mm_loadu_si128((const __m128i*)source + 0);
5280 const __m128i sourceB_u_8x16 = _mm_loadu_si128((const __m128i*)source + 1);
5281 const __m128i sourceC_u_8x16 = _mm_loadu_si128((const __m128i*)source + 2);
5282
5283 __m128i channel0_u_8x16;
5284 __m128i channel1_u_8x16;
5285 __m128i channel2_u_8x16;
5286 SSE::deInterleave3Channel8Bit48Elements(sourceA_u_8x16, sourceB_u_8x16, sourceC_u_8x16, channel0_u_8x16, channel1_u_8x16, channel2_u_8x16);
5287
5288 // now we need 16 bit values instead of 8 bit values
5289
5290 const __m128i channel0_low_u_8x16 = SSE::removeHighBits16_8(channel0_u_8x16);
5291 const __m128i channel1_low_u_8x16 = SSE::removeHighBits16_8(channel1_u_8x16);
5292 const __m128i channel2_low_u_8x16 = SSE::removeHighBits16_8(channel2_u_8x16);
5293
5294 const __m128i channel0_high_u_8x16 = _mm_srli_epi16(channel0_u_8x16, 8);
5295 const __m128i channel1_high_u_8x16 = _mm_srli_epi16(channel1_u_8x16, 8);
5296 const __m128i channel2_high_u_8x16 = _mm_srli_epi16(channel2_u_8x16, 8);
5297
5298 // we multiply each channel with the corresponding multiplication factors
5299
5300 __m128i result0_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel00_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel01_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel02_128_s_16x8));
5301 __m128i result1_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel10_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel11_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel12_128_s_16x8));
5302 __m128i result2_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel20_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel21_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel22_128_s_16x8));
5303
5304 __m128i result0_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel00_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel01_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel02_128_s_16x8));
5305 __m128i result1_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel10_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel11_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel12_128_s_16x8));
5306 __m128i result2_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel20_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel21_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel22_128_s_16x8));
5307
5308 // we normalize the result by 128 and add the bias
5309
5310 result0_low_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result0_low_u_8x16, 7), biasChannel0_s_16x8);
5311 result1_low_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result1_low_u_8x16, 7), biasChannel1_s_16x8);
5312 result2_low_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result2_low_u_8x16, 7), biasChannel2_s_16x8);
5313
5314 result0_high_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result0_high_u_8x16, 7), biasChannel0_s_16x8);
5315 result1_high_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result1_high_u_8x16, 7), biasChannel1_s_16x8);
5316 result2_high_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result2_high_u_8x16, 7), biasChannel2_s_16x8);
5317
5318 // from here, we need values within the range [0, 255], so that we clamp the results
5319
5320 const __m128i constant255_s_16x8 = _mm_set1_epi16(255);
5321
5322 result0_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result0_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5323 result1_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result1_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5324 result2_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result2_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5325
5326 result0_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result0_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5327 result1_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result1_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5328 result2_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result2_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5329
5330 // finally, we have to get rid of the upper zero bits by combining two 128 bit registers to one:
5331 const __m128i result0_u_8x16 = _mm_or_si128(result0_low_u_8x16, _mm_slli_epi16(result0_high_u_8x16, 8));
5332 const __m128i result1_u_8x16 = _mm_or_si128(result1_low_u_8x16, _mm_slli_epi16(result1_high_u_8x16, 8));
5333 const __m128i result2_u_8x16 = _mm_or_si128(result2_low_u_8x16, _mm_slli_epi16(result2_high_u_8x16, 8));
5334
5335 __m128i resultA_u_8x16;
5336 __m128i resultB_u_8x16;
5337 __m128i resultC_u_8x16;
5338 SSE::interleave3Channel8Bit48Elements(result0_u_8x16, result1_u_8x16, result2_u_8x16, resultA_u_8x16, resultB_u_8x16, resultC_u_8x16);
5339
5340 // and we can store the result
5341 _mm_storeu_si128((__m128i*)target + 0, resultA_u_8x16);
5342 _mm_storeu_si128((__m128i*)target + 1, resultB_u_8x16);
5343 _mm_storeu_si128((__m128i*)target + 2, resultC_u_8x16);
5344}
5345
5346OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& factorChannel00_1024_s_16x8, const __m128i& factorChannel10_1024_s_16x8, const __m128i& factorChannel20_1024_s_16x8, const __m128i& factorChannel01_1024_s_16x8, const __m128i& factorChannel11_1024_s_16x8, const __m128i& factorChannel21_1024_s_16x8, const __m128i& factorChannel02_1024_s_16x8, const __m128i& factorChannel12_1024_s_16x8, const __m128i& factorChannel22_1024_s_16x8, const __m128i& biasChannel0_1024_s_32x4, const __m128i& biasChannel1_1024_s_32x4, const __m128i& biasChannel2_1024_s_32x4)
5347{
5348 ocean_assert(source != nullptr && target != nullptr);
5349
5350 // the documentation of this function designed for RGB24 to YUV24 conversion
5351
5352 /// precise color space conversion:
5353 // | R | | 1.1639404296875 0.0 1.595947265625 -222.904296875 | | Y |
5354 // | G | = | 1.1639404296875 -0.3909912109375 -0.81298828125 135.486328125 | * | U |
5355 // | B | | 1.1639404296875 2.0179443359375 0.0 -276.919921875 | | V |
5356 // | 1 |
5357
5358 // approximation:
5359 // | R | | 1192 0 1634 -223 | | Y |
5360 // | G | = | 1192 -400 -833 135 | * | U |
5361 // | B | | 1192 2066 0 -277 | | V |
5362 // | 1 |
5363
5364 // we expect the following input pattern (for here RGB24):
5365 // FEDC BA98 7654 3210 FEDC BA98 7654 3210 FEDC BA98 7654 3210
5366 // BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR
5367
5368 const __m128i sourceA_u_8x16 = _mm_loadu_si128((const __m128i*)source + 0);
5369 const __m128i sourceB_u_8x16 = _mm_loadu_si128((const __m128i*)source + 1);
5370 const __m128i sourceC_u_8x16 = _mm_loadu_si128((const __m128i*)source + 2);
5371
5372 __m128i channel0_u_8x16;
5373 __m128i channel1_u_8x16;
5374 __m128i channel2_u_8x16;
5375 SSE::deInterleave3Channel8Bit48Elements(sourceA_u_8x16, sourceB_u_8x16, sourceC_u_8x16, channel0_u_8x16, channel1_u_8x16, channel2_u_8x16);
5376
5377
5378 // now we need 16 bit values instead of 8 bit values
5379
5380 const __m128i channel0_low_u_16x8 = SSE::removeHighBits16_8(channel0_u_8x16);
5381 const __m128i channel1_low_u_16x8 = SSE::removeHighBits16_8(channel1_u_8x16);
5382 const __m128i channel2_low_u_16x8 = SSE::removeHighBits16_8(channel2_u_8x16);
5383
5384 const __m128i channel0_high_u_16x8 = _mm_srli_epi16(channel0_u_8x16, 8);
5385 const __m128i channel1_high_u_16x8 = _mm_srli_epi16(channel1_u_8x16, 8);
5386 const __m128i channel2_high_u_16x8 = _mm_srli_epi16(channel2_u_8x16, 8);
5387
5388
5389 // we multiply each channel with the corresponding multiplication factors (int16_t * int16_t = int32_t), and we normalize the result by 1024
5390
5391 __m128i result0_low_A_s_32x4;
5392 __m128i result0_low_B_s_32x4;
5393 __m128i result0_high_A_s_32x4;
5394 __m128i result0_high_B_s_32x4;
5395
5396 SSE::multiplyInt8x16ToInt32x8(channel0_low_u_16x8, factorChannel00_1024_s_16x8, result0_low_A_s_32x4, result0_low_B_s_32x4);
5397 SSE::multiplyInt8x16ToInt32x8(channel0_high_u_16x8, factorChannel00_1024_s_16x8, result0_high_A_s_32x4, result0_high_B_s_32x4);
5398
5399 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_low_u_16x8, factorChannel01_1024_s_16x8, result0_low_A_s_32x4, result0_low_B_s_32x4);
5400 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_high_u_16x8, factorChannel01_1024_s_16x8, result0_high_A_s_32x4, result0_high_B_s_32x4);
5401
5402 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_low_u_16x8, factorChannel02_1024_s_16x8, result0_low_A_s_32x4, result0_low_B_s_32x4);
5403 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_high_u_16x8, factorChannel02_1024_s_16x8, result0_high_A_s_32x4, result0_high_B_s_32x4);
5404
5405 result0_low_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result0_low_A_s_32x4, biasChannel0_1024_s_32x4), 10);
5406 result0_low_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result0_low_B_s_32x4, biasChannel0_1024_s_32x4), 10);
5407 result0_high_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result0_high_A_s_32x4, biasChannel0_1024_s_32x4), 10);
5408 result0_high_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result0_high_B_s_32x4, biasChannel0_1024_s_32x4), 10);
5409
5410
5411 __m128i result1_low_A_s_32x4;
5412 __m128i result1_low_B_s_32x4;
5413 __m128i result1_high_A_s_32x4;
5414 __m128i result1_high_B_s_32x4;
5415
5416 SSE::multiplyInt8x16ToInt32x8(channel0_low_u_16x8, factorChannel10_1024_s_16x8, result1_low_A_s_32x4, result1_low_B_s_32x4);
5417 SSE::multiplyInt8x16ToInt32x8(channel0_high_u_16x8, factorChannel10_1024_s_16x8, result1_high_A_s_32x4, result1_high_B_s_32x4);
5418
5419 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_low_u_16x8, factorChannel11_1024_s_16x8, result1_low_A_s_32x4, result1_low_B_s_32x4);
5420 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_high_u_16x8, factorChannel11_1024_s_16x8, result1_high_A_s_32x4, result1_high_B_s_32x4);
5421
5422 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_low_u_16x8, factorChannel12_1024_s_16x8, result1_low_A_s_32x4, result1_low_B_s_32x4);
5423 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_high_u_16x8, factorChannel12_1024_s_16x8, result1_high_A_s_32x4, result1_high_B_s_32x4);
5424
5425 result1_low_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result1_low_A_s_32x4, biasChannel1_1024_s_32x4), 10);
5426 result1_low_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result1_low_B_s_32x4, biasChannel1_1024_s_32x4), 10);
5427 result1_high_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result1_high_A_s_32x4, biasChannel1_1024_s_32x4), 10);
5428 result1_high_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result1_high_B_s_32x4, biasChannel1_1024_s_32x4), 10);
5429
5430
5431 __m128i result2_low_A_s_32x4;
5432 __m128i result2_low_B_s_32x4;
5433 __m128i result2_high_A_s_32x4;
5434 __m128i result2_high_B_s_32x4;
5435
5436 SSE::multiplyInt8x16ToInt32x8(channel0_low_u_16x8, factorChannel20_1024_s_16x8, result2_low_A_s_32x4, result2_low_B_s_32x4);
5437 SSE::multiplyInt8x16ToInt32x8(channel0_high_u_16x8, factorChannel20_1024_s_16x8, result2_high_A_s_32x4, result2_high_B_s_32x4);
5438
5439 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_low_u_16x8, factorChannel21_1024_s_16x8, result2_low_A_s_32x4, result2_low_B_s_32x4);
5440 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_high_u_16x8, factorChannel21_1024_s_16x8, result2_high_A_s_32x4, result2_high_B_s_32x4);
5441
5442 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_low_u_16x8, factorChannel22_1024_s_16x8, result2_low_A_s_32x4, result2_low_B_s_32x4);
5443 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_high_u_16x8, factorChannel22_1024_s_16x8, result2_high_A_s_32x4, result2_high_B_s_32x4);
5444
5445 result2_low_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result2_low_A_s_32x4, biasChannel2_1024_s_32x4), 10);
5446 result2_low_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result2_low_B_s_32x4, biasChannel2_1024_s_32x4), 10);
5447 result2_high_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result2_high_A_s_32x4, biasChannel2_1024_s_32x4), 10);
5448 result2_high_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result2_high_B_s_32x4, biasChannel2_1024_s_32x4), 10);
5449
5450
5451 // now we have int32_t values with 0x0000 or 0xFFFF in the high 16 bits
5452 // thus we can merge 8 int32_t values to 8 int16_t values
5453
5454 const __m128i mask_0000FFFF_32x4 = _mm_set1_epi32(0x0000FFFF);
5455
5456 __m128i result0_A_s_16x8 = _mm_or_si128(_mm_and_si128(result0_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result0_high_A_s_32x4, 16));
5457 __m128i result0_B_s_16x8 = _mm_or_si128(_mm_and_si128(result0_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result0_high_B_s_32x4, 16));
5458
5459 __m128i result1_A_s_16x8 = _mm_or_si128(_mm_and_si128(result1_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result1_high_A_s_32x4, 16));
5460 __m128i result1_B_s_16x8 = _mm_or_si128(_mm_and_si128(result1_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result1_high_B_s_32x4, 16));
5461
5462 __m128i result2_A_s_16x8 = _mm_or_si128(_mm_and_si128(result2_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result2_high_A_s_32x4, 16));
5463 __m128i result2_B_s_16x8 = _mm_or_si128(_mm_and_si128(result2_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result2_high_B_s_32x4, 16));
5464
5465
5466 // we combine 16 int16_t values to 16 uint8_t values (saturated)
5467
5468 const __m128i result0_u_8x16 = _mm_packus_epi16(result0_A_s_16x8, result0_B_s_16x8);
5469 const __m128i result1_u_8x16 = _mm_packus_epi16(result1_A_s_16x8, result1_B_s_16x8);
5470 const __m128i result2_u_8x16 = _mm_packus_epi16(result2_A_s_16x8, result2_B_s_16x8);
5471
5472 __m128i resultA_u_8x16;
5473 __m128i resultB_u_8x16;
5474 __m128i resultC_u_8x16;
5475 SSE::interleave3Channel8Bit48Elements(result0_u_8x16, result1_u_8x16, result2_u_8x16, resultA_u_8x16, resultB_u_8x16, resultC_u_8x16);
5476
5477 // and we can store the result
5478 _mm_storeu_si128((__m128i*)target + 0, resultA_u_8x16);
5479 _mm_storeu_si128((__m128i*)target + 1, resultB_u_8x16);
5480 _mm_storeu_si128((__m128i*)target + 2, resultC_u_8x16);
5481}
5482
5483OCEAN_FORCE_INLINE void FrameChannels::convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactors0123_128_s_32x4)
5484{
5485 ocean_assert(source != nullptr && target != nullptr);
5486
5487 // the documentation of this function is designed for RGBA32 to Y8 conversion
5488 // however, in general this function can be used to apply a linear combination on the four source channels
5489 // to create one output channel
5490
5491 // precise color space conversion:
5492 // Y = 0.299 * R + 0.587 * G + 0.114 * B
5493
5494 // approximation:
5495 // Y = (38 * R + 75 * G + 15 * B) / 128
5496
5497 // we expect the following input pattern (for here RGBA32):
5498 // FEDC BA98 7654 3210
5499 // ABGR ABGR ABGR ABGR
5500
5501 // we calculate:
5502 // (int16_t)((uint8_t)R * (signed char)38) + (int16_t)((uint8_t)G * (signed char)75) for the first 16 bits
5503 // (int16_t)((uint8_t)B * (signed char)15) + (int16_t)((uint8_t)A * (signed char)0) for the second 16 bits
5504
5505 // we store eight 16 bit values holding 64 for rounding purpose:
5506 // FE DC BA 98 76 54 32 10
5507 // 64 64 64 64 64 64 64 64
5508 const __m128i constant64_u_8x16 = _mm_set1_epi32(0x00400040);
5509
5510 const __m128i pixelsA_u_8x16 = _mm_loadu_si128((const __m128i*)source + 0);
5511 const __m128i pixelsB_u_8x16 = _mm_loadu_si128((const __m128i*)source + 1);
5512 const __m128i pixelsC_u_8x16 = _mm_loadu_si128((const __m128i*)source + 2);
5513 const __m128i pixelsD_u_8x16 = _mm_loadu_si128((const __m128i*)source + 3);
5514
5515 // we get the following pattern
5516 // FE DC BA 98 76 54 32 10
5517 // 0b gr 0b gr 0b gr 0b gr
5518 const __m128i intermediateResults0_u_16x8 = _mm_maddubs_epi16(pixelsA_u_8x16, multiplicationFactors0123_128_s_32x4);
5519 const __m128i intermediateResults1_u_16x8 = _mm_maddubs_epi16(pixelsB_u_8x16, multiplicationFactors0123_128_s_32x4);
5520 const __m128i intermediateResults2_u_16x8 = _mm_maddubs_epi16(pixelsC_u_8x16, multiplicationFactors0123_128_s_32x4);
5521 const __m128i intermediateResults3_u_16x8 = _mm_maddubs_epi16(pixelsD_u_8x16, multiplicationFactors0123_128_s_32x4);
5522
5523 // now we sum the pairs of neighboring 16 bit intermediate results
5524 __m128i grayA_u_16x8 = _mm_hadd_epi16(intermediateResults0_u_16x8, intermediateResults1_u_16x8);
5525 __m128i grayB_u_16x8 = _mm_hadd_epi16(intermediateResults2_u_16x8, intermediateResults3_u_16x8);
5526
5527 // we add 64 for rounding purpose
5528 grayA_u_16x8 = _mm_add_epi16(grayA_u_16x8, constant64_u_8x16);
5529 grayB_u_16x8 = _mm_add_epi16(grayB_u_16x8, constant64_u_8x16);
5530
5531 // we shift the multiplication results by 7 bits (= 128)
5532 grayA_u_16x8 = _mm_srli_epi16(grayA_u_16x8, 7);
5533 grayB_u_16x8 = _mm_srli_epi16(grayB_u_16x8, 7);
5534
5535 // now we have the following pattern (in two 128 bit registers):
5536 // FEDCBA9876543210
5537 // 0Y0Y0Y0Y0Y0Y0Y0Y
5538
5539 // finally, we have to get rid of the upper zero bits by combining two 128 bit registers to one:
5540 const __m128i gray_u_8x16 = _mm_packus_epi16(grayA_u_16x8, grayB_u_16x8);
5541
5542 // and we can store the result
5543 _mm_storeu_si128((__m128i*)target, gray_u_8x16);
5544}
5545
5546void FrameChannels::convert4ChannelsTo2Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactorsChannel0_0123_128_s_16x8, const __m128i& multiplicationFactorsChannel1_0123_128_s_16x8)
5547{
5548 ocean_assert(source != nullptr && target != nullptr);
5549
5550 // the documentation of this function is designed for RGBA32 to YA16 conversion
5551 // however, in general this function can be used to apply a linear combination on the four source channels
5552 // to create one output channel
5553
5554 // precise color space conversion:
5555 // Y = 0.299 * R + 0.587 * G + 0.114 * B + 0.0 * A
5556 // A = 0.0 * R + 0.0 * G + 0.0 * B + 1.0 * A
5557
5558 // approximation:
5559 // Y = (38 * R + 75 * G + 15 * B + 0 * A) / 128
5560 // A = (128 * A) / 128
5561
5562 // we expect the following input pattern (for here RGBA32):
5563 // FEDC BA98 7654 3210
5564 // ABGR ABGR ABGR ABGR
5565
5566 // we store eight 16 bit values holding 64 for rounding purpose:
5567 // FE DC BA 98 76 54 32 10
5568 // 64 64 64 64 64 64 64 64
5569 const __m128i constant64_u_8x16 = _mm_set1_epi32(0x00400040);
5570
5571 const __m128i pixelsA_u_8x16 = _mm_loadu_si128((const __m128i*)source + 0);
5572 const __m128i pixelsB_u_8x16 = _mm_loadu_si128((const __m128i*)source + 1);
5573 const __m128i pixelsC_u_8x16 = _mm_loadu_si128((const __m128i*)source + 2);
5574 const __m128i pixelsD_u_8x16 = _mm_loadu_si128((const __m128i*)source + 3);
5575
5576 // we convert the 8 bit values to 16 bit values
5577
5578 const __m128i pixelsA_u_16x8 = _mm_unpacklo_epi8(pixelsA_u_8x16, _mm_setzero_si128());
5579 const __m128i pixelsB_u_16x8 = _mm_unpackhi_epi8(pixelsA_u_8x16, _mm_setzero_si128());
5580
5581 const __m128i pixelsC_u_16x8 = _mm_unpacklo_epi8(pixelsB_u_8x16, _mm_setzero_si128());
5582 const __m128i pixelsD_u_16x8 = _mm_unpackhi_epi8(pixelsB_u_8x16, _mm_setzero_si128());
5583
5584 const __m128i pixelsE_u_16x8 = _mm_unpacklo_epi8(pixelsC_u_8x16, _mm_setzero_si128());
5585 const __m128i pixelsF_u_16x8 = _mm_unpackhi_epi8(pixelsC_u_8x16, _mm_setzero_si128());
5586
5587 const __m128i pixelsG_u_16x8 = _mm_unpacklo_epi8(pixelsD_u_8x16, _mm_setzero_si128());
5588 const __m128i pixelsH_u_16x8 = _mm_unpackhi_epi8(pixelsD_u_8x16, _mm_setzero_si128());
5589
5590 // now we have the following pattern
5591 // FE DC BA 98 76 54 32 10
5592 // 0a 0b 0g 0r 0a 0b 0g 0r
5593
5594 const __m128i intermediateResultsChannel0_0_u_32x4 = _mm_madd_epi16(pixelsA_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8); // r * f00 + g * f01 | b * f02 + a * f03 | ...
5595 const __m128i intermediateResultsChannel0_1_u_32x4 = _mm_madd_epi16(pixelsB_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5596 const __m128i intermediateResultsChannel0_2_u_32x4 = _mm_madd_epi16(pixelsC_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5597 const __m128i intermediateResultsChannel0_3_u_32x4 = _mm_madd_epi16(pixelsD_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5598 const __m128i intermediateResultsChannel0_4_u_32x4 = _mm_madd_epi16(pixelsE_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5599 const __m128i intermediateResultsChannel0_5_u_32x4 = _mm_madd_epi16(pixelsF_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5600 const __m128i intermediateResultsChannel0_6_u_32x4 = _mm_madd_epi16(pixelsG_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5601 const __m128i intermediateResultsChannel0_7_u_32x4 = _mm_madd_epi16(pixelsH_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5602
5603 const __m128i resultsChannel0_A_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_0_u_32x4, intermediateResultsChannel0_1_u_32x4); // r * f00 + g * f01 + b * f02 + a * f03 | ...
5604 const __m128i resultsChannel0_B_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_2_u_32x4, intermediateResultsChannel0_3_u_32x4);
5605 const __m128i resultsChannel0_C_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_4_u_32x4, intermediateResultsChannel0_5_u_32x4);
5606 const __m128i resultsChannel0_D_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_6_u_32x4, intermediateResultsChannel0_7_u_32x4);
5607
5608
5609 const __m128i intermediateResultsChannel1_0_u_32x4 = _mm_madd_epi16(pixelsA_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8); // r * f10 + g * f11 | b * f12 + a * f13 | ...
5610 const __m128i intermediateResultsChannel1_1_u_32x4 = _mm_madd_epi16(pixelsB_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5611 const __m128i intermediateResultsChannel1_2_u_32x4 = _mm_madd_epi16(pixelsC_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5612 const __m128i intermediateResultsChannel1_3_u_32x4 = _mm_madd_epi16(pixelsD_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5613 const __m128i intermediateResultsChannel1_4_u_32x4 = _mm_madd_epi16(pixelsE_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5614 const __m128i intermediateResultsChannel1_5_u_32x4 = _mm_madd_epi16(pixelsF_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5615 const __m128i intermediateResultsChannel1_6_u_32x4 = _mm_madd_epi16(pixelsG_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5616 const __m128i intermediateResultsChannel1_7_u_32x4 = _mm_madd_epi16(pixelsH_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5617
5618 const __m128i resultsChannel1_A_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_0_u_32x4, intermediateResultsChannel1_1_u_32x4); // r * f10 + g * f11 + b * f12 + a * f13 | ...
5619 const __m128i resultsChannel1_B_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_2_u_32x4, intermediateResultsChannel1_3_u_32x4);
5620 const __m128i resultsChannel1_C_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_4_u_32x4, intermediateResultsChannel1_5_u_32x4);
5621 const __m128i resultsChannel1_D_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_6_u_32x4, intermediateResultsChannel1_7_u_32x4);
5622
5623 // now we interleave the results of first and second channel (as both results fit into 16 bit)
5624
5625 __m128i resultA_u_16x8 = _mm_or_si128(resultsChannel0_A_u_32x4, _mm_slli_epi32(resultsChannel1_A_u_32x4, 16));
5626 __m128i resultB_u_16x8 = _mm_or_si128(resultsChannel0_B_u_32x4, _mm_slli_epi32(resultsChannel1_B_u_32x4, 16));
5627 __m128i resultC_u_16x8 = _mm_or_si128(resultsChannel0_C_u_32x4, _mm_slli_epi32(resultsChannel1_C_u_32x4, 16));
5628 __m128i resultD_u_16x8 = _mm_or_si128(resultsChannel0_D_u_32x4, _mm_slli_epi32(resultsChannel1_D_u_32x4, 16));
5629
5630 // we add 64 for rounding purpose
5631 resultA_u_16x8 = _mm_add_epi16(resultA_u_16x8, constant64_u_8x16);
5632 resultB_u_16x8 = _mm_add_epi16(resultB_u_16x8, constant64_u_8x16);
5633 resultC_u_16x8 = _mm_add_epi16(resultC_u_16x8, constant64_u_8x16);
5634 resultD_u_16x8 = _mm_add_epi16(resultD_u_16x8, constant64_u_8x16);
5635
5636 // we shift the multiplication results by 7 bits (= 128)
5637 resultA_u_16x8 = _mm_srli_epi16(resultA_u_16x8, 7);
5638 resultB_u_16x8 = _mm_srli_epi16(resultB_u_16x8, 7);
5639 resultC_u_16x8 = _mm_srli_epi16(resultC_u_16x8, 7);
5640 resultD_u_16x8 = _mm_srli_epi16(resultD_u_16x8, 7);
5641
5642 // now we have the following pattern (in two 128 bit registers):
5643 // FEDCBA9876543210
5644 // 0A0Y0A0Y0A0Y0A0Y
5645
5646 // finally, we have to get rid of the upper zero bits by combining two 128 bit registers to one:
5647 const __m128i resultAB_u_8x16 = _mm_packus_epi16(resultA_u_16x8, resultB_u_16x8);
5648 const __m128i resultCD_u_8x16 = _mm_packus_epi16(resultC_u_16x8, resultD_u_16x8);
5649
5650 // and we can store the result
5651 _mm_storeu_si128((__m128i*)target + 0, resultAB_u_8x16);
5652 _mm_storeu_si128((__m128i*)target + 1, resultCD_u_8x16);
5653}
5654
5655#endif // OCEAN_HARDWARE_SSE_VERSION
5656
5657#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5658
template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2>
void FrameChannels::convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel0_128_u_8x8, const uint8x8_t& factorChannel1_128_u_8x8, const uint8x8_t& factorChannel2_128_u_8x8)
{
	static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2, "Invalid multiplication factors!");

	ocean_assert(source != nullptr && target != nullptr);

	// Applies a linear combination to the three channels of 8 pixels (24 bytes) to create 8 single-channel values.
	// A channel whose template flag is 'false' is skipped entirely, its factor is not accessed.
	// The example used in this documentation is a RGB24 to Y8 conversion.
	//
	// precise color space conversion:
	// Y = 0.299 * R + 0.587 * G + 0.114 * B
	//
	// approximation with 7 bit precision:
	// Y = (38 * R + 75 * G + 15 * B) / 128
	//
	// expected input pattern (for RGB24):
	// FEDC BA98 7654 3210
	// RBGR BGRB GRBG RBGR

	// vld3_u8() loads 3 * 8 values and de-interleaves the three channels at the same time:
	// deinterleaved_u_8x8x3.val[0]: R R R R R R R R
	// deinterleaved_u_8x8x3.val[1]: G G G G G G G G
	// deinterleaved_u_8x8x3.val[2]: B B B B B B B B
	const uint8x8x3_t deinterleaved_u_8x8x3 = vld3_u8(source);

	// the weighted sum of all channels in use, uint8_t * uint8_t = uint16_t
	uint16x8_t weightedSum_u_16x8 = vdupq_n_u16(0u);

	if constexpr (tUseFactorChannel0)
	{
		// first channel: plain widening multiplication
		weightedSum_u_16x8 = vmull_u8(deinterleaved_u_8x8x3.val[0], factorChannel0_128_u_8x8);
	}

	if constexpr (tUseFactorChannel1)
	{
		// second channel: widening multiplication and accumulation
		weightedSum_u_16x8 = vmlal_u8(weightedSum_u_16x8, deinterleaved_u_8x8x3.val[1], factorChannel1_128_u_8x8);
	}

	if constexpr (tUseFactorChannel2)
	{
		// third channel: widening multiplication and accumulation
		weightedSum_u_16x8 = vmlal_u8(weightedSum_u_16x8, deinterleaved_u_8x8x3.val[2], factorChannel2_128_u_8x8);
	}

	// rounding, normalization by 2^7 = 128, and saturated narrowing to 8 bit within one operation:
	// result = saturate_u8((weightedSum + 2^6) >> 7)
	vst1_u8(target, vqrshrn_n_u16(weightedSum_u_16x8, 7));
}
5718
OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8)
{
	ocean_assert(source != nullptr && target != nullptr);

	// Applies a 3x3 transformation with 6 bit fixed-point precision to 8 pixels (24 interleaved bytes),
	// the bias of each source channel is subtracted before the matrix is applied:
	//     target_i = saturate_u8((factor_i0 * (source_0 - bias_0) + factor_i1 * (source_1 - bias_1) + factor_i2 * (source_2 - bias_2) + 32) >> 6)
	//
	// The example used in this documentation is a YUV24 to RGB24 conversion.
	//
	// precise color space conversion:
	// | R |   | 1    0.0          1.370705   -175.45024  |   | Y |
	// | G | = | 1   -0.3376335   -0.698001    132.561152 | * | U |
	// | B |   | 1    1.732446     0.0        -221.753088 |   | V |
	//                                                        | 1 |
	//
	// approximation:
	// R = (64 * Y +   0 * (U - 128) + 88 * (V - 128)) / 64
	// G = (64 * Y -  22 * (U - 128) - 45 * (V - 128)) / 64
	// B = (64 * Y + 111 * (U - 128) +  0 * (V - 128)) / 64

	// vld3_u8() loads 3 * 8 values and de-interleaves the three channels at the same time (for YUV24):
	// sourcePixels_u_8x8x3.val[0]: Y Y Y Y Y Y Y Y
	// sourcePixels_u_8x8x3.val[1]: U U U U U U U U
	// sourcePixels_u_8x8x3.val[2]: V V V V V V V V
	const uint8x8x3_t sourcePixels_u_8x8x3 = vld3_u8(source);

	// the bias is removed with a widening unsigned subtraction,
	// re-interpreting the wrapped 16 bit result as signed value provides the correct (possibly negative) difference
	const int16x8_t unbiased0_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(sourcePixels_u_8x8x3.val[0], biasChannel0_u_8x8));
	const int16x8_t unbiased1_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(sourcePixels_u_8x8x3.val[1], biasChannel1_u_8x8));
	const int16x8_t unbiased2_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(sourcePixels_u_8x8x3.val[2], biasChannel2_u_8x8));

	// applies one row of the 3x3 matrix,
	// the individual products are not saturated (vmulq_s16), only the two additions are (vqaddq_s16)
	const auto applyMatrixRow = [&](const int16x8_t& factor0_64_s_16x8, const int16x8_t& factor1_64_s_16x8, const int16x8_t& factor2_64_s_16x8) -> int16x8_t
	{
		const int16x8_t firstTwoChannels_s_16x8 = vqaddq_s16(vmulq_s16(unbiased0_s_16x8, factor0_64_s_16x8), vmulq_s16(unbiased1_s_16x8, factor1_64_s_16x8));

		return vqaddq_s16(firstTwoChannels_s_16x8, vmulq_s16(unbiased2_s_16x8, factor2_64_s_16x8));
	};

	// rounding, normalization by 2^6 = 64, and saturated narrowing from signed 16 bit to unsigned 8 bit within one operation
	uint8x8x3_t targetPixels_u_8x8x3;
	targetPixels_u_8x8x3.val[0] = vqrshrun_n_s16(applyMatrixRow(factorChannel00_64_s_16x8, factorChannel01_64_s_16x8, factorChannel02_64_s_16x8), 6);
	targetPixels_u_8x8x3.val[1] = vqrshrun_n_s16(applyMatrixRow(factorChannel10_64_s_16x8, factorChannel11_64_s_16x8, factorChannel12_64_s_16x8), 6);
	targetPixels_u_8x8x3.val[2] = vqrshrun_n_s16(applyMatrixRow(factorChannel20_64_s_16x8, factorChannel21_64_s_16x8, factorChannel22_64_s_16x8), 6);

	// the three target channels are interleaved while they are written
	vst3_u8(target, targetPixels_u_8x8x3);
}
5772
OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8)
{
	ocean_assert(source != nullptr && target != nullptr);

	// Applies a 3x3 transformation with 6 bit fixed-point precision to 16 pixels (48 interleaved bytes),
	// the bias of each source channel is subtracted before the matrix is applied:
	//     target_i = saturate_u8((factor_i0 * (source_0 - bias_0) + factor_i1 * (source_1 - bias_1) + factor_i2 * (source_2 - bias_2) + 32) >> 6)
	//
	// The example used in this documentation is a YUV24 to RGB24 conversion.
	//
	// precise color space conversion:
	// | R |   | 1    0.0          1.370705   -175.45024  |   | Y |
	// | G | = | 1   -0.3376335   -0.698001    132.561152 | * | U |
	// | B |   | 1    1.732446     0.0        -221.753088 |   | V |
	//                                                        | 1 |
	//
	// approximation:
	// R = (64 * Y +   0 * (U - 128) + 88 * (V - 128)) / 64
	// G = (64 * Y -  22 * (U - 128) - 45 * (V - 128)) / 64
	// B = (64 * Y + 111 * (U - 128) +  0 * (V - 128)) / 64

	// vld3q_u8() loads 3 * 16 values and de-interleaves the three channels at the same time
	const uint8x16x3_t sourcePixels_u_8x16x3 = vld3q_u8(source);

	// the bias is removed with a widening unsigned subtraction (separately for the lower and upper 8 pixels),
	// re-interpreting the wrapped 16 bit result as signed value provides the correct (possibly negative) difference

	const int16x8_t unbiased0_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(sourcePixels_u_8x16x3.val[0]), biasChannel0_u_8x8));
	const int16x8_t unbiased1_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(sourcePixels_u_8x16x3.val[1]), biasChannel1_u_8x8));
	const int16x8_t unbiased2_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(sourcePixels_u_8x16x3.val[2]), biasChannel2_u_8x8));

	const int16x8_t unbiased0_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(sourcePixels_u_8x16x3.val[0]), biasChannel0_u_8x8));
	const int16x8_t unbiased1_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(sourcePixels_u_8x16x3.val[1]), biasChannel1_u_8x8));
	const int16x8_t unbiased2_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(sourcePixels_u_8x16x3.val[2]), biasChannel2_u_8x8));

	// applies one row of the 3x3 matrix to 8 pixels,
	// the individual products are not saturated (vmulq_s16), only the two additions are (vqaddq_s16)
	const auto applyMatrixRow = [](const int16x8_t& channel0_s_16x8, const int16x8_t& channel1_s_16x8, const int16x8_t& channel2_s_16x8, const int16x8_t& factor0_64_s_16x8, const int16x8_t& factor1_64_s_16x8, const int16x8_t& factor2_64_s_16x8) -> int16x8_t
	{
		const int16x8_t firstTwoChannels_s_16x8 = vqaddq_s16(vmulq_s16(channel0_s_16x8, factor0_64_s_16x8), vmulq_s16(channel1_s_16x8, factor1_64_s_16x8));

		return vqaddq_s16(firstTwoChannels_s_16x8, vmulq_s16(channel2_s_16x8, factor2_64_s_16x8));
	};

	const int16x8_t row0_low_s_16x8 = applyMatrixRow(unbiased0_low_s_16x8, unbiased1_low_s_16x8, unbiased2_low_s_16x8, factorChannel00_64_s_16x8, factorChannel01_64_s_16x8, factorChannel02_64_s_16x8);
	const int16x8_t row1_low_s_16x8 = applyMatrixRow(unbiased0_low_s_16x8, unbiased1_low_s_16x8, unbiased2_low_s_16x8, factorChannel10_64_s_16x8, factorChannel11_64_s_16x8, factorChannel12_64_s_16x8);
	const int16x8_t row2_low_s_16x8 = applyMatrixRow(unbiased0_low_s_16x8, unbiased1_low_s_16x8, unbiased2_low_s_16x8, factorChannel20_64_s_16x8, factorChannel21_64_s_16x8, factorChannel22_64_s_16x8);

	const int16x8_t row0_high_s_16x8 = applyMatrixRow(unbiased0_high_s_16x8, unbiased1_high_s_16x8, unbiased2_high_s_16x8, factorChannel00_64_s_16x8, factorChannel01_64_s_16x8, factorChannel02_64_s_16x8);
	const int16x8_t row1_high_s_16x8 = applyMatrixRow(unbiased0_high_s_16x8, unbiased1_high_s_16x8, unbiased2_high_s_16x8, factorChannel10_64_s_16x8, factorChannel11_64_s_16x8, factorChannel12_64_s_16x8);
	const int16x8_t row2_high_s_16x8 = applyMatrixRow(unbiased0_high_s_16x8, unbiased1_high_s_16x8, unbiased2_high_s_16x8, factorChannel20_64_s_16x8, factorChannel21_64_s_16x8, factorChannel22_64_s_16x8);

	// rounding, normalization by 2^6 = 64, and saturated narrowing from signed 16 bit to unsigned 8 bit within one operation,
	// the lower and upper 8 pixels are combined to one 128 bit register per target channel
	uint8x16x3_t targetPixels_u_8x16x3;
	targetPixels_u_8x16x3.val[0] = vcombine_u8(vqrshrun_n_s16(row0_low_s_16x8, 6), vqrshrun_n_s16(row0_high_s_16x8, 6));
	targetPixels_u_8x16x3.val[1] = vcombine_u8(vqrshrun_n_s16(row1_low_s_16x8, 6), vqrshrun_n_s16(row1_high_s_16x8, 6));
	targetPixels_u_8x16x3.val[2] = vcombine_u8(vqrshrun_n_s16(row2_low_s_16x8, 6), vqrshrun_n_s16(row2_high_s_16x8, 6));

	// the three target channels are interleaved while they are written
	vst3q_u8(target, targetPixels_u_8x16x3);
}
5837
// Converts 8 interleaved 3-channel pixels (24 bytes) to 8 interleaved 3-channel pixels (24 bytes) with a 3x3 integer matrix and a bias, using 16 bit intermediate values and a final division by 128 (7 bit precision).
// For each target channel t: target[t] = saturate_u8(round((factorChannel_t0 * source[0] + factorChannel_t1 * source[1] + factorChannel_t2 * source[2] + biasChannel_t) / 128)).
// In 'factorChannelTS' the first digit T is the target channel and the second digit S is the source channel; the parameters are passed column by column (00, 10, 20, 01, ...).
// The bias is added before the division by 128; presumably the caller provides it already multiplied by 128 (as the '_128' suffix suggests) - TODO confirm against the declaration.
// The products are accumulated with vmulq_s16/vmlaq_s16 which keep only the lower 16 bits, so the weighted sum needs to stay within the signed 16 bit range.
// source: The 24 source bytes to read, must be valid; target: The 24 target bytes to write, must be valid
5838OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_128_s_16x8, const int16x8_t& factorChannel10_128_s_16x8, const int16x8_t& factorChannel20_128_s_16x8, const int16x8_t& factorChannel01_128_s_16x8, const int16x8_t& factorChannel11_128_s_16x8, const int16x8_t& factorChannel21_128_s_16x8, const int16x8_t& factorChannel02_128_s_16x8, const int16x8_t& factorChannel12_128_s_16x8, const int16x8_t& factorChannel22_128_s_16x8, const int16x8_t& biasChannel0_128_s_16x8, const int16x8_t& biasChannel1_128_s_16x8, const int16x8_t& biasChannel2_128_s_16x8)
5839{
5840 ocean_assert(source != nullptr && target != nullptr);
5841
5842 // the documentation of this function is written for the RGB24 to YUV24 conversion (as an example)
5843
5844 // precise color space conversion:
5845 // | Y | | 0.2578125 0.5039063 0.09765625 16.0 | | R |
5846 // | U | = | -0.1484375 -0.2890625 0.4375 128.0 | * | G |
5847 // | V | | 0.4375 -0.3671875 -0.0703125 128.0 | | B |
5848 // | 1 |
5849
5850 // approximation (the factors are the precise factors multiplied by 128 and rounded):
5851 // Y = ( 33 * R + 64 * G + 13 * B) / 128 + 16
5852 // U = (-19 * R - 37 * G + 56 * B) / 128 + 128
5853 // V = ( 56 * R - 47 * G - 9 * B) / 128 + 128
5854
5855 // we load 8 pixels (= 3 * 8 values) and directly deinterleave the 3 channels so that we receive the following patterns:
5856 // source_u_8x8x3.val[0]: R R R R R R R R
5857 // source_u_8x8x3.val[1]: G G G G G G G G
5858 // source_u_8x8x3.val[2]: B B B B B B B B
5859
5860 const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5861 // we widen each channel from 8 bit to 16 bit, the values stay in the range [0, 255] so that the signed reinterpretation does not change them
5862 const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[0]));
5863 const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[1]));
5864 const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[2]));
5865 // intermediateResultsT = source0 * factorChannelT0 (first column of the matrix)
5866 int16x8_t intermediateResults0_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel00_128_s_16x8);
5867 int16x8_t intermediateResults1_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel10_128_s_16x8);
5868 int16x8_t intermediateResults2_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel20_128_s_16x8);
5869 // intermediateResultsT += source1 * factorChannelT1 (second column of the matrix)
5870 intermediateResults0_s_16x8 = vmlaq_s16(intermediateResults0_s_16x8, source1_s_16x8, factorChannel01_128_s_16x8);
5871 intermediateResults1_s_16x8 = vmlaq_s16(intermediateResults1_s_16x8, source1_s_16x8, factorChannel11_128_s_16x8);
5872 intermediateResults2_s_16x8 = vmlaq_s16(intermediateResults2_s_16x8, source1_s_16x8, factorChannel21_128_s_16x8);
5873 // intermediateResultsT += source2 * factorChannelT2 (third column of the matrix)
5874 intermediateResults0_s_16x8 = vmlaq_s16(intermediateResults0_s_16x8, source2_s_16x8, factorChannel02_128_s_16x8);
5875 intermediateResults1_s_16x8 = vmlaq_s16(intermediateResults1_s_16x8, source2_s_16x8, factorChannel12_128_s_16x8);
5876 intermediateResults2_s_16x8 = vmlaq_s16(intermediateResults2_s_16x8, source2_s_16x8, factorChannel22_128_s_16x8);
5877
5878 // now we add the bias values (saturated), the bias is added before the division by 128
5879
5880 intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, biasChannel0_128_s_16x8);
5881 intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, biasChannel1_128_s_16x8);
5882 intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, biasChannel2_128_s_16x8);
5883
5884 uint8x8x3_t results_u_8x8x3;
5885
5886 // rounding right shift by 7 bits (division by 128) combined with a saturated narrow from signed 16 bit to unsigned 8 bit
5887 results_u_8x8x3.val[0] = vqrshrun_n_s16(intermediateResults0_s_16x8, 7);
5888 results_u_8x8x3.val[1] = vqrshrun_n_s16(intermediateResults1_s_16x8, 7);
5889 results_u_8x8x3.val[2] = vqrshrun_n_s16(intermediateResults2_s_16x8, 7);
5890
5891 // and we can store the result (the three channels are interleaved again)
5892 vst3_u8(target, results_u_8x8x3);
5893}
5894
// Converts 8 interleaved 3-channel pixels (24 bytes) to 8 interleaved 3-channel pixels (24 bytes) with a 3x3 integer matrix and a bias, using 32 bit intermediate values and a final division by 1024 (10 bit precision).
// For each target channel t: target[t] = saturate_u8(round((factorChannel_t0 * source[0] + factorChannel_t1 * source[1] + factorChannel_t2 * source[2] + biasChannel_t) / 1024)).
// In 'factorChannelTS' the first digit T is the target channel and the second digit S is the source channel; the parameters are passed column by column (00, 10, 20, 01, ...).
// The factor and bias vectors hold 4 lanes only, the same vectors are applied to the lower and to the upper four pixels.
// The bias is added before the division by 1024; presumably the caller provides it already multiplied by 1024 (as the '_1024' suffix suggests) - TODO confirm against the declaration.
// source: The 24 source bytes to read, must be valid; target: The 24 target bytes to write, must be valid
5895OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x4_t& factorChannel00_1024_s_16x4, const int16x4_t& factorChannel10_1024_s_16x4, const int16x4_t& factorChannel20_1024_s_16x4, const int16x4_t& factorChannel01_1024_s_16x4, const int16x4_t& factorChannel11_1024_s_16x4, const int16x4_t& factorChannel21_1024_s_16x4, const int16x4_t& factorChannel02_1024_s_16x4, const int16x4_t& factorChannel12_1024_s_16x4, const int16x4_t& factorChannel22_1024_s_16x4, const int32x4_t& biasChannel0_1024_s_32x4, const int32x4_t& biasChannel1_1024_s_32x4, const int32x4_t& biasChannel2_1024_s_32x4)
5896{
5897 ocean_assert(source != nullptr && target != nullptr);
5898
5899 // the documentation of this function is written for the YUV24 to RGB24 conversion (as an example)
5900
5901 // precise color space conversion:
5902 // | R | | 1.1639404296875 0.0 1.595947265625 -222.904296875 | | Y |
5903 // | G | = | 1.1639404296875 -0.3909912109375 -0.81298828125 135.486328125 | * | U |
5904 // | B | | 1.1639404296875 2.0179443359375 0.0 -276.919921875 | | V |
5905 // | 1 |
5906
5907 // approximation (the factors are the precise factors multiplied by 1024 and rounded, the bias column is shown without the scale of 1024):
5908 // | R | | 1192 0 1634 -223 | | Y |
5909 // | G | = | 1192 -400 -833 135 | * | U |
5910 // | B | | 1192 2066 0 -277 | | V |
5911 // | 1 |
5912
5913 // we load 8 pixels (= 3 * 8 values) and directly deinterleave the 3 channels so that we receive the following patterns:
5914 // source_u_8x8x3.val[0]: Y Y Y Y Y Y Y Y
5915 // source_u_8x8x3.val[1]: U U U U U U U U
5916 // source_u_8x8x3.val[2]: V V V V V V V V
5917
5918 const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5919 // we widen each channel from 8 bit to 16 bit, the values stay in the range [0, 255] so that the signed reinterpretation does not change them
5920 const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[0]));
5921 const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[1]));
5922 const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[2]));
5923 // first source channel: 'low' holds the pixels 0-3, 'high' holds the pixels 4-7
5924 const int16x4_t source0_low_s_16x4 = vget_low_s16(source0_s_16x8);
5925 const int16x4_t source0_high_s_16x4 = vget_high_s16(source0_s_16x8);
5926 // widening multiplication 16 bit * 16 bit -> 32 bit: intermediateResultsT = source0 * factorChannelT0
5927 int32x4_t intermediateResults0_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel00_1024_s_16x4);
5928 int32x4_t intermediateResults0_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel00_1024_s_16x4);
5929
5930 int32x4_t intermediateResults1_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel10_1024_s_16x4);
5931 int32x4_t intermediateResults1_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel10_1024_s_16x4);
5932
5933 int32x4_t intermediateResults2_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel20_1024_s_16x4);
5934 int32x4_t intermediateResults2_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel20_1024_s_16x4);
5935
5936 // second source channel: intermediateResultsT += source1 * factorChannelT1
5937 const int16x4_t source1_low_s_16x4 = vget_low_s16(source1_s_16x8);
5938 const int16x4_t source1_high_s_16x4 = vget_high_s16(source1_s_16x8);
5939
5940 intermediateResults0_low_s_32x4 = vmlal_s16(intermediateResults0_low_s_32x4, source1_low_s_16x4, factorChannel01_1024_s_16x4);
5941 intermediateResults0_high_s_32x4 = vmlal_s16(intermediateResults0_high_s_32x4, source1_high_s_16x4, factorChannel01_1024_s_16x4);
5942
5943 intermediateResults1_low_s_32x4 = vmlal_s16(intermediateResults1_low_s_32x4, source1_low_s_16x4, factorChannel11_1024_s_16x4);
5944 intermediateResults1_high_s_32x4 = vmlal_s16(intermediateResults1_high_s_32x4, source1_high_s_16x4, factorChannel11_1024_s_16x4);
5945
5946 intermediateResults2_low_s_32x4 = vmlal_s16(intermediateResults2_low_s_32x4, source1_low_s_16x4, factorChannel21_1024_s_16x4);
5947 intermediateResults2_high_s_32x4 = vmlal_s16(intermediateResults2_high_s_32x4, source1_high_s_16x4, factorChannel21_1024_s_16x4);
5948
5949 // third source channel: intermediateResultsT += source2 * factorChannelT2
5950 const int16x4_t source2_low_s_16x4 = vget_low_s16(source2_s_16x8);
5951 const int16x4_t source2_high_s_16x4 = vget_high_s16(source2_s_16x8);
5952
5953 intermediateResults0_low_s_32x4 = vmlal_s16(intermediateResults0_low_s_32x4, source2_low_s_16x4, factorChannel02_1024_s_16x4);
5954 intermediateResults0_high_s_32x4 = vmlal_s16(intermediateResults0_high_s_32x4, source2_high_s_16x4, factorChannel02_1024_s_16x4);
5955
5956 intermediateResults1_low_s_32x4 = vmlal_s16(intermediateResults1_low_s_32x4, source2_low_s_16x4, factorChannel12_1024_s_16x4);
5957 intermediateResults1_high_s_32x4 = vmlal_s16(intermediateResults1_high_s_32x4, source2_high_s_16x4, factorChannel12_1024_s_16x4);
5958
5959 intermediateResults2_low_s_32x4 = vmlal_s16(intermediateResults2_low_s_32x4, source2_low_s_16x4, factorChannel22_1024_s_16x4);
5960 intermediateResults2_high_s_32x4 = vmlal_s16(intermediateResults2_high_s_32x4, source2_high_s_16x4, factorChannel22_1024_s_16x4);
5961
5962
5963 // now we add the bias values (regular 32 bit addition, not saturated; the sum of the three products stays below 2^25 in magnitude as 3 * 255 * 32768 < 2^25)
5964
5965 intermediateResults0_low_s_32x4 = vaddq_s32(intermediateResults0_low_s_32x4, biasChannel0_1024_s_32x4);
5966 intermediateResults0_high_s_32x4 = vaddq_s32(intermediateResults0_high_s_32x4, biasChannel0_1024_s_32x4);
5967
5968 intermediateResults1_low_s_32x4 = vaddq_s32(intermediateResults1_low_s_32x4, biasChannel1_1024_s_32x4);
5969 intermediateResults1_high_s_32x4 = vaddq_s32(intermediateResults1_high_s_32x4, biasChannel1_1024_s_32x4);
5970
5971 intermediateResults2_low_s_32x4 = vaddq_s32(intermediateResults2_low_s_32x4, biasChannel2_1024_s_32x4);
5972 intermediateResults2_high_s_32x4 = vaddq_s32(intermediateResults2_high_s_32x4, biasChannel2_1024_s_32x4);
5973
5974
5975 uint8x8x3_t results_u_8x8x3;
5976
5977 // rounding right shift by 10 bits (division by 1024) with saturated narrow from signed 32 bit to unsigned 16 bit, followed by a saturated narrow to unsigned 8 bit
5978 results_u_8x8x3.val[0] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_high_s_32x4, 10)));
5979 results_u_8x8x3.val[1] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_high_s_32x4, 10)));
5980 results_u_8x8x3.val[2] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_high_s_32x4, 10)));
5981
5982 // and we can store the result (the three channels are interleaved again)
5983 vst3_u8(target, results_u_8x8x3);
5984}
5985
// Converts 16 interleaved 3-channel pixels (48 bytes) to 16 interleaved 3-channel pixels (48 bytes) with a 3x3 integer matrix and a bias, using 32 bit intermediate values and a final division by 1024 (10 bit precision).
// For each target channel t: target[t] = saturate_u8(round((factorChannel_t0 * source[0] + factorChannel_t1 * source[1] + factorChannel_t2 * source[2] + biasChannel_t) / 1024)).
// In 'factorChannelTS' the first digit T is the target channel and the second digit S is the source channel; the parameters are passed column by column (00, 10, 20, 01, ...).
// The factor and bias vectors hold 4 lanes only, the same vectors are applied to all four groups of four pixels (named A, B, C, D below).
// The bias is added before the division by 1024; presumably the caller provides it already multiplied by 1024 (as the '_1024' suffix suggests) - TODO confirm against the declaration.
// source: The 48 source bytes to read, must be valid; target: The 48 target bytes to write, must be valid
5986OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x4_t& factorChannel00_1024_s_16x4, const int16x4_t& factorChannel10_1024_s_16x4, const int16x4_t& factorChannel20_1024_s_16x4, const int16x4_t& factorChannel01_1024_s_16x4, const int16x4_t& factorChannel11_1024_s_16x4, const int16x4_t& factorChannel21_1024_s_16x4, const int16x4_t& factorChannel02_1024_s_16x4, const int16x4_t& factorChannel12_1024_s_16x4, const int16x4_t& factorChannel22_1024_s_16x4, const int32x4_t& biasChannel0_1024_s_32x4, const int32x4_t& biasChannel1_1024_s_32x4, const int32x4_t& biasChannel2_1024_s_32x4)
5987{
5988 ocean_assert(source != nullptr && target != nullptr);
5989
5990 // the documentation of this function is written for the YUV24 to RGB24 conversion (as an example)
5991
5992 // precise color space conversion:
5993 // | R | | 1.1639404296875 0.0 1.595947265625 -222.904296875 | | Y |
5994 // | G | = | 1.1639404296875 -0.3909912109375 -0.81298828125 135.486328125 | * | U |
5995 // | B | | 1.1639404296875 2.0179443359375 0.0 -276.919921875 | | V |
5996 // | 1 |
5997
5998 // approximation (the factors are the precise factors multiplied by 1024 and rounded, the bias column is shown without the scale of 1024):
5999 // | R | | 1192 0 1634 -223 | | Y |
6000 // | G | = | 1192 -400 -833 135 | * | U |
6001 // | B | | 1192 2066 0 -277 | | V |
6002 // | 1 |
6003
6004 // we load 16 pixels (= 3 * 16 values) and directly deinterleave the 3 channels so that we receive the following patterns:
6005 // source_u_8x16x3.val[0]: Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y Y
6006 // source_u_8x16x3.val[1]: U U U U U U U U U U U U U U U U
6007 // source_u_8x16x3.val[2]: V V V V V V V V V V V V V V V V
6008
6009 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
6010 // we widen each channel from 8 bit to 16 bit, 'low' holds the pixels 0-7, 'high' holds the pixels 8-15
6011 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[0])));
6012 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[1])));
6013 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[2])));
6014
6015 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[0])));
6016 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[1])));
6017 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[2])));
6018 // first source channel, split into four groups: A = pixels 0-3, B = pixels 4-7, C = pixels 8-11, D = pixels 12-15
6019 const int16x4_t source0_A_s_16x4 = vget_low_s16(source0_low_s_16x8);
6020 const int16x4_t source0_B_s_16x4 = vget_high_s16(source0_low_s_16x8);
6021 const int16x4_t source0_C_s_16x4 = vget_low_s16(source0_high_s_16x8);
6022 const int16x4_t source0_D_s_16x4 = vget_high_s16(source0_high_s_16x8);
6023 // widening multiplication 16 bit * 16 bit -> 32 bit: intermediateResultsT = source0 * factorChannelT0
6024 int32x4_t intermediateResults0_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel00_1024_s_16x4);
6025 int32x4_t intermediateResults0_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel00_1024_s_16x4);
6026 int32x4_t intermediateResults0_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel00_1024_s_16x4);
6027 int32x4_t intermediateResults0_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel00_1024_s_16x4);
6028
6029 int32x4_t intermediateResults1_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel10_1024_s_16x4);
6030 int32x4_t intermediateResults1_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel10_1024_s_16x4);
6031 int32x4_t intermediateResults1_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel10_1024_s_16x4);
6032 int32x4_t intermediateResults1_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel10_1024_s_16x4);
6033
6034 int32x4_t intermediateResults2_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel20_1024_s_16x4);
6035 int32x4_t intermediateResults2_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel20_1024_s_16x4);
6036 int32x4_t intermediateResults2_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel20_1024_s_16x4);
6037 int32x4_t intermediateResults2_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel20_1024_s_16x4);
6038
6039 // second source channel: intermediateResultsT += source1 * factorChannelT1
6040 const int16x4_t source1_A_s_16x4 = vget_low_s16(source1_low_s_16x8);
6041 const int16x4_t source1_B_s_16x4 = vget_high_s16(source1_low_s_16x8);
6042 const int16x4_t source1_C_s_16x4 = vget_low_s16(source1_high_s_16x8);
6043 const int16x4_t source1_D_s_16x4 = vget_high_s16(source1_high_s_16x8);
6044
6045 intermediateResults0_A_s_32x4 = vmlal_s16(intermediateResults0_A_s_32x4, source1_A_s_16x4, factorChannel01_1024_s_16x4);
6046 intermediateResults0_B_s_32x4 = vmlal_s16(intermediateResults0_B_s_32x4, source1_B_s_16x4, factorChannel01_1024_s_16x4);
6047 intermediateResults0_C_s_32x4 = vmlal_s16(intermediateResults0_C_s_32x4, source1_C_s_16x4, factorChannel01_1024_s_16x4);
6048 intermediateResults0_D_s_32x4 = vmlal_s16(intermediateResults0_D_s_32x4, source1_D_s_16x4, factorChannel01_1024_s_16x4);
6049
6050 intermediateResults1_A_s_32x4 = vmlal_s16(intermediateResults1_A_s_32x4, source1_A_s_16x4, factorChannel11_1024_s_16x4);
6051 intermediateResults1_B_s_32x4 = vmlal_s16(intermediateResults1_B_s_32x4, source1_B_s_16x4, factorChannel11_1024_s_16x4);
6052 intermediateResults1_C_s_32x4 = vmlal_s16(intermediateResults1_C_s_32x4, source1_C_s_16x4, factorChannel11_1024_s_16x4);
6053 intermediateResults1_D_s_32x4 = vmlal_s16(intermediateResults1_D_s_32x4, source1_D_s_16x4, factorChannel11_1024_s_16x4);
6054
6055 intermediateResults2_A_s_32x4 = vmlal_s16(intermediateResults2_A_s_32x4, source1_A_s_16x4, factorChannel21_1024_s_16x4);
6056 intermediateResults2_B_s_32x4 = vmlal_s16(intermediateResults2_B_s_32x4, source1_B_s_16x4, factorChannel21_1024_s_16x4);
6057 intermediateResults2_C_s_32x4 = vmlal_s16(intermediateResults2_C_s_32x4, source1_C_s_16x4, factorChannel21_1024_s_16x4);
6058 intermediateResults2_D_s_32x4 = vmlal_s16(intermediateResults2_D_s_32x4, source1_D_s_16x4, factorChannel21_1024_s_16x4);
6059
6060 // third source channel: intermediateResultsT += source2 * factorChannelT2
6061 const int16x4_t source2_A_s_16x4 = vget_low_s16(source2_low_s_16x8);
6062 const int16x4_t source2_B_s_16x4 = vget_high_s16(source2_low_s_16x8);
6063 const int16x4_t source2_C_s_16x4 = vget_low_s16(source2_high_s_16x8);
6064 const int16x4_t source2_D_s_16x4 = vget_high_s16(source2_high_s_16x8);
6065
6066 intermediateResults0_A_s_32x4 = vmlal_s16(intermediateResults0_A_s_32x4, source2_A_s_16x4, factorChannel02_1024_s_16x4);
6067 intermediateResults0_B_s_32x4 = vmlal_s16(intermediateResults0_B_s_32x4, source2_B_s_16x4, factorChannel02_1024_s_16x4);
6068 intermediateResults0_C_s_32x4 = vmlal_s16(intermediateResults0_C_s_32x4, source2_C_s_16x4, factorChannel02_1024_s_16x4);
6069 intermediateResults0_D_s_32x4 = vmlal_s16(intermediateResults0_D_s_32x4, source2_D_s_16x4, factorChannel02_1024_s_16x4);
6070
6071 intermediateResults1_A_s_32x4 = vmlal_s16(intermediateResults1_A_s_32x4, source2_A_s_16x4, factorChannel12_1024_s_16x4);
6072 intermediateResults1_B_s_32x4 = vmlal_s16(intermediateResults1_B_s_32x4, source2_B_s_16x4, factorChannel12_1024_s_16x4);
6073 intermediateResults1_C_s_32x4 = vmlal_s16(intermediateResults1_C_s_32x4, source2_C_s_16x4, factorChannel12_1024_s_16x4);
6074 intermediateResults1_D_s_32x4 = vmlal_s16(intermediateResults1_D_s_32x4, source2_D_s_16x4, factorChannel12_1024_s_16x4);
6075
6076 intermediateResults2_A_s_32x4 = vmlal_s16(intermediateResults2_A_s_32x4, source2_A_s_16x4, factorChannel22_1024_s_16x4);
6077 intermediateResults2_B_s_32x4 = vmlal_s16(intermediateResults2_B_s_32x4, source2_B_s_16x4, factorChannel22_1024_s_16x4);
6078 intermediateResults2_C_s_32x4 = vmlal_s16(intermediateResults2_C_s_32x4, source2_C_s_16x4, factorChannel22_1024_s_16x4);
6079 intermediateResults2_D_s_32x4 = vmlal_s16(intermediateResults2_D_s_32x4, source2_D_s_16x4, factorChannel22_1024_s_16x4);
6080
6081
6082 // now we add the bias values (regular 32 bit addition, not saturated; the sum of the three products stays below 2^25 in magnitude as 3 * 255 * 32768 < 2^25)
6083
6084 intermediateResults0_A_s_32x4 = vaddq_s32(intermediateResults0_A_s_32x4, biasChannel0_1024_s_32x4);
6085 intermediateResults0_B_s_32x4 = vaddq_s32(intermediateResults0_B_s_32x4, biasChannel0_1024_s_32x4);
6086 intermediateResults0_C_s_32x4 = vaddq_s32(intermediateResults0_C_s_32x4, biasChannel0_1024_s_32x4);
6087 intermediateResults0_D_s_32x4 = vaddq_s32(intermediateResults0_D_s_32x4, biasChannel0_1024_s_32x4);
6088
6089 intermediateResults1_A_s_32x4 = vaddq_s32(intermediateResults1_A_s_32x4, biasChannel1_1024_s_32x4);
6090 intermediateResults1_B_s_32x4 = vaddq_s32(intermediateResults1_B_s_32x4, biasChannel1_1024_s_32x4);
6091 intermediateResults1_C_s_32x4 = vaddq_s32(intermediateResults1_C_s_32x4, biasChannel1_1024_s_32x4);
6092 intermediateResults1_D_s_32x4 = vaddq_s32(intermediateResults1_D_s_32x4, biasChannel1_1024_s_32x4);
6093
6094 intermediateResults2_A_s_32x4 = vaddq_s32(intermediateResults2_A_s_32x4, biasChannel2_1024_s_32x4);
6095 intermediateResults2_B_s_32x4 = vaddq_s32(intermediateResults2_B_s_32x4, biasChannel2_1024_s_32x4);
6096 intermediateResults2_C_s_32x4 = vaddq_s32(intermediateResults2_C_s_32x4, biasChannel2_1024_s_32x4);
6097 intermediateResults2_D_s_32x4 = vaddq_s32(intermediateResults2_D_s_32x4, biasChannel2_1024_s_32x4);
6098
6099
6100 uint8x16x3_t results_u_8x16x3;
6101
6102 // rounding right shift by 10 bits (division by 1024) with saturated narrow from signed 32 bit to unsigned 16 bit, followed by a saturated narrow to unsigned 8 bit; the groups A, B, C, D are combined in pixel order
6103 results_u_8x16x3.val[0] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_D_s_32x4, 10))));
6104
6105 results_u_8x16x3.val[1] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_D_s_32x4, 10))));
6106 results_u_8x16x3.val[2] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_D_s_32x4, 10))));
6107
6108 // and we can store the result (the three channels are interleaved again)
6109 vst3q_u8(target, results_u_8x16x3);
6110}
6111
// Converts 16 interleaved 3-channel pixels (48 bytes) to 16 interleaved 3-channel pixels (48 bytes) with a 3x3 integer matrix and a bias, using 16 bit intermediate values and a final division by 128 (7 bit precision).
// For each target channel t: target[t] = saturate_u8(round((factorChannel_t0 * source[0] + factorChannel_t1 * source[1] + factorChannel_t2 * source[2] + biasChannel_t) / 128)).
// In 'factorChannelTS' the first digit T is the target channel and the second digit S is the source channel; the parameters are passed column by column (00, 10, 20, 01, ...).
// The same factor and bias vectors are applied to the lower eight pixels ('low') and to the upper eight pixels ('high').
// The bias is added before the division by 128; presumably the caller provides it already multiplied by 128 (as the '_128' suffix suggests) - TODO confirm against the declaration.
// The products are accumulated with vmulq_s16/vmlaq_s16 which keep only the lower 16 bits, so the weighted sum needs to stay within the signed 16 bit range.
// source: The 48 source bytes to read, must be valid; target: The 48 target bytes to write, must be valid
6112OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_128_s_16x8, const int16x8_t& factorChannel10_128_s_16x8, const int16x8_t& factorChannel20_128_s_16x8, const int16x8_t& factorChannel01_128_s_16x8, const int16x8_t& factorChannel11_128_s_16x8, const int16x8_t& factorChannel21_128_s_16x8, const int16x8_t& factorChannel02_128_s_16x8, const int16x8_t& factorChannel12_128_s_16x8, const int16x8_t& factorChannel22_128_s_16x8, const int16x8_t& biasChannel0_128_s_16x8, const int16x8_t& biasChannel1_128_s_16x8, const int16x8_t& biasChannel2_128_s_16x8)
6113{
6114 ocean_assert(source != nullptr && target != nullptr);
6115
6116 // the documentation of this function is written for the RGB24 to YUV24 conversion (as an example)
6117
6118 // precise color space conversion:
6119 // | Y | | 0.2578125 0.5039063 0.09765625 16.0 | | R |
6120 // | U | = | -0.1484375 -0.2890625 0.4375 128.0 | * | G |
6121 // | V | | 0.4375 -0.3671875 -0.0703125 128.0 | | B |
6122 // | 1 |
6123
6124 // approximation (the factors are the precise factors multiplied by 128 and rounded):
6125 // Y = ( 33 * R + 64 * G + 13 * B) / 128 + 16
6126 // U = (-19 * R - 37 * G + 56 * B) / 128 + 128
6127 // V = ( 56 * R - 47 * G - 9 * B) / 128 + 128
6128
6129 // we load 16 pixels (= 3 * 16 values) and directly deinterleave the 3 channels so that we receive the following patterns:
6130 // source_u_8x16x3.val[0]: R R R R R R R R R R R R R R R R
6131 // source_u_8x16x3.val[1]: G G G G G G G G G G G G G G G G
6132 // source_u_8x16x3.val[2]: B B B B B B B B B B B B B B B B
6133
6134 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
6135 // we widen each channel from 8 bit to 16 bit, 'low' holds the pixels 0-7, 'high' holds the pixels 8-15
6136 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[0])));
6137 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[1])));
6138 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[2])));
6139
6140 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[0])));
6141 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[1])));
6142 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[2])));
6143
6144 // intermediateResultsT = source0 * factorChannelT0 (first column of the matrix)
6145 int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_128_s_16x8);
6146 int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_128_s_16x8);
6147 int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_128_s_16x8);
6148
6149 int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_128_s_16x8);
6150 int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_128_s_16x8);
6151 int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_128_s_16x8);
6152
6153 // intermediateResultsT += source1 * factorChannelT1 (second column of the matrix)
6154 intermediateResults0_low_s_16x8 = vmlaq_s16(intermediateResults0_low_s_16x8, source1_low_s_16x8, factorChannel01_128_s_16x8);
6155 intermediateResults1_low_s_16x8 = vmlaq_s16(intermediateResults1_low_s_16x8, source1_low_s_16x8, factorChannel11_128_s_16x8);
6156 intermediateResults2_low_s_16x8 = vmlaq_s16(intermediateResults2_low_s_16x8, source1_low_s_16x8, factorChannel21_128_s_16x8);
6157
6158 intermediateResults0_high_s_16x8 = vmlaq_s16(intermediateResults0_high_s_16x8, source1_high_s_16x8, factorChannel01_128_s_16x8);
6159 intermediateResults1_high_s_16x8 = vmlaq_s16(intermediateResults1_high_s_16x8, source1_high_s_16x8, factorChannel11_128_s_16x8);
6160 intermediateResults2_high_s_16x8 = vmlaq_s16(intermediateResults2_high_s_16x8, source1_high_s_16x8, factorChannel21_128_s_16x8);
6161
6162 // intermediateResultsT += source2 * factorChannelT2 (third column of the matrix)
6163 intermediateResults0_low_s_16x8 = vmlaq_s16(intermediateResults0_low_s_16x8, source2_low_s_16x8, factorChannel02_128_s_16x8);
6164 intermediateResults1_low_s_16x8 = vmlaq_s16(intermediateResults1_low_s_16x8, source2_low_s_16x8, factorChannel12_128_s_16x8);
6165 intermediateResults2_low_s_16x8 = vmlaq_s16(intermediateResults2_low_s_16x8, source2_low_s_16x8, factorChannel22_128_s_16x8);
6166
6167 intermediateResults0_high_s_16x8 = vmlaq_s16(intermediateResults0_high_s_16x8, source2_high_s_16x8, factorChannel02_128_s_16x8);
6168 intermediateResults1_high_s_16x8 = vmlaq_s16(intermediateResults1_high_s_16x8, source2_high_s_16x8, factorChannel12_128_s_16x8);
6169 intermediateResults2_high_s_16x8 = vmlaq_s16(intermediateResults2_high_s_16x8, source2_high_s_16x8, factorChannel22_128_s_16x8);
6170
6171 // now we add the bias values (saturated), the bias is added before the division by 128
6172
6173 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, biasChannel0_128_s_16x8);
6174 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, biasChannel0_128_s_16x8);
6175
6176 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, biasChannel1_128_s_16x8);
6177 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, biasChannel1_128_s_16x8);
6178
6179 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, biasChannel2_128_s_16x8);
6180 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, biasChannel2_128_s_16x8);
6181
6182
6183 uint8x16x3_t results_u_8x16x3;
6184
6185 // rounding right shift by 7 bits (division by 128) combined with a saturated narrow from signed 16 bit to unsigned 8 bit, lower and upper eight pixels are combined again
6186 results_u_8x16x3.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 7));
6187 results_u_8x16x3.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 7));
6188 results_u_8x16x3.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 7));
6189
6190 // and we can store the result (the three channels are interleaved again)
6191 vst3q_u8(target, results_u_8x16x3);
6192}
6193
// Converts 16 interleaved 3-channel pixels (48 bytes) to 16 interleaved 4-channel pixels (64 bytes) with a 3x3 integer matrix, using 16 bit intermediate values and a final division by 64 (6 bit precision).
// In contrast to the 7 bit and 10 bit functions, the bias is subtracted from the source channels before the matrix is applied:
// for each target channel t in {0, 1, 2}: target[t] = saturate_u8(round((factorChannel_t0 * (source[0] - biasChannel0) + factorChannel_t1 * (source[1] - biasChannel1) + factorChannel_t2 * (source[2] - biasChannel2)) / 64)).
// The fourth target channel does not depend on the source, it is set to 'channelValue3_u_8x16' for every pixel.
// In 'factorChannelTS' the first digit T is the target channel and the second digit S is the source channel; the parameters are passed column by column (00, 10, 20, 01, ...).
// Each individual product is calculated with vmulq_s16 which keeps only the lower 16 bits, only the additions of the products are saturated.
// source: The 48 source bytes to read, must be valid; target: The 64 target bytes to write, must be valid
6194OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8, const uint8x16_t& channelValue3_u_8x16)
6195{
6196 ocean_assert(source != nullptr && target != nullptr);
6197
6198 // the documentation of this function is written for the YUV24 to RGB24 conversion (as an example), extended by a constant fourth target channel
6199
6200 // precise color space conversion:
6201 // | R | | 1 0.0 1.370705 -175.45024 | | Y |
6202 // | G | = | 1 -0.3376335 -0.698001 132.561152 | * | U |
6203 // | B | | 1 1.732446 0.0 -221.753088 | | V |
6204 // | 1 |
6205
6206 // approximation (the factors are the precise factors multiplied by 64 and rounded, the results are divided by 64 at the end):
6207 // R = 64 * Y + 0 * (U - 128) + 88 * (V - 128)
6208 // G = 64 * Y - 22 * (U - 128) - 45 * (V - 128)
6209 // B = 64 * Y + 111 * (U - 128) + 0 * (V - 128)
6210
6211 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source); // loads 16 pixels (48 bytes) and deinterleaves the 3 channels
6212
6213 // Y' = Y - bias0, U' = U - bias1, V' = V - bias2 (widening subtraction to 16 bit, the reinterpretation as signed 16 bit provides the signed difference)
6214 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
6215 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
6216 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
6217
6218 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
6219 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
6220 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
6221
6222 // now we apply the 3x3 matrix multiplication
6223
6224 int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_64_s_16x8);
6225 int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_64_s_16x8);
6226 int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_64_s_16x8);
6227
6228 int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_64_s_16x8);
6229 int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_64_s_16x8);
6230 int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_64_s_16x8);
6231
6232 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel01_64_s_16x8)); // intermediateResults0 = saturated(intermediateResults0 + source1_low * factorChannel01)
6233 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel11_64_s_16x8));
6234 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel21_64_s_16x8));
6235
6236 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel01_64_s_16x8));
6237 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel11_64_s_16x8));
6238 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel21_64_s_16x8));
6239
6240 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel02_64_s_16x8));
6241 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel12_64_s_16x8));
6242 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel22_64_s_16x8));
6243
6244 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel02_64_s_16x8));
6245 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel12_64_s_16x8));
6246 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel22_64_s_16x8));
6247
6248 uint8x16x4_t results_u_8x16x4;
6249
6250 // rounding right shift by 6 bits (normalization by 2^6) combined with a saturated narrow from signed 16 bit to unsigned 8 bit
6251 results_u_8x16x4.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 6));
6252 results_u_8x16x4.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 6));
6253 results_u_8x16x4.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 6));
6254 results_u_8x16x4.val[3] = channelValue3_u_8x16; // the fourth target channel is the provided constant vector
6255
6256 // and we can store the result (the four channels are interleaved)
6257 vst4q_u8(target, results_u_8x16x4);
6258}
6259
// Converts 8 interleaved 4-channel pixels (32 bytes) to 8 single-channel pixels (8 bytes) as a weighted sum of the source channels with 7 bit precision:
// target = saturate_u8(round((factorChannel0 * source[0] + factorChannel1 * source[1] + factorChannel2 * source[2] + factorChannel3 * source[3]) / 128)).
// tUseFactorChannelN: False, to skip the multiplication for source channel N (the corresponding factor parameter is then not read); at least one of the four flags must be true
// The sum is accumulated in unsigned 16 bit lanes without saturation; it cannot wrap around as long as the used factors sum up to at most 257 (255 * 257 = 65535).
// Presumably the factors sum up to 128 as in the example below - TODO confirm against the declaration.
// source: The 32 source bytes to read, must be valid; target: The 8 target bytes to write, must be valid
6260template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2, bool tUseFactorChannel3>
6261void FrameChannels::convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel0_128_u_8x8, const uint8x8_t& factorChannel1_128_u_8x8, const uint8x8_t& factorChannel2_128_u_8x8, const uint8x8_t& factorChannel3_128_u_8x8)
6262{
6263 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2 || tUseFactorChannel3, "Invalid multiplication factors!");
6264
6265 ocean_assert(source != nullptr && target != nullptr);
6266
6267 // the documentation of this function is written for the RGBA32 to Y8 conversion (as an example)
6268
6269 // precise color space conversion:
6270 // Y = 0.299 * R + 0.587 * G + 0.114 * B
6271
6272 // approximation:
6273 // Y = (38 * R + 75 * G + 15 * B) / 128
6274
6275 // we expect the following input pattern (for here RGBA32):
6276 // FEDC BA98 7654 3210
6277 // ABGR ABGR ABGR ABGR
6278
6279 // we load 8 pixels (= 4 * 8 values) and directly deinterleave the 4 channels so that we receive the following patterns:
6280 // pixels_u_8x8x4.val[0]: R R R R R R R R
6281 // pixels_u_8x8x4.val[1]: G G G G G G G G
6282 // pixels_u_8x8x4.val[2]: B B B B B B B B
6283 // pixels_u_8x8x4.val[3]: A A A A A A A A
6284
6285 uint8x8x4_t pixels_u_8x8x4 = vld4_u8(source);
6286
6287 uint16x8_t intermediateResults_16x8;
6288
6289 // we multiply the first channel with the specified factor (unless zero), otherwise we start with a zero sum
6290
6291 if constexpr (tUseFactorChannel0)
6292 {
6293 intermediateResults_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel0_128_u_8x8);
6294 }
6295 else
6296 {
6297 intermediateResults_16x8 = vdupq_n_u16(0u);
6298 }
6299
6300 // we multiply the second channel with the specified factor (unless zero) and accumulate the results
6301
6302 if constexpr (tUseFactorChannel1)
6303 {
6304 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[1], factorChannel1_128_u_8x8);
6305 }
6306
6307 // we multiply the third channel with the specified factor (unless zero) and accumulate the results
6308
6309 if constexpr (tUseFactorChannel2)
6310 {
6311 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[2], factorChannel2_128_u_8x8);
6312 }
6313
6314 // we multiply the fourth channel with the specified factor (unless zero) and accumulate the results
6315
6316 if constexpr (tUseFactorChannel3)
6317 {
6318 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[3], factorChannel3_128_u_8x8);
6319 }
6320
6321 // we shift the 16 bit values by 7 bits (= division by 128), apply rounding, and narrow the 16 bit integers to 8 bit integers (saturated) within one operation
6322 uint8x8_t results_u_8x8 = vqrshrn_n_u16(intermediateResults_16x8, 7); // results_u_8x8 = (intermediateResults_16x8 + 2^6) >> 7
6323
6324 // and we can store the result
6325 vst1_u8(target, results_u_8x8);
6326}
6327
6328OCEAN_FORCE_INLINE void FrameChannels::convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel00_128_u_8x8, const uint8x8_t& factorChannel10_128_u_8x8, const uint8x8_t& factorChannel01_128_u_8x8, const uint8x8_t& factorChannel11_128_u_8x8, const uint8x8_t& factorChannel02_128_u_8x8, const uint8x8_t& factorChannel12_128_u_8x8, const uint8x8_t& factorChannel03_128_u_8x8, const uint8x8_t& factorChannel13_128_u_8x8)
6329{
6330 ocean_assert(source != nullptr && target != nullptr);
6331
6332 // the documentation of this function designed for RGBA32 to YA16 conversion
6333
6334 // precise color space conversion:
6335 // Y = 0.299 * R + 0.587 * G + 0.114 * B + 0.0 * A
6336 // A = 0.0 * R + 0.0 * G + 0.0 * B + 1.0 * A
6337
6338 // approximation:
6339 // Y = (38 * R + 75 * G + 15 * B + 0 * A) / 128
6340 // A = (128 * A) / 128
6341
6342 // we expect the following input pattern (for here RGBA32):
6343 // FEDC BA98 7654 3210
6344 // ABGR ABGR ABGR ABGR
6345
6346 // we load 8 pixels (= 4 * 8 values) and directly deinterleave the 4 channels so that we receive the following patterns:
6347 // m4_64_pixels.val[0]: R R R R R R R R
6348 // m4_64_pixels.val[1]: G G G G G G G G
6349 // m4_64_pixels.val[2]: B B B B B B B B
6350 // m4_64_pixels.val[3]: A A A A A A A A
6351
6352 uint8x8x4_t pixels_u_8x8x4 = vld4_u8(source);
6353
6354 uint16x8_t intermediateResultsChannel0_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel00_128_u_8x8);
6355 uint16x8_t intermediateResultsChannel1_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel10_128_u_8x8);
6356
6357 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[1], factorChannel01_128_u_8x8);
6358 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[1], factorChannel11_128_u_8x8);
6359
6360 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[2], factorChannel02_128_u_8x8);
6361 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[2], factorChannel12_128_u_8x8);
6362
6363 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[3], factorChannel03_128_u_8x8);
6364 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[3], factorChannel13_128_u_8x8);
6365
6366 uint8x8x2_t results_u_8x8x2;
6367
6368 // we shift the 16 bit values by 7 bits (= 128), apply rounding, and narrow the 16 bit integers to 8 bit integers within one operation
6369
6370 results_u_8x8x2.val[0] = vqrshrn_n_u16(intermediateResultsChannel0_16x8, 7); // results_u_8x8x2.val[0] = (intermediateResultsChannel0_16x8 + 2^6) >> 2^7
6371 results_u_8x8x2.val[1] = vqrshrn_n_u16(intermediateResultsChannel1_16x8, 7);
6372
6373 // and we can store the result
6374 vst2_u8(target, results_u_8x8x2);
6375}
6376
6377#endif // OCEAN_HARDWARE_NEON_VERSION
6378
6379}
6380
6381}
6382
6383#endif // META_OCEAN_CV_FRAME_CHANNELS_H
The following comfort class provides comfortable functions simplifying prototyping applications but a...
Definition FrameChannels.h:51
static bool premultipliedAlphaToStraightAlpha(const Frame &source, Frame &target, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
static bool zipChannels(const Frames &sourceFrames, Frame &targetFrame, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Zips/interleaves 1-channel images into one image with n-channels.
static bool separateTo1Channel(const Frame &sourceFrame, Frames &targetFrames, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
static bool premultipliedAlphaToStraightAlpha(Frame &frame, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
static bool separateTo1Channel(const Frame &sourceFrame, const std::initializer_list< Frame * > &targetFrames, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
static bool zipChannels(const std::initializer_list< Frame > &sourceFrames, Frame &targetFrame, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Zips/interleaves 1-channel images into one image with n-channels.
static bool straightAlphaToPremultipliedAlpha(Frame &frame, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
static bool straightAlphaToPremultipliedAlpha(const Frame &source, Frame &target, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
This class implements frame channel conversion, transformation and extraction functions.
Definition FrameChannels.h:31
static void reverseChannelOrder(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Reverses the order of the channels of a frame with zipped pixel format.
Definition FrameChannels.h:2840
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &factorChannel00_1024_s_16x8, const __m128i &factorChannel10_1024_s_16x8, const __m128i &factorChannel20_1024_s_16x8, const __m128i &factorChannel01_1024_s_16x8, const __m128i &factorChannel11_1024_s_16x8, const __m128i &factorChannel21_1024_s_16x8, const __m128i &factorChannel02_1024_s_16x8, const __m128i &factorChannel12_1024_s_16x8, const __m128i &factorChannel22_1024_s_16x8, const __m128i &biasChannel0_1024_s_32x4, const __m128i &biasChannel1_1024_s_32x4, const __m128i &biasChannel2_1024_s_32x4)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition FrameChannels.h:5346
static void addChannelValueRow(const T *source, T *target, const size_t size, const void *channelValueParameter)
Adds a channel to a given row with generic (zipped) pixel format and sets all values to a specified v...
Definition FrameChannels.h:4292
static void shuffleRowChannelsAndSetLastChannelValue(const T *source, T *target, const size_t size, const void *options=nullptr)
Shuffles the channels of row pixels by application of a specified shuffle pattern and sets the last c...
Definition FrameChannels.h:3747
static void separateTo1Channel(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
Definition FrameChannels.h:1847
static OCEAN_FORCE_INLINE void convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8, const uint8x16_t &channelValue3_u_8x16)
Converts 16 pixels with 3 channels per pixel to 16 pixels with 4 channels per pixel by a linear combi...
Definition FrameChannels.h:6194
static void addChannelRow(const void **sources, void **targets, const unsigned int multipleRowIndex, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const void *options)
Adds a channel to a given row with generic (zipped) pixel format and copies the information of the ne...
Definition FrameChannels.h:4192
static void shuffleChannelsAndSetLastChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Shuffles the channels of source frame and sets the last channel with constant value in the target fra...
Definition FrameChannels.h:3910
static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactors0_128_u_16x8, const __m128i &multiplicationFactors1_128_u_16x8, const __m128i &multiplicationFactors2_128_u_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with one channel per pixel by a linear comb...
Definition FrameChannels.h:5192
static void shuffleChannels(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Shuffles the channels of a frame by an arbitrary pattern.
Definition FrameChannels.h:3882
static void convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *channelMultiplicationFactors_128)
Converts a row of pixels with 3 channels to pixels with one channel by a linear combination of the fo...
Definition FrameChannels.h:4882
static void convertRow4ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *channelMultiplicationFactors_128)
Converts a row of pixels with 4 channels to pixels with one channel by a linear combination of the fo...
Definition FrameChannels.h:4955
static void copyChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Copies one channel from a given frame with zipped pixel format to another frame with zipped pixel for...
Definition FrameChannels.h:2799
static void zipChannels(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition FrameChannels.h:2598
static void straightAlphaToPremultipliedAlpha8BitPerChannel(uint8_t *const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
Definition FrameChannels.h:4095
static void applyRowOperator(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const RowOperatorFunction< TSource, TTarget, tSourceChannels, tTargetChannels > &rowOperatorFunction, Worker *worker=nullptr)
Applies a row operator to all rows of a source image.
Definition FrameChannels.h:4010
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition FrameChannels.h:5773
static void convertRow4ChannelsTo2Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *multiplicationFactors_128)
Converts a row of pixels with 4 channels to pixels with two channels by a linear combination of the fo...
static void setChannelSubset(T *frame, const unsigned int width, const T value, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Sets one channel of a frame with one unique value.
Definition FrameChannels.h:4491
static void applyBivariateOperatorSubset(const TSource0 *source0, const TSource1 *source1, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Generic bivariate pixel operations.
Definition FrameChannels.h:4726
static void applyAdvancedPixelModifier(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:3972
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear comb...
Definition FrameChannels.h:5719
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x4_t &factorChannel00_1024_s_16x4, const int16x4_t &factorChannel10_1024_s_16x4, const int16x4_t &factorChannel20_1024_s_16x4, const int16x4_t &factorChannel01_1024_s_16x4, const int16x4_t &factorChannel11_1024_s_16x4, const int16x4_t &factorChannel21_1024_s_16x4, const int16x4_t &factorChannel02_1024_s_16x4, const int16x4_t &factorChannel12_1024_s_16x4, const int16x4_t &factorChannel22_1024_s_16x4, const int32x4_t &biasChannel0_1024_s_32x4, const int32x4_t &biasChannel1_1024_s_32x4, const int32x4_t &biasChannel2_1024_s_32x4)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear com...
Definition FrameChannels.h:5986
static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel0_128_u_8x8, const uint8x8_t &factorChannel1_128_u_8x8, const uint8x8_t &factorChannel2_128_u_8x8, const uint8x8_t &factorChannel3_128_u_8x8)
Converts 8 pixels with 4 channels per pixel to 8 pixels with one channel per pixel by a linear combin...
static void addFirstChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be th...
Definition FrameChannels.h:2711
static void addLastChannel(const T *source, const T *sourceNewChannel, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the ba...
Definition FrameChannels.h:2731
static void removeFirstChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Removes the first channel from a given frame with zipped (generic) pixel format.
Definition FrameChannels.h:2767
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x4_t &factorChannel00_1024_s_16x4, const int16x4_t &factorChannel10_1024_s_16x4, const int16x4_t &factorChannel20_1024_s_16x4, const int16x4_t &factorChannel01_1024_s_16x4, const int16x4_t &factorChannel11_1024_s_16x4, const int16x4_t &factorChannel21_1024_s_16x4, const int16x4_t &factorChannel02_1024_s_16x4, const int16x4_t &factorChannel12_1024_s_16x4, const int16x4_t &factorChannel22_1024_s_16x4, const int32x4_t &biasChannel0_1024_s_32x4, const int32x4_t &biasChannel1_1024_s_32x4, const int32x4_t &biasChannel2_1024_s_32x4)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channel per pixel by a linear comb...
Definition FrameChannels.h:5895
static void addLastChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be th...
Definition FrameChannels.h:2747
static void convertRow3ChannelsTo3Channels8BitPerChannel6BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static void reverseRowPixelOrderInPlace(T *data, const size_t size)
Reverses/mirrors the order of pixels in a given row (or a memory block in general) in place.
Definition FrameChannels.h:3017
static void applyRowOperatorSubset(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const RowOperatorFunction< TSource, TTarget, tSourceChannels, tTargetChannels > rowOperatorFunction, const unsigned int firstRow, const unsigned int numberRows)
Applies a row operator to a subset of all rows of a source image.
Definition FrameChannels.h:4859
static void applyPixelModifier(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:3954
static void narrowRow16BitPerChannelTo8BitPerChannel(const uint16_t *source, uint8_t *target, const size_t size, const void *unusedParameters=nullptr)
Narrows a row of pixels with 16 bit channels to pixels with 8 bit channels.
Definition FrameChannels.h:4133
static void applyAdvancedPixelModifierSubset(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:4619
static void convertRow4ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 4 channels to pixels with 3 channels by a linear combination of the fou...
static void shuffleRowChannels(const T *source, T *target, const size_t size, const void *unusedOptions=nullptr)
Shuffles the channels of row pixels by application of a specified shuffle pattern.
Definition FrameChannels.h:3387
static void premultipliedAlphaToStraightAlpha8BitPerChannel(uint8_t *const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
Definition FrameChannels.h:4057
static void convertRow3ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_128_s_16x8, const int16x8_t &factorChannel10_128_s_16x8, const int16x8_t &factorChannel20_128_s_16x8, const int16x8_t &factorChannel01_128_s_16x8, const int16x8_t &factorChannel11_128_s_16x8, const int16x8_t &factorChannel21_128_s_16x8, const int16x8_t &factorChannel02_128_s_16x8, const int16x8_t &factorChannel12_128_s_16x8, const int16x8_t &factorChannel22_128_s_16x8, const int16x8_t &biasChannel0_128_s_16x8, const int16x8_t &biasChannel1_128_s_16x8, const int16x8_t &biasChannel2_128_s_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with 3 channels per pixel by a linear combi...
Definition FrameChannels.h:6112
static void convertRow3ChannelsTo4Channels8BitPerChannel6BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 4 channels by a linear combination of the thr...
static constexpr unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME
Definition of a constant to specify that the number of channels are not known at compile time but at ...
Definition FrameChannels.h:37
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_128_s_16x8, const int16x8_t &factorChannel10_128_s_16x8, const int16x8_t &factorChannel20_128_s_16x8, const int16x8_t &factorChannel01_128_s_16x8, const int16x8_t &factorChannel11_128_s_16x8, const int16x8_t &factorChannel21_128_s_16x8, const int16x8_t &factorChannel02_128_s_16x8, const int16x8_t &factorChannel12_128_s_16x8, const int16x8_t &factorChannel22_128_s_16x8, const int16x8_t &biasChannel0_128_s_16x8, const int16x8_t &biasChannel1_128_s_16x8, const int16x8_t &biasChannel2_128_s_16x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear comb...
Definition FrameChannels.h:5838
static void copyChannelRow(const T *source, T *target, const size_t size, const void *unusedParameters=nullptr)
Copies one channel from a source row to a target row with generic (zipped) pixel format.
Definition FrameChannels.h:4331
static void reverseRowPixelOrder(const T *source, T *target, const size_t size)
Reverses/mirrors the order of pixels in a given row (or a memory block in general).
Definition FrameChannels.h:2856
static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel0_128_u_8x8, const uint8x8_t &factorChannel1_128_u_8x8, const uint8x8_t &factorChannel2_128_u_8x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with one channel per pixel by a linear combin...
static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactors0123_128_s_32x)
Converts 16 pixels with 4 channels per pixel to 16 pixels with one channel per pixel by a linear comb...
Definition FrameChannels.h:5483
static void removeLastChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Removes the last channel from a given frame with zipped (generic) pixel format.
Definition FrameChannels.h:2783
static void transformGeneric(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker)
Transforms a frame with generic pixel format (with zipped pixel information) like RGB24 or YUV24,...
Definition FrameChannels.h:4032
static void setChannel(T *frame, const unsigned int width, const unsigned int height, const T value, const unsigned int framePaddingElements, Worker *worker=nullptr)
Sets one channel of a frame with a specific unique value.
Definition FrameChannels.h:2821
static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(uint8_t *const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
Definition FrameChannels.h:5119
static void narrow16BitPerChannelTo8BitPerChannel(const uint16_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Narrows 16 bit channels of a frame to 8 bit channels.
Definition FrameChannels.h:3938
static void transformGenericSubset(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const RowReversePixelOrderFunction< void > rowReversePixelOrderFunction, const unsigned int bytesPerRow, const unsigned int sourceStrideBytes, const unsigned int targetStrideBytes, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a frame with generic pixel format (with zipped pixel information) like RGB24 o...
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &factorChannel00_128_s_16x8, const __m128i &factorChannel10_128_s_16x8, const __m128i &factorChannel20_128_s_16x8, const __m128i &factorChannel01_128_s_16x8, const __m128i &factorChannel11_128_s_16x8, const __m128i &factorChannel21_128_s_16x8, const __m128i &factorChannel02_128_s_16x8, const __m128i &factorChannel12_128_s_16x8, const __m128i &factorChannel22_128_s_16x8, const __m128i &biasChannel0_s_16x8, const __m128i &biasChannel1_s_16x8, const __m128i &biasChannel2_s_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear co...
Definition FrameChannels.h:5258
static void reverseRowChannelOrder(const T *source, T *target, const size_t size, const void *unusedOptions=nullptr)
Reverses/mirrors the order of channels in a given row (or a memory block in general).
Definition FrameChannels.h:3195
static void convertRow3ChannelsTo3Channels8BitPerChannel10BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static void applyBivariateOperator(const TSource0 *source0, const TSource1 *source1, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Generic bivariate pixel operations Applies bivariate per-pixel operators: C(y, x) = op(A(y,...
Definition FrameChannels.h:3991
static void addFirstChannel(const T *source, const T *sourceNewChannel, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the fr...
Definition FrameChannels.h:2695
static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel00_128_u_8x8, const uint8x8_t &factorChannel10_128_u_8x8, const uint8x8_t &factorChannel01_128_u_8x8, const uint8x8_t &factorChannel11_128_u_8x8, const uint8x8_t &factorChannel02_128_u_8x8, const uint8x8_t &factorChannel12_128_u_8x8, const uint8x8_t &factorChannel03_128_u_8x8, const uint8x8_t &factorChannel13_128_u_8x8)
Converts 8 pixels with 4 channels per pixel to 8 pixels with two channels per pixel by a linear combi...
Definition FrameChannels.h:6328
static void separateTo1ChannelRuntime(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
Definition FrameChannels.h:4352
static void zipChannelsRuntime(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition FrameChannels.h:4425
void(*)(const TSource *sourceRow, TTarget *targetRow, const unsigned int width, const unsigned int height, unsigned int rowIndex, const unsigned int sourceStrideElements, const unsigned int targetStrideElements) RowOperatorFunction
Definition of a function pointer to a function able to operate on an entire image row.
Definition FrameChannels.h:43
static void applyPixelModifierSubset(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:4514
static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(uint8_t *const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
Definition FrameChannels.h:5030
static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactorsChannel0_0123_128_s_16x8, const __m128i &multiplicationFactorsChannel1_0123_128_s_16x8)
Converts 16 pixels with 4 channels per pixel to 16 pixels with two channels per pixel by a linear comb...
Definition FrameChannels.h:5546
This is the base class for all frame converter classes.
Definition FrameConverter.h:32
ConversionFlag
Definition of individual conversion flags.
Definition FrameConverter.h:39
@ CONVERT_NORMAL
Normal conversion, neither flips nor mirrors the image.
Definition FrameConverter.h:49
@ CONVERT_FLIPPED_AND_MIRRORED
Rotated conversion, rotates the image by 180.0 degrees with anchor in the center of the image.
Definition FrameConverter.h:82
@ CONVERT_MIRRORED
Mirrored conversion, exchanges left and right of the image (like in a mirror, mirroring around the y-...
Definition FrameConverter.h:71
@ CONVERT_FLIPPED
Flipped conversion, exchanges top and bottom of the image (flipping around the x-axis).
Definition FrameConverter.h:60
static void convertGenericPixelFormat(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const ConversionFlag flag, const RowConversionFunction< TSource, TTarget > rowConversionFunction, const RowReversePixelOrderInPlaceFunction< TTarget > targetReversePixelOrderInPlaceFunction, const bool areContinuous, const void *options, Worker *worker)
Converts a frame with generic pixel format (e.g., RGBA32, BGR24, YUV24, ...) to a frame with generic ...
Definition FrameConverter.h:3483
void(*)(T *row, const size_t width) RowReversePixelOrderInPlaceFunction
Definition of a function pointer to a function able to reverse the order of pixels in an image row wi...
Definition FrameConverter.h:603
void(*)(const T *inputRow, T *targetRow, const size_t width) RowReversePixelOrderFunction
Definition of a function pointer to a function able to reverse the order of pixels in an image row wi...
Definition FrameConverter.h:594
static void convertArbitraryPixelFormat(const void **sources, void **targets, const unsigned int width, const unsigned int height, const ConversionFlag flag, const unsigned int multipleRowsPerIteration, const MultipleRowsConversionFunction multipleRowsConversionFunction, const void *options, Worker *worker)
Converts a frame with arbitrary pixel format (e.g., Y_UV12, Y_VU12, YUYV16, ...) to a frame with arbi...
Definition FrameConverter.h:3506
static OCEAN_FORCE_INLINE uint8x16_t cast16ElementsNEON(const float32x4_t &sourceA_f_32x4, const float32x4_t &sourceB_f_32x4, const float32x4_t &sourceC_f_32x4, const float32x4_t &sourceD_f_32x4)
Casts 16 float elements to 16 uint8_t elements.
Definition NEON.h:1216
static __m128i divideByRightShiftSigned32Bit(const __m128i &value, const unsigned int rightShifts)
Divides four signed 32 bit values by applying a right shift.
Definition SSE.h:3173
static __m128i load128i(const void *const buffer)
Loads a 128i value from the memory.
Definition SSE.h:3724
static void store128i(const __m128i &value, uint8_t *const buffer)
Stores a 128i value to the memory.
Definition SSE.h:3869
static __m128i divideByRightShiftSigned16Bit(const __m128i &value, const unsigned int rightShifts)
Divides eight int16_t values by applying a right shift.
Definition SSE.h:3104
static OCEAN_FORCE_INLINE void multiplyInt8x16ToInt32x8AndAccumulate(const __m128i &values0, const __m128i &values1, __m128i &results0, __m128i &results1)
Multiplies 8 int16_t values with 8 int16_t values and adds the products to 8 int32_t values.
Definition SSE.h:4014
static OCEAN_FORCE_INLINE void interleave3Channel8Bit48Elements(const __m128i &channel0, const __m128i &channel1, const __m128i &channel2, __m128i &interleavedA, __m128i &interleavedB, __m128i &interleavedC)
Interleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
Definition SSE.h:3410
static OCEAN_FORCE_INLINE void reverseChannelOrder3Channel8Bit48Elements(const __m128i &interleaved0, const __m128i &interleaved1, const __m128i &interleaved2, __m128i &reversedInterleaved0, __m128i &reversedInterleaved1, __m128i &reversedInterleaved2)
Reverses the order of the first and last channel of 48 elements of an image with 3 interleaved channe...
Definition SSE.h:3492
static __m128i removeHighBits16_8(const __m128i &value)
Removes the higher 8 bits of eight 16 bit elements.
Definition SSE.h:3904
static OCEAN_FORCE_INLINE void deInterleave3Channel8Bit48Elements(const __m128i &interleavedA, const __m128i &interleavedB, const __m128i &interleavedC, __m128i &channel0, __m128i &channel1, __m128i &channel2)
Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
Definition SSE.h:3369
static __m128i set128i(const unsigned long long high64, const unsigned long long low64)
Sets a 128i value by two 64 bit values.
Definition SSE.h:3875
static OCEAN_FORCE_INLINE void reverseChannelOrder4Channel8Bit64Elements(const uint8_t *interleaved, uint8_t *reversedInterleaved)
Reverses the order of the channels of 16 pixels (64 elements) of an image with 4 interleaved channels...
Definition SSE.h:3517
static OCEAN_FORCE_INLINE void multiplyInt8x16ToInt32x8(const __m128i &values0, const __m128i &values1, __m128i &products0, __m128i &products1)
Multiplies 8 int16_t values with 8 int16_t values and returns the products as 8 int32_t results.
Definition SSE.h:4005
static OCEAN_FORCE_INLINE void reverseChannelOrder2Channel8Bit32Elements(const uint8_t *interleaved, uint8_t *reversedInterleaved)
Reverses the order of the channels of 16 pixels (32 elements) of an image with 2 interleaved channels...
Definition SSE.h:3477
static Caller< void > createStatic(typename StaticFunctionPointerMaker< void, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass >::Type function)
Creates a new caller container for a static function with no function parameter.
Definition Caller.h:2877
This class implements Ocean's image class.
Definition Frame.h:1879
PixelFormat
Definition of all pixel formats available in the Ocean framework.
Definition Frame.h:183
typename TypeMapperBySize< sizeof(T)>::Type Type
Definition of an invalid mapped data type.
Definition DataType.h:508
This class implements a worker able to distribute function calls over different threads.
Definition Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
std::vector< Index32 > Indices32
Definition of a vector holding 32 bit index values.
Definition Base.h:96
std::vector< Frame > Frames
Definition of a vector holding frames.
Definition Frame.h:1842
The namespace covering the entire Ocean framework.
Definition Accessor.h:15
Default definition of a type with tBytes bytes.
Definition DataType.h:32