Ocean
Loading...
Searching...
No Matches
FrameChannels.h
Go to the documentation of this file.
1/*
2 * Copyright (c) Meta Platforms, Inc. and affiliates.
3 *
4 * This source code is licensed under the MIT license found in the
5 * LICENSE file in the root directory of this source tree.
6 */
7
8#ifndef META_OCEAN_CV_FRAME_CHANNELS_H
9#define META_OCEAN_CV_FRAME_CHANNELS_H
10
11#include "ocean/cv/CV.h"
13#include "ocean/cv/NEON.h"
14#include "ocean/cv/SSE.h"
15
16#include "ocean/base/DataType.h"
17#include "ocean/base/Frame.h"
18#include "ocean/base/Worker.h"
19
20namespace Ocean
21{
22
23namespace CV
24{
25
26/**
27 * This class implements frame channel conversion, transformation and extraction functions.
28 * @ingroup cv
29 */
30class OCEAN_CV_EXPORT FrameChannels : public FrameConverter
31{
32 public:
33
34 /**
35 * Definition of a constant to specify that the number of channels is not known at compile time but at runtime only.
36 */
37 static constexpr unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME = 0u;
38
39 /**
40 * Definition of a function pointer to a function able to operate on an entire image row; the template parameters 'tSourceChannels' and 'tTargetChannels' belong to the alias only and do not appear in the function's parameter list.
41 */
42 template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels>
43 using RowOperatorFunction = void(*)(const TSource* sourceRow, TTarget* targetRow, const unsigned int width, const unsigned int height, unsigned int rowIndex, const unsigned int sourceStrideElements, const unsigned int targetStrideElements);
44
45 /**
46 * The following comfort class provides comfortable functions simplifying prototyping applications but also increasing binary size of the resulting applications.
47 * Best practice is to avoid using these functions if binary size matters,<br>
48 * as for every comfort function a corresponding function exists with specialized functionality not increasing binary size significantly.<br>
49 */
50 class OCEAN_CV_EXPORT Comfort
51 {
52 public:
53
54 /**
55 * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
56 * Usage:
57 * @code
58 * Frame rgbSourceFrame = ...;
59 *
60 * Frames targetFrames;
61 *
62 * if (separateTo1Channel(rgbSourceFrame, targetFrames))
63 * {
64 * ocean_assert(targetFrames.size() == 3);
65 *
66 * // do something with targetFrames
67 * }
68 * @endcode
69 * @param sourceFrame The frame to be separated, must be valid
70 * @param targetFrames The resulting frames each holding one channel of the source frame, will be set automatically
71 * @param targetPixelFormat Optional explicit pixel format of the target frames, must be a pixel format with 1 channel and must fit with the data type of the source pixel format, otherwise FORMAT_UNDEFINED
72 * @return True, if succeeded
73 */
74 static bool separateTo1Channel(const Frame& sourceFrame, Frames& targetFrames, const FrameType::PixelFormat targetPixelFormat = FrameType::FORMAT_UNDEFINED);
75
76 /**
77 * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
78 * Usage:
79 * @code
80 * Frame rgbSourceFrame = ...;
81 *
82 * Frame targetFrameA;
83 * Frame targetFrameB;
84 * Frame targetFrameC;
85 *
86 * if (separateTo1Channel(rgbSourceFrame, {&targetFrameA, &targetFrameB, &targetFrameC}))
87 * {
88 * // do something with targetFrames
89 * }
90 * @endcode
91 * @param sourceFrame The frame to be separated, must be valid
92 * @param targetFrames The resulting frames each holding one channel of the source frame, one for each source channel
93 * @param targetPixelFormat Optional explicit pixel format of the target frames, must be a pixel format with 1 channel and must fit with the data type of the source pixel format, otherwise FORMAT_UNDEFINED
94 * @return True, if succeeded
95 */
96 static bool separateTo1Channel(const Frame& sourceFrame, const std::initializer_list<Frame*>& targetFrames, const FrameType::PixelFormat targetPixelFormat = FrameType::FORMAT_UNDEFINED);
97
98 /**
99 * Zips/interleaves 1-channel images into one image with n-channels.
100 * Usage:
101 * @code
102 * Frame sourceFrameA = ...;
103 * Frame sourceFrameB = ...;
104 * Frame sourceFrameC = ...;
105 *
106 * Frame targetFrame;
107 * if (zipChannels({sourceFrameA, sourceFrameB, sourceFrameC}, targetFrame))
108 * {
109 * ocean_assert(targetFrame.channels() == 3u);
110 *
111 * // do something with targetFrame
112 * }
113 * @endcode
114 * @param sourceFrames The frames to be zipped/interleaved, must be valid
115 * @param targetFrame The resulting frame holding n channels, will be set automatically
116 * @param targetPixelFormat Optional explicit pixel format of the target frame, must fit with the data type of the source pixel format, otherwise FORMAT_UNDEFINED; NOTE(review): the previous wording required 'a pixel format with 1 channel', which looks copied from separateTo1Channel() - presumably the format needs as many channels as source frames, confirm against the implementation
117 * @return True, if succeeded
118 */
119 static bool zipChannels(const std::initializer_list<Frame>& sourceFrames, Frame& targetFrame, const FrameType::PixelFormat targetPixelFormat = FrameType::FORMAT_UNDEFINED);
120
121 /**
122 * Zips/interleaves 1-channel images into one image with n-channels.
123 * Usage:
124 * @code
125 * Frames sourceFrames = ...;
126 *
127 * Frame targetFrame;
128 * if (zipChannels(sourceFrames, targetFrame))
129 * {
130 * ocean_assert(targetFrame.channels() == sourceFrames.size());
131 *
132 * // do something with targetFrame
133 * }
134 * @endcode
135 * @param sourceFrames The frames to be zipped/interleaved, must be valid
136 * @param targetFrame The resulting frame holding n channels, will be set automatically
137 * @param targetPixelFormat Optional explicit pixel format of the target frame, must fit with the data type of the source pixel format, otherwise FORMAT_UNDEFINED; NOTE(review): the previous wording required 'a pixel format with 1 channel', which looks copied from separateTo1Channel() - presumably the format needs as many channels as source frames, confirm against the implementation
138 * @return True, if succeeded
139 */
140 static bool zipChannels(const Frames& sourceFrames, Frame& targetFrame, const FrameType::PixelFormat targetPixelFormat = FrameType::FORMAT_UNDEFINED);
141
142 /**
143 * Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
144 * @param frame The image to convert, must be valid
145 * @param worker Optional worker object to distribute the computation
146 * @return True, if succeeded
147 * @see straightAlphaToPremultipliedAlpha().
148 */
149 static bool premultipliedAlphaToStraightAlpha(Frame& frame, Worker* worker = nullptr);
150
151 /**
152 * Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
153 * @param source The source image to convert, must be valid
154 * @param target The resulting converted target image, the frame type will be changed if it does not match the source frame
155 * @param worker Optional worker object to distribute the computation
156 * @return True, if succeeded
157 * @see straightAlphaToPremultipliedAlpha().
158 */
159 static bool premultipliedAlphaToStraightAlpha(const Frame& source, Frame& target, Worker* worker = nullptr);
160
161 /**
162 * Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied alpha.
163 * @param frame The image to convert, must be valid
164 * @param worker Optional worker object to distribute the computation
 * @return True, if succeeded (the return value was undocumented; wording follows premultipliedAlphaToStraightAlpha() - confirm)
165 * @see premultipliedAlphaToStraightAlpha().
166 */
167 static bool straightAlphaToPremultipliedAlpha(Frame& frame, Worker* worker = nullptr);
168
169 /**
170 * Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied alpha.
171 * @param source The source image to convert, must be valid
172 * @param target The resulting converted target image, must be valid
173 * @param worker Optional worker object to distribute the computation
 * @return True, if succeeded (the return value was undocumented; wording follows premultipliedAlphaToStraightAlpha() - confirm)
174 * @see premultipliedAlphaToStraightAlpha().
175 */
176 static bool straightAlphaToPremultipliedAlpha(const Frame& source, Frame& target, Worker* worker = nullptr);
177 };
178
179 /**
180 * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
181 * Usage:
182 * @code
183 * const unsigned int width = ...;
184 * const unsigned int height = ...;
185 *
186 * const uint8_t* sourceFrame = ...;
187 * const unsigned int sourceFramePaddingElements = ...;
188 *
189 * constexpr unsigned int channels = 2u;
190 *
191 * uint8_t* targetFrames[channels] = {..., ...};
192 * const unsigned int targetFramesPaddingElements[channels] = {..., ...};
193 *
194 * separateTo1Channel<uint8_t, uint8_t, channels>(sourceFrame, targetFrames, width, height, channels, sourceFramePaddingElements, targetFramesPaddingElements);
195 * @endcode
196 * @param sourceFrame The frame to be separated, must be valid
197 * @param targetFrames The pointers to the resulting separated frames each holding one channel of the source frame, with already allocated memory
198 * @param width The width of the source frame in pixel, with range [1, infinity)
199 * @param height The height of the source frame in pixel, with range [1, infinity)
200 * @param channels The number of channels the source frame has, with range [1, infinity)
201 * @param sourceFramePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
202 * @param targetFramesPaddingElements The array of padding elements at the end of each target row, one for each target frame, in elements, with range [0, infinity), nullptr if all are zero
203 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
204 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
205 * @tparam tChannels The number of source channels (and target frames) if known at compile time; otherwise CHANNELS_NOT_KNOWN_AT_COMPILE_TIME == 0, if known at compile time must be identical with 'channels'
206 */
207 template <typename TSource, typename TTarget, unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
208 static void separateTo1Channel(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements);
209
210 /**
211 * Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32 into individual frames with one channel only.
212 * This overload has no explicit 'channels' parameter, the target frames and their padding elements are provided as initializer lists.
213 * Usage:
214 * @code
215 * const unsigned int width = ...;
216 * const unsigned int height = ...;
217 *
218 * const uint8_t* sourceFrame = ...;
219 * const unsigned int sourceFramePaddingElements = ...;
220 *
221 * uint8_t* targetFrame0 = ...;
222 * uint8_t* targetFrame1 = ...;
223 * const unsigned int targetFramePaddingElements0 = ...;
224 * const unsigned int targetFramePaddingElements1 = ...;
225 *
226 * separateTo1Channel<uint8_t, uint8_t>(sourceFrame, {targetFrame0, targetFrame1}, width, height, sourceFramePaddingElements, {targetFramePaddingElements0, targetFramePaddingElements1});
227 * @endcode
228 * @param sourceFrame The frame to be separated, must be valid
229 * @param targetFrames The pointers to the resulting separated frames each holding one channel of the source frame, with already allocated memory
230 * @param width The width of the source frame in pixel, with range [1, infinity)
231 * @param height The height of the source frame in pixel, with range [1, infinity)
232 * @param sourceFramePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
233 * @param targetFramesPaddingElements The list of padding elements at the end of each target row, one for each target frame, in elements, with range [0, infinity)
234 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
235 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
236 */
237 template <typename TSource, typename TTarget>
238 static void separateTo1Channel(const TSource* const sourceFrame, const std::initializer_list<TTarget*>& targetFrames, const unsigned int width, const unsigned int height, const unsigned int sourceFramePaddingElements, const std::initializer_list<const unsigned int>& targetFramesPaddingElements);
238
239 /**
240 * Zips/interleaves 1-channel images into one image with n-channels.
241 * Usage:
242 * @code
243 * const unsigned int width = ...;
244 * const unsigned int height = ...;
245 *
246 * const uint8_t* sourceFrames[2] = {..., ...};
247 * const unsigned int sourceFramesPaddingElements[2] = {..., ...};
248 *
249 * uint8_t* targetFrame = ...;
250 * const unsigned int targetFramePaddingElements = ...;
251 *
252 * zipChannels<uint8_t, uint8_t>(sourceFrames, targetFrame, width, height, 2u, sourceFramesPaddingElements, targetFramePaddingElements);
253 * @endcode
254 * @param sourceFrames The pointers to the individual 1-channel frames, one for each image, must be valid
255 * @param targetFrame The pointer to the resulting zipped frame holding n-channels, must be valid
256 * @param width The width of the source frames in pixel, with range [1, infinity)
257 * @param height The height of the source frames in pixel, with range [1, infinity)
258 * @param channels The number of provided source frames (and the number of channels the target frame will have), with range [1, infinity)
259 * @param sourceFramesPaddingElements The array of padding elements at the end of each source row, one for each source frame, in elements, with range [0, infinity), nullptr if all are zero
260 * @param targetFramePaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
261 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
262 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
263 * @tparam tChannels The number of source frames (and target channels) if known at compile time; otherwise CHANNELS_NOT_KNOWN_AT_COMPILE_TIME == 0, if known at compile time must be identical with 'channels'
264 */
265 template <typename TSource, typename TTarget, unsigned int tChannels = CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>
266 static void zipChannels(const TSource* const* const sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements);
267
268 /**
269 * Zips/interleaves 1-channel images into one image with n-channels.
270 * This overload has no explicit 'channels' parameter, the source frames and their padding elements are provided as initializer lists.
271 * Usage:
272 * @code
273 * const unsigned int width = ...;
274 * const unsigned int height = ...;
275 *
276 * const uint8_t* sourceFrame0 = ...;
277 * const uint8_t* sourceFrame1 = ...;
278 * const unsigned int sourceFramePaddingElements0 = ...;
279 * const unsigned int sourceFramePaddingElements1 = ...;
280 *
281 * uint8_t* targetFrame = ...;
282 * const unsigned int targetFramePaddingElements = ...;
283 *
284 * zipChannels<uint8_t, uint8_t>({sourceFrame0, sourceFrame1}, targetFrame, width, height, {sourceFramePaddingElements0, sourceFramePaddingElements1}, targetFramePaddingElements);
285 * @endcode
286 * @param sourceFrames The pointers to the individual 1-channel frames, one for each image, must be valid
287 * @param targetFrame The pointer to the resulting zipped frame holding n-channels, must be valid
288 * @param width The width of the source frames in pixel, with range [1, infinity)
289 * @param height The height of the source frames in pixel, with range [1, infinity)
290 * @param sourceFramesPaddingElements The list of padding elements at the end of each source row, one for each source frame, in elements, with range [0, infinity)
291 * @param targetFramePaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
292 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
293 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
294 */
295 template <typename TSource, typename TTarget>
296 static void zipChannels(const std::initializer_list<const TSource*>& sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const std::initializer_list<unsigned int>& sourceFramesPaddingElements, const unsigned int targetFramePaddingElements);
296
297 /**
298 * Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the front of all existing channels.
299 * @param source The source frame to which the new channel will be added, must be valid
300 * @param sourceNewChannel The 1-channel frame providing the new channel information, must be valid
301 * @param target The target frame receiving the joined channels, must be valid
302 * @param width The width of the frames in pixel, with range [1, infinity)
303 * @param height The height of the frames in pixel, with range [1, infinity)
304 * @param conversionFlag The conversion to be applied
305 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
306 * @param sourceNewChannelPaddingElements The number of padding elements at the end of each new-channel-source row, in elements, with range [0, infinity)
307 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
308 * @param worker Optional worker object to distribute the computational load
309 * @tparam T Data type of each channel pixel value, identical for the source frame, the new channel and the target frame
310 * @tparam tSourceChannels Number of channels of the source frame (without the new channel), with range [1, infinity)
311 */
312 template <typename T, unsigned int tSourceChannels>
313 static inline void addFirstChannel(const T* source, const T* sourceNewChannel, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
314
315 /**
316 * Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be the same for each pixel.
317 * @param source The source frame providing the existing channels
318 * @param newChannelValue Value that will be assigned to the new channel for each pixel
319 * @param target The target frame receiving the existing channels and the new channel (as new first channel)
320 * @param width The width of the frames in pixel, with range [1, infinity)
321 * @param height The height of the frames in pixel, with range [1, infinity)
322 * @param conversionFlag The conversion to be applied
323 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
324 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
325 * @param worker Optional worker object to distribute the computational load
326 * @tparam T Data type of each channel pixel value
327 * @tparam tSourceChannels Number of channels of the source frame (without the new channel)
328 */
329 template <typename T, unsigned int tSourceChannels>
330 static inline void addFirstChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
331
332 /**
333 * Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the back of all existing channels.
334 * @param source The source frame to which the new channel will be added, must be valid
335 * @param sourceNewChannel The 1-channel frame providing the new channel information, must be valid
336 * @param target The target frame receiving the joined channels, must be valid
337 * @param width The width of the frames in pixel, with range [1, infinity)
338 * @param height The height of the frames in pixel, with range [1, infinity)
339 * @param conversionFlag The conversion to be applied
340 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
341 * @param sourceNewChannelPaddingElements The number of padding elements at the end of each new-channel-source row, in elements, with range [0, infinity)
342 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
343 * @param worker Optional worker object to distribute the computational load
344 * @tparam T Data type of each channel pixel value, identical for the source frame, the new channel and the target frame
345 * @tparam tSourceChannels Number of channels of the source frame (without the new channel), with range [1, infinity)
346 */
347 template <typename T, unsigned int tSourceChannels>
348 static inline void addLastChannel(const T* source, const T* sourceNewChannel, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
349
350 /**
351 * Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be the same for each pixel.
352 * @param source The source frame providing the existing channels
353 * @param newChannelValue Value that will be assigned to the new channel for each pixel
354 * @param target The target frame receiving the existing channels and the new channel (as new last channel)
355 * @param width The width of the frames in pixel, with range [1, infinity)
356 * @param height The height of the frames in pixel, with range [1, infinity)
357 * @param conversionFlag The conversion to be applied
358 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
359 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
360 * @param worker Optional worker object to distribute the computational load
361 * @tparam T Data type of each channel pixel value
362 * @tparam tSourceChannels Number of channels of the source frame (without the new channel)
363 */
364 template <typename T, unsigned int tSourceChannels>
365 static inline void addLastChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
366
367 /**
368 * Removes the first channel from a given frame with zipped (generic) pixel format.
369 * This function is mainly a wrapper around FrameChannels::shuffleChannels().
370 * @param source The source frame from which the first channel will be removed, must be valid
371 * @param target The target frame without the first channel, must be valid
372 * @param width The width of the frames in pixel, with range [1, infinity)
373 * @param height The height of the frames in pixel, with range [1, infinity)
374 * @param conversionFlag The conversion to be applied
375 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
376 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
377 * @param worker Optional worker object to distribute the computational load
378 * @tparam T Data type of each channel pixel value
379 * @tparam tSourceChannels Number of channels of the source frame (including the channel that will be removed), with range [2, infinity)
380 * @see FrameChannels::shuffleChannels<T, tSourceChannels, tTargetChannels, tShufflePattern>(), removeLastChannel().
381 */
382 template <typename T, unsigned int tSourceChannels>
383 static inline void removeFirstChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
384
385 /**
386 * Removes the last channel from a given frame with zipped (generic) pixel format.
387 * This function is mainly a wrapper around FrameChannels::shuffleChannels().
388 * @param source The source frame from which the last channel will be removed, must be valid
389 * @param target The target frame without the last channel, must be valid
390 * @param width The width of the frames in pixel, with range [1, infinity)
391 * @param height The height of the frames in pixel, with range [1, infinity)
392 * @param conversionFlag The conversion to be applied
393 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
394 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
395 * @param worker Optional worker object to distribute the computational load
396 * @tparam T Data type of each channel pixel value
397 * @tparam tSourceChannels Number of channels of the source frame (including the channel that will be removed), with range [2, infinity)
398 * @see FrameChannels::shuffleChannels<T, tSourceChannels, tTargetChannels, tShufflePattern>(), removeFirstChannel().
399 */
400 template <typename T, unsigned int tSourceChannels>
401 static inline void removeLastChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
402
403 /**
404 * Copies one channel from a given frame with zipped pixel format to another frame with zipped pixel format.
405 * @param source The source frame from which the channel will be copied, must be valid
406 * @param target The target frame to which the channel will be copied, must be valid
407 * @param width The width of both frames in pixel, with range [1, infinity)
408 * @param height The height of both frames in pixel, with range [1, infinity)
409 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
410 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
411 * @param worker Optional worker object to distribute the computational load
412 * @tparam T Data type of each channel pixel value
413 * @tparam tSourceChannels Number of channels in the source frame, with range [1, infinity)
414 * @tparam tTargetChannels Number of channels in the target frame, with range [1, infinity)
415 * @tparam tSourceChannelIndex The index of the source channel that will be copied, with range [0, tSourceChannels - 1]
416 * @tparam tTargetChannelIndex The index of the target channel that will receive the copied channel, with range [0, tTargetChannels - 1]
417 */
418 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tSourceChannelIndex, unsigned int tTargetChannelIndex>
419 static inline void copyChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
420
421 /**
422 * Sets one channel of a frame with a specific unique value.
423 * @param frame The frame in which one channel of each pixel will be set
424 * @param width The width of the frame in pixel, with range [1, infinity)
425 * @param height The height of the frame in pixel, with range [1, infinity)
426 * @param value The value to be set
427 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
428 * @param worker Optional worker object to distribute the computation
429 * @tparam T Data type of each channel pixel value
430 * @tparam tChannel Index of the channel that will be set, with range [0, tChannels)
431 * @tparam tChannels Number of data channels of the frames, with range [1, infinity)
432 */
433 template <typename T, unsigned int tChannel, unsigned int tChannels>
434 static inline void setChannel(T* frame, const unsigned int width, const unsigned int height, const T value, const unsigned int framePaddingElements, Worker* worker = nullptr);
435
436 /**
437 * Reverses the order of the channels of a frame with zipped pixel format.
438 * The first channel will be exchanged with the last channel, the second channel will be exchanged with the second last channel and so on.
439 * @param source The source frame for which the channels will be swapped, must be valid
440 * @param target The target frame that receives the swapped channels, must be valid
441 * @param width The width of the source frame in pixel, with range [1, infinity)
442 * @param height The height of the source frame in pixel, with range [1, infinity)
443 * @param conversionFlag The conversion to be applied
444 * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
445 * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
446 * @param worker Optional worker object to distribute the computation
447 * @tparam T Data type of each channel pixel value
448 * @tparam tChannels Number of data channels, with range [1, infinity)
449 */
450 template <typename T, unsigned int tChannels>
451 static inline void reverseChannelOrder(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
452
453 /**
454 * Shuffles the channels of a frame by an arbitrary pattern.
455 * The shuffle pattern is defined in groups of four bits defining the source channels, the lowest four bits belong to the first target channel.<br>
456 * For the shuffling from e.g., an RGBA32 row to a BGRA32 row the pattern 0x3012u must be defined:
457 * <pre>
458 * source pixel   R G B A
459 *                0 1 2 3
460 * target pixel   B G R A
461 *                2 1 0 3
462 * pattern (with reversed order): 0x3012
463 * </pre>
464 * @param source The source frame for which the channels will be shuffled, must be valid
465 * @param target The target frame that receives the shuffled channels, must be valid
466 * @param width The width of the source frame in pixel, with range [1, infinity)
467 * @param height The height of the source frame in pixel, with range [1, infinity)
468 * @param conversionFlag The conversion to be applied
469 * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
470 * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
471 * @param worker Optional worker object to distribute the computation
472 * @tparam T Data type of each channel pixel value
473 * @tparam tSourceChannels Number of source data channels, with range [1, 8u]
474 * @tparam tTargetChannels Number of target data channels, with range [1, 8u]
475 * @tparam tShufflePattern Groups of four bits define the source channel, e.g., 0x76543210 defines the identity transformation, 0x01234567 defines the reverse transformation
476 */
477 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
478 static inline void shuffleChannels(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
479
480 /**
481 * Shuffles the channels of a source frame and sets the last channel of the target frame to a constant value.
482 * The shuffle pattern is defined in groups of four bits defining the source channels.<br>
483 * For the shuffling from e.g., an RGB24 frame to a BGRA32 frame the pattern 0x012u must be defined:
484 * <pre>
485 * source pixel R G B
486 * 0 1 2
487 * target pixel B G R A
488 * 2 1 0
489 * pattern (with reversed order): 0x012
490 * </pre>
491 * @param source The source frame for which the channels will be shuffled, must be valid
492 * @param newChannelValue The constant channel value which will be added as last channel to the target frame, with range [0, infinity)
493 * @param target The target frame that receives the shuffled channels, must be valid
494 * @param width The width of the source frame in pixel, with range [1, infinity)
495 * @param height The height of the source frame in pixel, with range [1, infinity)
496 * @param conversionFlag The conversion to be applied
497 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
498 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
499 * @param worker Optional worker object to distribute the computation
500 * @tparam T Data type of each channel pixel value
501 * @tparam tSourceChannels Number of source data channels, with range [1, 8u]
502 * @tparam tTargetChannels Number of target data channels, including the additional extra target channel, with range [2, 8u]
503 * @tparam tShufflePattern Groups of four bits define the source channel, e.g., 0x76543210 defines the identity transformation, 0x01234567 defines the reverse transformation
504 */
505 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
506 static inline void shuffleChannelsAndSetLastChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
507
508 /**
509 * Narrows 16 bit channels of a frame to 8 bit channels.
510 * @param source The source frame for which the channels will be narrowed, must be valid
511 * @param target The target frame that receives the narrowed channels, must be valid
512 * @param width The width of the source frame in pixel, with range [1, infinity)
513 * @param height The height of the source frame in pixel, with range [1, infinity)
514 * @param conversionFlag The conversion to be applied
515 * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
516 * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
517 * @param worker Optional worker object to distribute the computation
518 * @tparam tChannels Number of source data channels, with range [1, infinity)
519 */
520 template <unsigned int tChannels>
521 static inline void narrow16BitPerChannelTo8BitPerChannel(const uint16_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
522
523 /**
524 * Applies a specific modifier function on each pixel.
525 * @param source The source frame providing the pixel information, must be valid
526 * @param target The target frame receiving the pixel information, must be valid
527 * @param width The width of the source frame in pixel, with range [1, infinity)
528 * @param height The height of the source frame in pixel, with range [1, infinity)
529 * @param conversionFlag The conversion to be applied
530 * @param worker Optional worker object to distribute the computation
531 * @tparam T Data type of each channel pixel value
532 * @tparam tChannels Number of data channels, with range [1, infinity)
533 * @tparam tPixelFunction Pixel modification function taking a constant pointer and a writable pointer to T (presumably the source pixel and the target pixel)
534 */
535 template <typename T, unsigned int tChannels, void (*tPixelFunction)(const T*, T*)>
536 static void applyPixelModifier(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, Worker* worker = nullptr);
537
538 /**
539 * Applies a specific modifier function on each pixel.
540 * @param source The source frame providing the pixel information, must be valid
541 * @param target The target frame receiving the pixel information, must be valid
542 * @param width The width of the source frame in pixel, with range [1, infinity)
543 * @param height The height of the source frame in pixel, with range [1, infinity)
544 * @param sourcePaddingElements The number of padding elements at the end of each row of the source frame, in elements, with range [0, infinity)
545 * @param targetPaddingElements The number of padding elements at the end of each row of the target frame, in elements, with range [0, infinity)
546 * @param conversionFlag The conversion to be applied
547 * @param worker Optional worker object to distribute the computation
548 * @tparam TSource Data type of each source channel pixel value
549 * @tparam TTarget Data type of each target channel pixel value
550 * @tparam tSourceChannels Number of source data channels, with range [1, infinity)
551 * @tparam tTargetChannels Number of target data channels, with range [1, infinity)
552 * @tparam tPixelFunction Pixel modification function
553 */
554 template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tPixelFunction)(const TSource*, TTarget*)>
555 static void applyAdvancedPixelModifier(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker* worker = nullptr);
556
557 /**
558 * Generic bivariate pixel operations.
559 * Applies bivariate per-pixel operators: `C(y, x) = op(A(y, x), B(y, x))`. Input and output must have the same frame type and have a single plane.
560 * @param source0 First source frame
561 * @param source1 Second source frame
562 * @param target The target frame
563 * @param width The width of the source frame in pixel, with range [1, infinity)
564 * @param height The height of the source frame in pixel, with range [1, infinity)
565 * @param source0PaddingElements The number of padding elements at the end of each row of the first source, in elements, with range [0, infinity)
566 * @param source1PaddingElements The number of padding elements at the end of each row of the second source, in elements, with range [0, infinity)
567 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
568 * @param conversionFlag The conversion to be applied
569 * @param worker Optional worker object to distribute the computation
570 * @tparam TSource0 Type of the first data source
571 * @tparam TSource1 Type of the second data source
572 * @tparam TTarget Type of the target
573 * @tparam TIntermediate Data type that is used for the computation of intermediate results, e.g. if TSource0 and TSource1 are different
574 * @tparam tSourceChannels Number of channels of the two sources, range: [1, infinity)
575 * @tparam tTargetChannels Number of channels of the target, range: [1, infinity)
576 * @tparam tOperator The operation (function) that is applied on both sources to yield the value for the target (called per pixel)
577 */
578 template <typename TSource0, typename TSource1, typename TTarget, typename TIntermediate, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
579 static void applyBivariateOperator(const TSource0* source0, const TSource1* source1, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker* worker = nullptr);
580
581 /**
582 * Applies a row operator to all rows of a source image.
583 * The row operator is given as function pointer and is intended to transform a source row to a target row.<br>
584 * The function allows to implement e.g., frame filters with few lines of code, source and target frame must have the same size.
585 * @param source The source frame to which the row operator is applied, must be valid
586 * @param target The target frame receiving the result of the row operator, must be valid
587 * @param width The width of the source frame and target frame in pixel, with range [1, infinity)
588 * @param height The height of the source frame and target frame in pixel, with range [1, infinity)
589 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
590 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
591 * @param rowOperatorFunction The pointer to the row operator function, must be valid
592 * @param worker Optional worker object to distribute the computation
593 * @tparam TSource The data type of the source elements
594 * @tparam TTarget The data type of the target elements
595 * @tparam tSourceChannels The number of channels the source frame has, with range [1, infinity)
596 * @tparam tTargetChannels The number of channels the target frame has, with range [1, infinity)
597 */
598 template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels>
599 static void applyRowOperator(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels>& rowOperatorFunction, Worker* worker = nullptr);
600
601 /**
602 * Transforms a frame with generic pixel format (with zipped pixel information) like RGB24 or YUV24, to a frame with same pixel format and channel number.
603 * This function mainly mirrors or flips an image.
604 * @param source The source frame buffer, must be valid
605 * @param target The target frame buffer, must be valid
606 * @param width The width of the frame in pixel, with range [1, infinity)
607 * @param height The height of the frame in pixel, with range [1, infinity)
608 * @param conversionFlag The conversion to be applied
609 * @param sourcePaddingElements Optional padding at the end of each source row in elements, with range [0, infinity)
610 * @param targetPaddingElements Optional padding at the end of each target row in elements, with range [0, infinity)
611 * @param worker Optional worker object to distribute the computation; this parameter has no default value and must always be passed explicitly
612 * @tparam T Data type of each channel pixel value, e.g., 'uint8_t', 'float', ...
613 * @tparam tChannels Number of channels of the frame, with range [1, infinity)
614 */
615 template <typename T, unsigned int tChannels>
616 static inline void transformGeneric(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker);
617
618 /**
619 * Converts an image with premultiplied alpha to a straight image (without premultiplied alpha) in place.
620 * @param frame The image to convert, the result is written back to the same buffer, must be valid
621 * @param width The width of the image in pixel, with range [1, infinity)
622 * @param height The height of the image in pixel, with range [1, infinity)
623 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
624 * @param worker Optional worker object to distribute the computation
625 * @tparam tChannels The number of frame channels, with range [2, infinity)
626 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
627 * @see straightAlphaToPremultipliedAlpha8BitPerChannel().
628 */
629 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
630 static inline void premultipliedAlphaToStraightAlpha8BitPerChannel(uint8_t* const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker* worker = nullptr);
631
632 /**
633 * Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
634 * @param source The source image to convert, must be valid
635 * @param target The resulting converted target image, must be valid
636 * @param width The width of the image in pixel, with range [1, infinity)
637 * @param height The height of the image in pixel, with range [1, infinity)
638 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
639 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
640 * @param worker Optional worker object to distribute the computation
641 * @tparam tChannels The number of frame channels, with range [2, infinity)
642 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
643 * @see straightAlphaToPremultipliedAlpha8BitPerChannel().
644 */
645 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
646 static inline void premultipliedAlphaToStraightAlpha8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
647
648 /**
649 * Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied alpha in place.
650 * @param frame The image to convert, the result is written back to the same buffer, must be valid
651 * @param width The width of the image in pixel, with range [1, infinity)
652 * @param height The height of the image in pixel, with range [1, infinity)
653 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
654 * @param worker Optional worker object to distribute the computation
655 * @tparam tChannels The number of frame channels, with range [2, infinity)
656 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
657 * @see premultipliedAlphaToStraightAlpha8BitPerChannel().
658 */
659 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
660 static inline void straightAlphaToPremultipliedAlpha8BitPerChannel(uint8_t* const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker* worker = nullptr);
661
662 /**
663 * Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied alpha.
664 * @param source The source image to convert, must be valid
665 * @param target The resulting converted target image, must be valid
666 * @param width The width of the image in pixel, with range [1, infinity)
667 * @param height The height of the image in pixel, with range [1, infinity)
668 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
669 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
670 * @param worker Optional worker object to distribute the computation
671 * @tparam tChannels The number of frame channels, with range [2, infinity)
672 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
673 * @see premultipliedAlphaToStraightAlpha8BitPerChannel().
674 */
675 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
676 static inline void straightAlphaToPremultipliedAlpha8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker = nullptr);
677
678 /**
679 * Reverses/mirrors the order of pixels in a given row (or a memory block in general).
680 * @param source The pointer to the source pixels, must be valid
681 * @param target The pointer to the target pixels receiving the reversed/mirrored pixel data, must be valid
682 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
683 * @tparam T The data type of the pixel elements, e.g., 'uint8_t', 'int'
684 * @tparam tChannels The number of channels (the number of elements) each pixel has, with range [1, infinity)
685 */
686 template <typename T, unsigned int tChannels>
687 static void reverseRowPixelOrder(const T* source, T* target, const size_t size);
688
689 /**
690 * Reverses/mirrors the order of pixels in a given row (or a memory block in general) in place.
691 * @param data The pointer to the pixels which will be reversed/mirrored, must be valid
692 * @param size The number of pixels to reverse, with range [1, infinity)
693 * @tparam T The data type of the pixel elements, e.g., 'uint8_t', 'int'
694 * @tparam tChannels The number of channels (the number of elements) each pixel has, with range [1, infinity)
695 */
696 template <typename T, unsigned int tChannels>
697 static void reverseRowPixelOrderInPlace(T* data, const size_t size);
698
699 /**
700 * Reverses/mirrors the order of channels in a given row (or a memory block in general).
701 * @param source The pointer to the source pixels, must be valid
702 * @param target The pointer to the target pixels receiving the reversed/mirrored channels, must be valid
703 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
704 * @param unusedOptions An unused options parameter, must be nullptr
705 * @tparam T The data type of the pixel elements, e.g., 'uint8_t', 'int'
706 * @tparam tChannels The number of channels (the number of elements) each pixel has, with range [1, infinity)
707 */
708 template <typename T, unsigned int tChannels>
709 static void reverseRowChannelOrder(const T* source, T* target, const size_t size, const void* unusedOptions = nullptr);
710
711 /**
712 * Shuffles the channels of row pixels by application of a specified shuffle pattern.
713 * The shuffle pattern is defined in groups of four bits defining the source channels.<br>
714 * For the shuffling from e.g., an RGBA32 row to a BGRA32 row the pattern 0x3012u must be defined:
715 * <pre>
716 * source pixel R G B A
717 * 0 1 2 3
718 * target pixel B G R A
719 * 2 1 0 3
720 * pattern (with reversed order): 0x3012
721 * </pre>
722 * @param source The pointer to the source pixels, must be valid
723 * @param target The pointer to the target pixels, receiving the shuffled channels, must be valid
724 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
725 * @param unusedOptions An unused options parameter, must be nullptr
726 * @tparam T Data type of each channel pixel value, e.g., 'uint8_t' or 'float'
727 * @tparam tSourceChannels Number of source data channels, with range [1, 8u]
728 * @tparam tTargetChannels Number of target data channels, with range [1, 8u]
729 * @tparam tShufflePattern Groups of four bits define the source channel, e.g., 0x76543210 defines the identity transformation, 0x01234567 defines the reverse transformation
730 */
731 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
732 static inline void shuffleRowChannels(const T* source, T* target, const size_t size, const void* unusedOptions = nullptr);
733
734 /**
735 * Shuffles the channels of row pixels by application of a specified shuffle pattern and sets the last channel of the target row to a constant value.
736 * The shuffle pattern is defined in groups of four bits defining the source channels.<br>
737 * For the shuffling from e.g., an RGB24 row to a BGRA32 row the pattern 0x012u must be defined:
738 * <pre>
739 * source pixel R G B
740 * 0 1 2
741 * target pixel B G R A
742 * 2 1 0
743 * pattern (with reversed order): 0x012
744 * </pre>
745 * @param source The pointer to the source pixels, must be valid
746 * @param target The pointer to the target pixels, receiving the shuffled channels, must be valid
747 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
748 * @param options Pointer to the constant channel value which will be added to the end of the target channels, must be valid (the nullptr default value must not be used)
749 * @tparam T Data type of each channel pixel value, e.g., 'uint8_t' or 'float'
750 * @tparam tSourceChannels Number of source data channels, with range [1, 8u]
751 * @tparam tTargetChannels Number of target data channels, including the additional extra target channel, with range [2, 8u]
752 * @tparam tShufflePattern Groups of four bits define the source channel, e.g., 0x76543210 defines the identity transformation, 0x01234567 defines the reverse transformation
753 */
754 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
755 static inline void shuffleRowChannelsAndSetLastChannelValue(const T* source, T* target, const size_t size, const void* options = nullptr);
756
757 /**
758 * Converts a row of pixels with 3 channels to pixels with one channel by a linear combination of the three channels.
759 * This function can be used to e.g., convert RGB24 to Y8, or BGR24 to Y8.
760 * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
761 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
762 * @param source The pointer to the source pixels, must be valid
763 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
764 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
765 * @param channelMultiplicationFactors_128 The three uint32_t multiplication factors, one for each channel, with range [0, 128], while the sum of all three factors must be 128, must be valid
766 * @tparam tUseFactorChannel0 True, if the value(s) of factorChannel0 is not zero; False, if the value(s) of factorChannel0 is zero
767 * @tparam tUseFactorChannel1 True, if the value(s) of factorChannel1 is not zero; False, if the value(s) of factorChannel1 is zero
768 * @tparam tUseFactorChannel2 True, if the value(s) of factorChannel2 is not zero; False, if the value(s) of factorChannel2 is zero
769 */
770 template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2>
771 static void convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* channelMultiplicationFactors_128);
772
773 /**
774 * Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the three channels plus a translational part applied to the source data before applying the linear transformation.
775 * This function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
776 * The linear combination is defined by three integer multiplication factors for each source channel with 64 as denominator, plus one translation parameter for each source channel (with 1 as denominator).<br>
777 * Beware: As this function applies integer multiplication factors (with 6 bits precision) the conversion result has an accuracy of +/- 4 color intensities.<br>
778 * The transformation is based on the following pattern:
779 * <pre>
780 * t0 = clamp(0, f00 * (s0 - b0) + f01 * (s1 - b1) + f02 * (s2 - b2), 255)
781 * t1 = clamp(0, f10 * (s0 - b0) + f11 * (s1 - b1) + f12 * (s2 - b2), 255)
782 * t2 = clamp(0, f20 * (s0 - b0) + f21 * (s1 - b1) + f22 * (s2 - b2), 255)
783 * </pre>
784 * With t target, s source, f factor, and b bias/translation.<br>
785 * Factors must be specified in relation to a denominator of 64, bias values must be specified with a denominator of 1.
786 * @param source The pointer to the source pixels, must be valid
787 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
788 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
789 * @param parameters The 12 int32_t parameters of the column-aligned 3x3 transformation matrix, plus 3 translation parameters: f00_64, f10_64, f20_64, f01_64, f02_64, ..., f22_64, with ranges [-128, 128], b0, b1, b2, with ranges [0, 128]
790 */
791 static void convertRow3ChannelsTo3Channels8BitPerChannel6BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
792
793 /**
794 * Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the three channels plus a bias (translation) part.
795 * This function can be used to e.g., convert RGB24 to YUV24, or BGR24 to YVU24.
796 * The linear combination is defined by three integer multiplication factors for each source channel with 128 as denominator, plus one bias (translation) parameter for each source channel (with 1 as denominator).<br>
797 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
798 * The transformation is based on the following pattern:
799 * <pre>
800 * t0 = clamp(0, f00 * s0 + f01 * s1 + f02 * s2 + b0, 255)
801 * t1 = clamp(0, f10 * s0 + f11 * s1 + f12 * s2 + b1, 255)
802 * t2 = clamp(0, f20 * s0 + f21 * s1 + f22 * s2 + b2, 255)
803 * </pre>
804 * With t target, s source, f factor, and b bias.<br>
805 * Factors must be specified in relation to a denominator of 128, bias values must be specified with a denominator of 1.
806 * @param source The pointer to the source pixels, must be valid
807 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
808 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
809 * @param parameters The 12 int32_t parameters of the column-aligned 3x4 transformation matrix: f00_128, f10_128, f20_128, f01_128, f02_128, ..., f22_128, b0, b1, b2, with ranges [-127, 127]
810 */
811 static void convertRow3ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
812
813 /**
814 * Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the three channels plus a bias (translation) part.
815 * This function can be used to e.g., convert YUV24 to RGB24, or YVU24 to BGR24.
816 * The linear combination is defined by three integer multiplication factors for each source channel with 1024 as denominator, plus one bias (translation) parameter for each source channel (with 1 as denominator).<br>
817 * Beware: As this function applies integer multiplication factors (with 10 bits precision) the conversion result has an accuracy of +/- 1 color intensities.<br>
818 * The transformation is based on the following pattern:
819 * <pre>
820 * t0 = clamp(0, f00 * s0 + f01 * s1 + f02 * s2 + b0, 255)
821 * t1 = clamp(0, f10 * s0 + f11 * s1 + f12 * s2 + b1, 255)
822 * t2 = clamp(0, f20 * s0 + f21 * s1 + f22 * s2 + b2, 255)
823 * </pre>
824 * With t target, s source, f factor, and b bias.<br>
825 * Factors must be specified in relation to a denominator of 1024, bias values must be specified with a denominator of 1.
826 * @param source The pointer to the source pixels, must be valid
827 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
828 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
829 * @param parameters The 12 int32_t parameters of the column-aligned 3x4 transformation matrix: f00_1024, f10_1024, f20_1024, f01_1024, f02_1024, ..., f22_1024, b0, b1, b2, with ranges [-1024 * 16, 1024 * 16]
830 */
831 static void convertRow3ChannelsTo3Channels8BitPerChannel10BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
832
833 /**
834 * Converts a row of pixels with 3 channels to pixels with 4 channels by a linear combination of the three channels plus a translational part applied to the source data before applying the linear transformation (for the first three channels).
835 * The fourth channel is set to a constant value, e.g., for an alpha channel.<br>
836 * This function can be used to e.g., convert YUV24 to RGBA32, or YVU24 to BGRA32.<br>
837 * The linear combination is defined by three integer multiplication factors for each source channel with 64 as denominator, plus one translation parameter for each source channel (with 1 as denominator).<br>
838 * Beware: As this function applies integer multiplication factors (with 6 bits precision) the conversion result has an accuracy of +/- 4 color intensities.<br>
839 * The transformation is based on the following pattern:
840 * <pre>
841 * t0 = clamp(0, f00 * (s0 - b0) + f01 * (s1 - b1) + f02 * (s2 - b2), 255)
842 * t1 = clamp(0, f10 * (s0 - b0) + f11 * (s1 - b1) + f12 * (s2 - b2), 255)
843 * t2 = clamp(0, f20 * (s0 - b0) + f21 * (s1 - b1) + f22 * (s2 - b2), 255)
844 * t3 = valueChannel3
845 * </pre>
846 * With t target, s source, f factor, and b bias/translation.<br>
847 * Factors must be specified in relation to a denominator of 64, bias values must be specified with a denominator of 1.
848 * @param source The pointer to the source pixels, must be valid
849 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
850 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
851 * @param parameters The 13 int32_t parameters of the column-aligned 3x3 transformation matrix, plus 3 translation parameters: f00_64, f10_64, f20_64, f01_64, f02_64, ..., f22_64, with ranges [-128, 128], b0, b1, b2, with ranges [0, 128], valueChannel3, with range [0, 255]
852 */
853 static void convertRow3ChannelsTo4Channels8BitPerChannel6BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
854
855 /**
856 * Converts a row of pixels with 4 channels to pixels with one channel by a linear combination of the four channels.
857 * This function can be used to e.g., convert RGBA32 to Y8, or ARGB32 to Y8, or RGB32 to Y8.
858 * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
859 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
860 * <pre>
861 * t0 = f0 * s0 + f1 * s1 + f2 * s2 + f3 * s3
862 * </pre>
863 * @param source The pointer to the source pixels, must be valid
864 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
865 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
866 * @param channelMultiplicationFactors_128 The four uint32_t multiplication factors, one for each channel, with range [0, 127], while the sum of all four factors must be 128, must be valid
867 * @tparam tUseFactorChannel0 True, if the multiplication factor of the first channel is not zero; False, if the multiplication factor of the first channel is zero
868 * @tparam tUseFactorChannel1 True, if the multiplication factor of the second channel is not zero; False, if the multiplication factor of the second channel is zero
869 * @tparam tUseFactorChannel2 True, if the multiplication factor of the third channel is not zero; False, if the multiplication factor of the third channel is zero
870 * @tparam tUseFactorChannel3 True, if the multiplication factor of the fourth channel is not zero; False, if the multiplication factor of the fourth channel is zero
871 */
872 template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2, bool tUseFactorChannel3>
873 static void convertRow4ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* channelMultiplicationFactors_128);
874
875 /**
876 * Converts a row of pixels with 4 channels to pixels with two channels by a linear combination of the four channels.
877 * This function can be used to e.g., convert RGBA32 to YA16, or ARGB32 to AY16.
878 * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
879 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
880 * The transformation is based on the following pattern:
881 * <pre>
882 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + f03 * s3
883 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + f13 * s3
884 * </pre>
885 * @param source The pointer to the source pixels, must be valid
886 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
887 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
888 * @param multiplicationFactors_128 The 8 int32_t parameters of the column-aligned 2x4 transformation matrix: f00_128, f10_128, f01_128, ..., f13_128, with range [0, 127], while the sum of the four factors of each matrix row must be 128, must be valid
889 */
890 static void convertRow4ChannelsTo2Channels8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* multiplicationFactors_128);
891
892 /**
893 * Converts a row of pixels with 4 channels to pixels with 3 channels by a linear combination of the four channels plus a bias (translation) part.
894 * This function can be used to e.g., convert RGBA32 to YUV24, or BGRA32 to YVU24.
895 * The linear combination is defined by three integer multiplication factors for each source channel with 128 as denominator, plus one bias (translation) parameter for each target channel (with 1 as denominator).<br>
896 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
897 * The transformation is based on the following pattern:
898 * <pre>
899 * t0 = clamp(0, f00 * s0 + f01 * s1 + f02 * s2 + f03 * s3 + b0, 255)
900 * t1 = clamp(0, f10 * s0 + f11 * s1 + f12 * s2 + f13 * s3 + b1, 255)
901 * t2 = clamp(0, f20 * s0 + f21 * s1 + f22 * s2 + f23 * s3 + b2, 255)
902 * </pre>
903 * With t target, s source, f factor, and b bias.<br>
904 * Factors must be specified in relation to a denominator of 128, bias values must be specified with a denominator of 1.
905 * @param source The pointer to the source pixels, must be valid
906 * @param target The pointer to the target pixels receiving the converted pixel data, must be valid
907 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
908 * @param parameters The 12 int32_t parameters of the column-aligned 3x4 transformation matrix: f00_128, f10_128, f20_128, f01_128, f02_128, ..., f23_128, b0, b1, b2, with ranges [-127, 127]
909 */
910 static void convertRow4ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* parameters);
911
912 /**
913 * Narrows a row of pixels with 16 bit channels to pixels with 8 bit channels.
914 * @param source The pointer to the source pixels with 16 bit per channel, must be valid
915 * @param target The pointer to the target pixels with 8 bit per channel receiving the converted pixel data, must be valid
916 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
917 * @param unusedParameters Unused parameter, must be nullptr
918 * @tparam tChannels The number of channels the source (and target) frame have, with range [1, infinity)
919 */
920 template <unsigned int tChannels>
921 static void narrowRow16BitPerChannelTo8BitPerChannel(const uint16_t* source, uint8_t* target, const size_t size, const void* unusedParameters = nullptr);
922
923 /**
924 * Adds a channel to a given row with generic (zipped) pixel format and copies the information of the new channel from a one-channel image.
925 * The channel can be added as new first channel or as new last channel.
926 * @param sources The pointer to the multi-channel source frame and to the single-channel source frame, must be valid
927 * @param targets The one pointer to the target image, must be valid
928 * @param multipleRowIndex The index of the multiple-row to be handled, with range [0, height - 1]
929 * @param width The width of the frame in pixel, with range [1, infinity), must be even
930 * @param height The height of the frame in pixel, with range [1, infinity), must be even
931 * @param conversionFlag The conversion to be applied
932 * @param options The 1 options parameters: padding parameters of 1-channel source frame, must be valid
933 * @tparam T Data type of each channel pixel value, e.g, 'uint8_t' or 'float'
934 * @tparam tSourceChannels Number of channels of the source frame (without the new channel), with range [1, infinity)
935 * @tparam tAddToFront True, to add the channel to the front (as new first channel); False, to add the channel to the back (as new last channel).
936 */
937 template <typename T, unsigned int tSourceChannels, bool tAddToFront>
938 static void addChannelRow(const void** sources, void** targets, const unsigned int multipleRowIndex, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const void* options);
939
940 /**
941 * Adds a channel to a given row with generic (zipped) pixel format and sets all values to a specified value.
942 * The channel can be added as new first channel or as new last channel.
943 * @param source The pointer to the source pixels, must be valid
944 * @param target The pointer to the target pixels, receiving the additional channel, must be valid
945 * @param size The number of source (and target) pixels to convert, with range [1, infinity)
946 * @param channelValueParameter The pointer to the value of the channel to be set (with data type 'T'), must be valid
947 * @tparam T Data type of each channel pixel value, e.g, 'uint8_t' or 'float'
948 * @tparam tSourceChannels Number of channels of the source frame (without the new channel), with range [1, infinity)
949 * @tparam tAddToFront True, to add the channel to the front (as new first channel); False, to add the channel to the back (as new last channel).
950 */
951 template <typename T, unsigned int tSourceChannels, bool tAddToFront>
952 static void addChannelValueRow(const T* source, T* target, const size_t size, const void* channelValueParameter);
953
954 /**
955 * Copies one channel from a source row to a target row with generic (zipped) pixel format.
956 * @param source The pointer to the source pixels, must be valid
957 * @param target The pointer to the target pixels, receiving the copied channel, must be valid
958 * @param size The number of source (and target) pixels to handle, with range [1, infinity)
959 * @param unusedParameters Unused parameters, must be nullptr
960 * @tparam T Data type of each channel pixel value, e.g, 'uint8_t' or 'float'
961 * @tparam tSourceChannels Number of channels of the source frame, with range [1, infinity)
962 * @tparam tTargetChannels Number of channels of the target frame, with range [1, infinity)
963 * @tparam tSourceChannelIndex The index of the source channel to be copied, with range [0, tSourceChannels - 1]
964 * @tparam tTargetChannelIndex The index of the target channel receiving the copied values, with range [0, tTargetChannels - 1]
965 */
966 template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tSourceChannelIndex, unsigned int tTargetChannelIndex>
967 static void copyChannelRow(const T* source, T* target, const size_t size, const void* unusedParameters = nullptr);
968
969 protected:
970
971 /**
972 * Separates a given frame with zipped pixel format (e.g., FORMAT_RGB24, FORMAT_YUV24, FORMAT_BGRA32) into individual frames with one channel only, while the number of channels is specified at runtime.
973 * @param sourceFrame The frame to be separated, must be valid
974 * @param targetFrames The pointers to the resulting separated frames, each holding one channel of the source frame, with already allocated memory
975 * @param width The width of the source frame in pixel, with range [1, infinity)
976 * @param height The height of the source frame in pixel, with range [1, infinity)
977 * @param channels The number of channels the source frame has, with range [1, infinity)
978 * @param sourceFramePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
979 * @param targetFramesPaddingElements The array of padding elements at the end of each target row, one for each target frame, in elements, with range [0, infinity)
980 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
981 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
982 */
983 template <typename TSource, typename TTarget>
984 static void separateTo1ChannelRuntime(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements);
985
986 /**
987 * Zips/interleaves 1-channel images into one image with n-channels, while the number of channels is specified at runtime.
988 * @param sourceFrames The pointers to the individual 1-channel frames, one for each channel of the target frame, must be valid
989 * @param targetFrame The pointer to the resulting zipped frame holding n-channels, must be valid
990 * @param width The width of the source frames in pixel, with range [1, infinity)
991 * @param height The height of the source frames in pixel, with range [1, infinity)
992 * @param channels The number of provided source frames (and the number of channels the target frame will have), with range [1, infinity)
993 * @param sourceFramesPaddingElements The array of padding elements at the end of each source row, one for each source frame, in elements, with range [0, infinity)
994 * @param targetFramePaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
995 * @tparam TSource The data type of each source pixel channel, e.g., 'uint8_t', 'float', ...
996 * @tparam TTarget The data type of each target pixel channel, e.g., 'uint8_t', 'float', ...
997 */
998 template <typename TSource, typename TTarget>
999 static void zipChannelsRuntime(const TSource* const* const sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements);
1000
1001 /**
1002 * Sets one channel of a subset of the rows of a frame to one unique value.
1003 * @param frame The frame in which one channel of each pixel will be set, must be valid
1004 * @param width The width of the frame in pixel, with range [1, infinity)
1005 * @param value The value to be set
1006 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
1007 * @param firstRow First row to be handled
1008 * @param numberRows Number of rows to be handled
1009 * @tparam T Data type of each channel pixel value
1010 * @tparam tChannel Index of the channel that will be set, with range [0, tChannels)
1011 * @tparam tChannels Number of data channels of the frames, with range [1, infinity)
1012 */
1013 template <typename T, unsigned int tChannel, unsigned int tChannels>
1014 static void setChannelSubset(T* frame, const unsigned int width, const T value, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1015
1016 /**
1017 * Applies a specific modifier function on each pixel of a subset of the rows of a frame.
1018 * @param source The source frame providing the pixel information, must be valid
1019 * @param target The target frame receiving the pixel information, must be valid
1020 * @param width The width of the source frame in pixel
1021 * @param height The height of the source frame in pixel
1022 * @param conversionFlag The conversion to be applied
1023 * @param firstRow First row to be handled
1024 * @param numberRows Number of rows to be handled
1025 * @tparam T Data type of each channel pixel value
1026 * @tparam tChannels Number of data channels, with range [1, infinity)
1027 * @tparam tPixelFunction Pixel modification function, receiving the pointer to a source pixel and the pointer to a target pixel
1028 */
1029 template <typename T, unsigned int tChannels, void (*tPixelFunction)(const T*, T*)>
1030 static void applyPixelModifierSubset(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows);
1031
1032 /**
1033 * Applies a specific modifier function on each pixel of a subset of the rows of a frame, while source and target can have individual data types and individual numbers of channels.
1034 * @param source The source frame providing the pixel information, must be valid
1035 * @param target The target frame receiving the pixel information, must be valid
1036 * @param width The width of the source frame in pixel, with range [1, infinity)
1037 * @param height The height of the source frame in pixel, with range [1, infinity)
1038 * @param sourcePaddingElements The number of padding elements at the end of each row of the source frame, in elements, with range [0, infinity)
1039 * @param targetPaddingElements The number of padding elements at the end of each row of the target frame, in elements, with range [0, infinity)
1040 * @param conversionFlag The conversion to be applied
1041 * @param firstRow First row to be handled
1042 * @param numberRows Number of rows to be handled
1043 * @tparam TSource Data type of each source channel pixel value
1044 * @tparam TTarget Data type of each target channel pixel value
1045 * @tparam tSourceChannels Number of source data channels, with range [1, infinity)
1046 * @tparam tTargetChannels Number of target data channels, with range [1, infinity)
1047 * @tparam tPixelFunction Pixel modification function, receiving the pointer to a source pixel and the pointer to a target pixel
1048 */
1049 template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tPixelFunction)(const TSource*, TTarget*)>
1050 static void applyAdvancedPixelModifierSubset(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows);
1051
1052 /**
1053 * Applies a generic bivariate per-pixel operator to a subset of the rows of two source frames and stores the result in a target frame.
1054 * @param source0 First source frame
1055 * @param source1 Second source frame
1056 * @param target The target frame
1057 * @param width The width of the source frame in pixel, with range [1, infinity)
1058 * @param height The height of the source frame in pixel, with range [1, infinity)
1059 * @param source0PaddingElements The number of padding elements at the end of each row of the first source, in elements, with range [0, infinity)
1060 * @param source1PaddingElements The number of padding elements at the end of each row of the second source, in elements, with range [0, infinity)
1061 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1062 * @param conversionFlag The conversion to be applied
1063 * @param firstRow First row to be handled
1064 * @param numberRows Number of rows to be handled
1065 * @tparam TSource0 Type of the first data source
1066 * @tparam TSource1 Type of the second data source
1067 * @tparam TTarget Type of the target
1068 * @tparam TIntermediate Type for the computation of intermediate result, e.g. if TSource0 and TSource1 are different
1069 * @tparam tSourceChannels Number of channels of the two sources, range: [1, infinity)
1070 * @tparam tTargetChannels Number of channels of the target, range: [1, infinity)
1071 * @tparam tOperator The operation (function) that is applied on both sources to yield the value for the target (called per pixel), receiving the pointers to both source pixels and the pointer to the target pixel
1072 */
1073 template <typename TSource0, typename TSource1, typename TTarget, typename TIntermediate, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
1074 static void applyBivariateOperatorSubset(const TSource0* source0, const TSource1* source1, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows);
1075
1076 /**
1077 * Applies a row operator to a subset of all rows of a source image.
1078 * The row operator is given as function pointer and is intended to transform a source row to a target row.<br>
1079 * The function allows to implement e.g., frame filters with few lines of code, source and target frame must have the same size.
1080 * @param source The source frame to which the row operator is applied, must be valid
1081 * @param target The target frame receiving the result of the row operator, must be valid
1082 * @param width The width of the source frame and target frame in pixel, with range [1, infinity)
1083 * @param height The height of the source frame and target frame in pixel, with range [1, infinity)
1084 * @param sourceStrideElements The number of elements between the start points of two successive source rows, in elements, with range [width * tSourceChannels, infinity)
1085 * @param targetStrideElements The number of elements between the start points of two successive target rows, in elements, with range [width * tTargetChannels, infinity)
1086 * @param rowOperatorFunction The pointer to the row operator function, must be valid
1087 * @param firstRow The first row to be handled, with range [0, height - 1]
1088 * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1089 * @tparam TSource The data type of the source elements
1090 * @tparam TTarget The data type of the target elements
1091 * @tparam tSourceChannels The number of channels the source frame has, with range [1, infinity)
1092 * @tparam tTargetChannels The number of channels the target frame has, with range [1, infinity)
1093 */
1094 template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels>
1095 static void applyRowOperatorSubset(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels> rowOperatorFunction, const unsigned int firstRow, const unsigned int numberRows);
1096
1097 /**
1098 * Transforms a subset of a frame with generic pixel format (with zipped pixel information) like RGB24 or YUV24, to a frame with same pixel format and channel number.
1099 * @param source The source frame buffer, must be valid
1100 * @param target The target frame buffer, must be valid
1101 * @param width The width of the frame in pixel, with range [1, infinity)
1102 * @param height The height of the frame in pixel, with range [1, infinity)
1103 * @param conversionFlag The conversion to be applied
1104 * @param rowReversePixelOrderFunction The function able to reverse the pixel order, must be valid
1105 * @param bytesPerRow The actual number of bytes each row covers, not including optional padding bytes at the end of each row, with range [width, infinity)
1106 * @param sourceStrideBytes The number of bytes between the start points of two successive rows in the source frame, with range [0, infinity)
1107 * @param targetStrideBytes The number of bytes between the start points of two successive rows in the target frame, with range [0, infinity)
1108 * @param firstRow The first row to be handled, with range [0, height - 1]
1109 * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1110 */
1111 static void transformGenericSubset(const uint8_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const RowReversePixelOrderFunction<void> rowReversePixelOrderFunction, const unsigned int bytesPerRow, const unsigned int sourceStrideBytes, const unsigned int targetStrideBytes, const unsigned int firstRow, const unsigned int numberRows);
1112
1113 /**
1114 * Converts a subset of the rows of an image with premultiplied alpha to a straight image (without premultiplied alpha), the given image is modified directly.
1115 * @param frame The image to convert, must be valid
1116 * @param width The width of the image in pixel, with range [1, infinity)
1117 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
1118 * @param firstRow The first row to be handled, with range [0, height - 1]
1119 * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1120 * @tparam tChannels The number of frame channels, with range [2, infinity)
1121 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
1122 */
1123 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
1124 static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(uint8_t* const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1125
1126 /**
1127 * Converts a subset of the rows of a source image with premultiplied alpha to a target image with straight alpha (without premultiplied alpha).
1128 * @param source The source image to convert, must be valid
1129 * @param target The resulting converted target image, must be valid
1130 * @param width The width of the image in pixel, with range [1, infinity)
1131 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
1132 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1133 * @param firstRow The first row to be handled, with range [0, height - 1]
1134 * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1135 * @tparam tChannels The number of frame channels, with range [2, infinity)
1136 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
1137 */
1138 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
1139 static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1140
1141 /**
1142 * Converts a subset of the rows of an image with straight alpha (without premultiplied alpha) to an image with premultiplied alpha, the given image is modified directly.
1143 * @param frame The image to convert, must be valid
1144 * @param width The width of the image in pixel, with range [1, infinity)
1145 * @param framePaddingElements The number of padding elements at the end of each frame row, in elements, with range [0, infinity)
1146 * @param firstRow The first row to be handled, with range [0, height - 1]
1147 * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1148 * @tparam tChannels The number of frame channels, with range [2, infinity)
1149 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
1150 */
1151 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
1152 static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(uint8_t* const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1153
1154 /**
1155 * Converts a subset of the rows of a source image with straight alpha (without premultiplied alpha) to a target image with premultiplied alpha.
1156 * @param source The source image to convert, must be valid
1157 * @param target The resulting converted target image, must be valid
1158 * @param width The width of the image in pixel, with range [1, infinity)
1159 * @param sourcePaddingElements The number of padding elements at the end of each source row, in elements, with range [0, infinity)
1160 * @param targetPaddingElements The number of padding elements at the end of each target row, in elements, with range [0, infinity)
1161 * @param firstRow The first row to be handled, with range [0, height - 1]
1162 * @param numberRows The number of rows to be handled, with range [1, height - firstRow]
1163 * @tparam tChannels The number of frame channels, with range [2, infinity)
1164 * @tparam tAlphaChannelIndex The index of the alpha channel, with range [0, tChannels - 1]
1165 */
1166 template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
1167 static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstRow, const unsigned int numberRows);
1168
1169#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
1170
1171 /**
1172 * Converts 16 pixels with 3 channels per pixel to 16 pixels with one channel per pixel by a linear combination of the three channels.
1173 * This function can be used to e.g., convert RGB24 to Y8, or BGR24 to Y8.
1174 * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
1175 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
1176 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1177 * @param target The pointer to the 16 target pixels (with 1 channel = 16 bytes) receiving the converted pixel data, must be valid
1178 * @param multiplicationFactors0_128_u_16x8 The multiplication factor for the first channel (8 identical 16 bit values), with ranges [0, 128], while the sum of all three factors must be 128
1179 * @param multiplicationFactors1_128_u_16x8 The multiplication factor for the second channel (8 identical 16 bit values), with ranges [0, 128], while the sum of all three factors must be 128
1180 * @param multiplicationFactors2_128_u_16x8 The multiplication factor for the third channel (8 identical 16 bit values), with ranges [0, 128], while the sum of all three factors must be 128
1181 */
1182 static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactors0_128_u_16x8, const __m128i& multiplicationFactors1_128_u_16x8, const __m128i& multiplicationFactors2_128_u_16x8);
1183
1184 /**
1185 * Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1186 * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1187 * The linear combination is defined by three integer multiplication factors for each source channel with 128 as denominator, plus one bias (translation) parameter for each target channel (with 1 as denominator).<br>
1188 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
1189 * The transformation is based on the following pattern:
1190 * <pre>
1191 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1192 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1193 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1194 * </pre>
1195 * With t target, s source, f factor, and b bias.
1196 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1197 * @param target The pointer to the 16 target pixels (with 3 channels = 48 bytes) receiving the converted pixel data, must be valid
1198 * @param factorChannel00_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1199 * @param factorChannel10_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1200 * @param factorChannel20_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1201 * @param factorChannel01_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1202 * @param factorChannel11_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1203 * @param factorChannel21_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1204 * @param factorChannel02_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1205 * @param factorChannel12_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1206 * @param factorChannel22_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1207 * @param biasChannel0_s_16x8 The bias (translation) value for the first target channel, with range [-127, 127]
1208 * @param biasChannel1_s_16x8 The bias (translation) value for the second target channel, with range [-127, 127]
1209 * @param biasChannel2_s_16x8 The bias (translation) value for the third target channel, with range [-127, 127]
1210 */
1211 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& factorChannel00_128_s_16x8, const __m128i& factorChannel10_128_s_16x8, const __m128i& factorChannel20_128_s_16x8, const __m128i& factorChannel01_128_s_16x8, const __m128i& factorChannel11_128_s_16x8, const __m128i& factorChannel21_128_s_16x8, const __m128i& factorChannel02_128_s_16x8, const __m128i& factorChannel12_128_s_16x8, const __m128i& factorChannel22_128_s_16x8, const __m128i& biasChannel0_s_16x8, const __m128i& biasChannel1_s_16x8, const __m128i& biasChannel2_s_16x8);
1212
1213 /**
1214 * Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1215 * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1216 * The linear combination is defined by three integer multiplication factors for each source channel with 1024 as denominator, plus one bias (translation) parameter for each target channel (with 1 as denominator; NOTE(review): the '_1024_' in the bias parameter names suggests 1024 as denominator instead, to be confirmed against the implementation).<br>
1217 * Beware: As this function applies integer multiplication factors (with 10 bits precision) the conversion result has an accuracy of +/- 1 color intensity.<br>
1218 * The transformation is based on the following pattern:
1219 * <pre>
1220 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1221 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1222 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1223 * </pre>
1224 * With t target, s source, f factor, and b bias.
1225 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1226 * @param target The pointer to the 16 target pixels (with 3 channels = 48 bytes) receiving the converted pixel data, must be valid
1227 * @param factorChannel00_1024_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-1024 * 16, 1024 * 16]
1228 * @param factorChannel10_1024_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-1024 * 16, 1024 * 16]
1229 * @param factorChannel20_1024_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-1024 * 16, 1024 * 16]
1230 * @param factorChannel01_1024_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-1024 * 16, 1024 * 16]
1231 * @param factorChannel11_1024_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-1024 * 16, 1024 * 16]
1232 * @param factorChannel21_1024_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-1024 * 16, 1024 * 16]
1233 * @param factorChannel02_1024_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-1024 * 16, 1024 * 16]
1234 * @param factorChannel12_1024_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-1024 * 16, 1024 * 16]
1235 * @param factorChannel22_1024_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-1024 * 16, 1024 * 16]
1236 * @param biasChannel0_1024_s_32x4 The bias (translation) value for the first target channel, with range [-1024 * 16, 1024 * 16]
1237 * @param biasChannel1_1024_s_32x4 The bias (translation) value for the second target channel, with range [-1024 * 16, 1024 * 16]
1238 * @param biasChannel2_1024_s_32x4 The bias (translation) value for the third target channel, with range [-1024 * 16, 1024 * 16]
1239 */
1240 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& factorChannel00_1024_s_16x8, const __m128i& factorChannel10_1024_s_16x8, const __m128i& factorChannel20_1024_s_16x8, const __m128i& factorChannel01_1024_s_16x8, const __m128i& factorChannel11_1024_s_16x8, const __m128i& factorChannel21_1024_s_16x8, const __m128i& factorChannel02_1024_s_16x8, const __m128i& factorChannel12_1024_s_16x8, const __m128i& factorChannel22_1024_s_16x8, const __m128i& biasChannel0_1024_s_32x4, const __m128i& biasChannel1_1024_s_32x4, const __m128i& biasChannel2_1024_s_32x4);
1241
1242 /**
1243 * Converts 16 pixels with 4 channels per pixel to 16 pixels with one channel per pixel by a linear combination of the four channels.
1244 * This function can be used to e.g., convert RGBA32 to Y8, or ARGB32 to Y8, or RGB32 to Y8.
1245 * The linear combination is defined by one integer multiplication factor for each source channel, each with 128 as denominator.<br>
1246 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
1247 * @param source The pointer to the 16 source pixels (with 4 channels = 64 bytes) to convert, must be valid
1248 * @param target The pointer to the 16 target pixels (with 1 channel = 16 bytes) receiving the converted pixel data, must be valid
1249 * @param multiplicationFactors0123_128_s_32x The four individual multiplication factors, one for each source channel, each with range [0, 127], while the sum of all four factors must be 128
1250 */
1251 static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactors0123_128_s_32x);
1252
1253 /**
1254 * Converts 16 pixels with 4 channels per pixel to 16 pixels with two channels per pixel by a linear combination of the four channels.
1255 * This function can be used to e.g., convert RGBA32 to YA16, or ARGB32 to AY16.
1256 * The linear combination is defined by one integer multiplication factor for each source channel (per target channel), each with 128 as denominator.<br>
1257 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
1258 * @param source The pointer to the 16 source pixels (with 4 channels = 64 bytes) to convert, must be valid
1259 * @param target The pointer to the 16 target pixels (with 2 channels = 32 bytes) receiving the converted pixel data, must be valid
1260 * @param multiplicationFactorsChannel0_0123_128_s_16x8 The four individual multiplication factors for the first target channel (two sets), one for each source channel, each with range [0, 128], while the sum of all four factors must be 128
1261 * @param multiplicationFactorsChannel1_0123_128_s_16x8 The four individual multiplication factors for the second target channel (two sets), one for each source channel, each with range [0, 128], while the sum of all four factors must be 128
1262 */
1263 static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactorsChannel0_0123_128_s_16x8, const __m128i& multiplicationFactorsChannel1_0123_128_s_16x8);
1264
1265#endif // OCEAN_HARDWARE_SSE_VERSION >= 41
1266
1267#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1268
1269 /**
1270 * Converts 8 pixels with 3 channels per pixel to 8 pixels with one channel per pixel by a linear combination of the three channels.
1271 * Thus, this function can be used to e.g., convert RGB24 to Y8, or BGR24 to Y8.
1272 * The linear combination is defined by one integer multiplication factor for each source channel, each with 128 as denominator.<br>
1273 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
1274 * @param source The pointer to the 8 source pixels (with 3 channels = 24 bytes) to convert, must be valid
1275 * @param target The pointer to the 8 target pixels (with 1 channel = 8 bytes) receiving the converted pixel data, must be valid
1276 * @param factorChannel0_128_u_8x8 The multiplication factor (8 identical factors) for the first channel, with range [0, 128]
1277 * @param factorChannel1_128_u_8x8 The multiplication factor (8 identical factors) for the second channel, with range [0, 128 - factorChannel0 - factorChannel2]
1278 * @param factorChannel2_128_u_8x8 The multiplication factor (8 identical factors) for the third channel, with range [0, 128 - factorChannel0 - factorChannel1]
1279 * @tparam tUseFactorChannel0 True, if the values of factorChannel0 are not zero; False, if the values of factorChannel0 are zero
1280 * @tparam tUseFactorChannel1 True, if the values of factorChannel1 are not zero; False, if the values of factorChannel1 are zero
1281 * @tparam tUseFactorChannel2 True, if the values of factorChannel2 are not zero; False, if the values of factorChannel2 are zero
1282 */
1283 template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2>
1284 static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel0_128_u_8x8, const uint8x8_t& factorChannel1_128_u_8x8, const uint8x8_t& factorChannel2_128_u_8x8);
1285
1286 /**
1287 * Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear combination of the three channels plus an in advance bias (translation) parameter.
1288 * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1289 * The linear combination is defined by three integer multiplication factors for each source channel with 64 as denominator, plus one bias (translation) parameter for each source channel (with 1 as denominator) which is subtracted before the multiplication.<br>
1290 * Beware: As this function applies integer multiplication factors (with 6 bits precision) the conversion result has an accuracy of +/- 4 color intensities.<br>
1291 * The transformation is based on the following pattern:
1292 * <pre>
1293 * t0 = clamp(0, f00 * (s0 - b0) + f01 * (s1 - b1) + f02 * (s2 - b2), 255)
1294 * t1 = clamp(0, f10 * (s0 - b0) + f11 * (s1 - b1) + f12 * (s2 - b2), 255)
1295 * t2 = clamp(0, f20 * (s0 - b0) + f21 * (s1 - b1) + f22 * (s2 - b2), 255)
1296 * </pre>
1297 * With t target, s source, f factor, and b bias/translation.
1298 * @param source The pointer to the 8 source pixels (with 3 channels = 24 bytes) to convert, must be valid
1299 * @param target The pointer to the 8 target pixels (with 3 channels = 24 bytes) receiving the converted pixel data, must be valid
1300 * @param factorChannel00_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1301 * @param factorChannel10_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1302 * @param factorChannel20_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1303 * @param factorChannel01_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1304 * @param factorChannel11_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1305 * @param factorChannel21_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1306 * @param factorChannel02_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1307 * @param factorChannel12_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1308 * @param factorChannel22_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1309 * @param biasChannel0_u_8x8 The bias (translation) value subtracted from the first source channel (b0 in the pattern above), with range [0, 128]
1310 * @param biasChannel1_u_8x8 The bias (translation) value subtracted from the second source channel (b1 in the pattern above), with range [0, 128]
1311 * @param biasChannel2_u_8x8 The bias (translation) value subtracted from the third source channel (b2 in the pattern above), with range [0, 128]
1312 */
1313 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8);
1314
1315 /**
1316 * Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear combination of the three channels plus an in advance bias (translation) parameter.
1317 * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1318 * The linear combination is defined by three integer multiplication factors for each source channel with 64 as denominator, plus one bias (translation) parameter for each source channel (with 1 as denominator) which is subtracted before the multiplication.<br>
1319 * Beware: As this function applies integer multiplication factors (with 6 bits precision) the conversion result has an accuracy of +/- 4 color intensities.<br>
1320 * The transformation is based on the following pattern:
1321 * <pre>
1322 * t0 = clamp(0, f00 * (s0 - b0) + f01 * (s1 - b1) + f02 * (s2 - b2), 255)
1323 * t1 = clamp(0, f10 * (s0 - b0) + f11 * (s1 - b1) + f12 * (s2 - b2), 255)
1324 * t2 = clamp(0, f20 * (s0 - b0) + f21 * (s1 - b1) + f22 * (s2 - b2), 255)
1325 * </pre>
1326 * With t target, s source, f factor, and b bias/translation.
1327 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1328 * @param target The pointer to the 16 target pixels (with 3 channels = 48 bytes) receiving the converted pixel data, must be valid
1329 * @param factorChannel00_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1330 * @param factorChannel10_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1331 * @param factorChannel20_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1332 * @param factorChannel01_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1333 * @param factorChannel11_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1334 * @param factorChannel21_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1335 * @param factorChannel02_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1336 * @param factorChannel12_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1337 * @param factorChannel22_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1338 * @param biasChannel0_u_8x8 The bias (translation) value subtracted from the first source channel (b0 in the pattern above), with range [0, 128]
1339 * @param biasChannel1_u_8x8 The bias (translation) value subtracted from the second source channel (b1 in the pattern above), with range [0, 128]
1340 * @param biasChannel2_u_8x8 The bias (translation) value subtracted from the third source channel (b2 in the pattern above), with range [0, 128]
1341 */
1342 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8);
1343
1344 /**
1345 * Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1346 * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1347 * The linear combination is defined by three integer multiplication factors for each source channel with 128 as denominator, plus one bias (translation) parameter for each target channel (also with 128 as denominator).<br>
1348 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
1349 * The transformation is based on the following pattern:
1350 * <pre>
1351 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1352 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1353 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1354 * </pre>
1355 * With t target, s source, f factor, and b bias.
1356 * @param source The pointer to the 8 source pixels (with 3 channels = 24 bytes) to convert, must be valid
1357 * @param target The pointer to the 8 target pixels (with 3 channels = 24 bytes) receiving the converted pixel data, must be valid
1358 * @param factorChannel00_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1359 * @param factorChannel10_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1360 * @param factorChannel20_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1361 * @param factorChannel01_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1362 * @param factorChannel11_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1363 * @param factorChannel21_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1364 * @param factorChannel02_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1365 * @param factorChannel12_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1366 * @param factorChannel22_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1367 * @param biasChannel0_128_s_16x8 The bias (translation) value for the first target channel, with range [-128 * 128, 128 * 128]
1368 * @param biasChannel1_128_s_16x8 The bias (translation) value for the second target channel, with range [-128 * 128, 128 * 128]
1369 * @param biasChannel2_128_s_16x8 The bias (translation) value for the third target channel, with range [-128 * 128, 128 * 128]
1370 */
1371 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_128_s_16x8, const int16x8_t& factorChannel10_128_s_16x8, const int16x8_t& factorChannel20_128_s_16x8, const int16x8_t& factorChannel01_128_s_16x8, const int16x8_t& factorChannel11_128_s_16x8, const int16x8_t& factorChannel21_128_s_16x8, const int16x8_t& factorChannel02_128_s_16x8, const int16x8_t& factorChannel12_128_s_16x8, const int16x8_t& factorChannel22_128_s_16x8, const int16x8_t& biasChannel0_128_s_16x8, const int16x8_t& biasChannel1_128_s_16x8, const int16x8_t& biasChannel2_128_s_16x8);
1372
1373 /**
1374 * Converts 8 pixels with 3 channels per pixel to 8 pixels with three channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1375 * Thus, this function can be used to e.g., convert YUV24 to RGB24, or YVU24 to BGR24.
1376 * The linear combination is defined by three integer multiplication factors for each source channel with 1024 as denominator, plus one bias (translation) parameter for each target channel (also with 1024 as denominator).<br>
1377 * Beware: As this function applies integer multiplication factors (with 10 bits precision) the conversion result has an accuracy of +/- 1 color intensity.<br>
1378 * The transformation is based on the following pattern:
1379 * <pre>
1380 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1381 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1382 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1383 * </pre>
1384 * With t target, s source, f factor, and b bias.
1385 * @param source The pointer to the 8 source pixels (with 3 channels = 24 bytes) to convert, must be valid
1386 * @param target The pointer to the 8 target pixels (with 3 channels = 24 bytes) receiving the converted pixel data, must be valid
1387 * @param factorChannel00_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the first target channel, with range [-32767, 32767]
1388 * @param factorChannel10_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the second target channel, with range [-32767, 32767]
1389 * @param factorChannel20_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the third target channel, with range [-32767, 32767]
1390 * @param factorChannel01_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the first target channel, with range [-32767, 32767]
1391 * @param factorChannel11_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the second target channel, with range [-32767, 32767]
1392 * @param factorChannel21_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the third target channel, with range [-32767, 32767]
1393 * @param factorChannel02_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the first target channel, with range [-32767, 32767]
1394 * @param factorChannel12_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the second target channel, with range [-32767, 32767]
1395 * @param factorChannel22_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the third target channel, with range [-32767, 32767]
1396 * @param biasChannel0_1024_s_32x4 The bias (translation) value for the first target channel, with range [-32767, 32767]
1397 * @param biasChannel1_1024_s_32x4 The bias (translation) value for the second target channel, with range [-32767, 32767]
1398 * @param biasChannel2_1024_s_32x4 The bias (translation) value for the third target channel, with range [-32767, 32767]
1399 */
1400 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x4_t& factorChannel00_1024_s_16x4, const int16x4_t& factorChannel10_1024_s_16x4, const int16x4_t& factorChannel20_1024_s_16x4, const int16x4_t& factorChannel01_1024_s_16x4, const int16x4_t& factorChannel11_1024_s_16x4, const int16x4_t& factorChannel21_1024_s_16x4, const int16x4_t& factorChannel02_1024_s_16x4, const int16x4_t& factorChannel12_1024_s_16x4, const int16x4_t& factorChannel22_1024_s_16x4, const int32x4_t& biasChannel0_1024_s_32x4, const int32x4_t& biasChannel1_1024_s_32x4, const int32x4_t& biasChannel2_1024_s_32x4);
1401
1402 /**
1403 * Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1404 * Thus, this function can be used to e.g., convert YUV24 to RGB24, or YVU24 to BGR24.
1405 * The linear combination is defined by three integer multiplication factors for each source channel with 1024 as denominator, plus one bias (translation) parameter for each target channel (also with 1024 as denominator).<br>
1406 * Beware: As this function applies integer multiplication factors (with 10 bits precision) the conversion result has an accuracy of +/- 1 color intensity.<br>
1407 * The transformation is based on the following pattern:
1408 * <pre>
1409 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1410 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1411 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1412 * </pre>
1413 * With t target, s source, f factor, and b bias.
1414 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1415 * @param target The pointer to the 16 target pixels (with 3 channels = 48 bytes) receiving the converted pixel data, must be valid
1416 * @param factorChannel00_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the first target channel, with range [-32767, 32767]
1417 * @param factorChannel10_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the second target channel, with range [-32767, 32767]
1418 * @param factorChannel20_1024_s_16x4 The multiplication factor (4 identical factors) for the first source channel and for the third target channel, with range [-32767, 32767]
1419 * @param factorChannel01_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the first target channel, with range [-32767, 32767]
1420 * @param factorChannel11_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the second target channel, with range [-32767, 32767]
1421 * @param factorChannel21_1024_s_16x4 The multiplication factor (4 identical factors) for the second source channel and for the third target channel, with range [-32767, 32767]
1422 * @param factorChannel02_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the first target channel, with range [-32767, 32767]
1423 * @param factorChannel12_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the second target channel, with range [-32767, 32767]
1424 * @param factorChannel22_1024_s_16x4 The multiplication factor (4 identical factors) for the third source channel and for the third target channel, with range [-32767, 32767]
1425 * @param biasChannel0_1024_s_32x4 The bias (translation) value for the first target channel, with range [-32767, 32767]
1426 * @param biasChannel1_1024_s_32x4 The bias (translation) value for the second target channel, with range [-32767, 32767]
1427 * @param biasChannel2_1024_s_32x4 The bias (translation) value for the third target channel, with range [-32767, 32767]
1428 */
1429 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x4_t& factorChannel00_1024_s_16x4, const int16x4_t& factorChannel10_1024_s_16x4, const int16x4_t& factorChannel20_1024_s_16x4, const int16x4_t& factorChannel01_1024_s_16x4, const int16x4_t& factorChannel11_1024_s_16x4, const int16x4_t& factorChannel21_1024_s_16x4, const int16x4_t& factorChannel02_1024_s_16x4, const int16x4_t& factorChannel12_1024_s_16x4, const int16x4_t& factorChannel22_1024_s_16x4, const int32x4_t& biasChannel0_1024_s_32x4, const int32x4_t& biasChannel1_1024_s_32x4, const int32x4_t& biasChannel2_1024_s_32x4);
1430
1431 /**
1432 * Converts 16 pixels with 3 channels per pixel to 16 pixels with 3 channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1433 * Thus, this function can be used to e.g., convert RGB24 to YUV24, or YUV24 to RGB24.
1434 * The linear combination is defined by three integer multiplication factors for each source channel with 128 as denominator, plus one bias (translation) parameter for each target channel (also with 128 as denominator).<br>
1435 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.<br>
1436 * The transformation is based on the following pattern:
1437 * <pre>
1438 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1439 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1440 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1441 * </pre>
1442 * With t target, s source, f factor, and b bias.
1443 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1444 * @param target The pointer to the 16 target pixels (with 3 channels = 48 bytes) receiving the converted pixel data, must be valid
1445 * @param factorChannel00_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1446 * @param factorChannel10_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1447 * @param factorChannel20_128_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1448 * @param factorChannel01_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1449 * @param factorChannel11_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1450 * @param factorChannel21_128_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1451 * @param factorChannel02_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1452 * @param factorChannel12_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1453 * @param factorChannel22_128_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1454 * @param biasChannel0_128_s_16x8 The bias (translation) value for the first target channel, with range [-128 * 128, 128 * 128]
1455 * @param biasChannel1_128_s_16x8 The bias (translation) value for the second target channel, with range [-128 * 128, 128 * 128]
1456 * @param biasChannel2_128_s_16x8 The bias (translation) value for the third target channel, with range [-128 * 128, 128 * 128]
1457 */
1458 static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_128_s_16x8, const int16x8_t& factorChannel10_128_s_16x8, const int16x8_t& factorChannel20_128_s_16x8, const int16x8_t& factorChannel01_128_s_16x8, const int16x8_t& factorChannel11_128_s_16x8, const int16x8_t& factorChannel21_128_s_16x8, const int16x8_t& factorChannel02_128_s_16x8, const int16x8_t& factorChannel12_128_s_16x8, const int16x8_t& factorChannel22_128_s_16x8, const int16x8_t& biasChannel0_128_s_16x8, const int16x8_t& biasChannel1_128_s_16x8, const int16x8_t& biasChannel2_128_s_16x8);
1459
1460 /**
1461 * Converts 16 pixels with 3 channels per pixel to 16 pixels with 4 channels per pixel by a linear combination of the three channels plus a bias (translation) parameter.
1462 * The fourth channel is set to a constant value, e.g., for an alpha channel.<br>
1463 * Thus, this function can be used to e.g., convert YUV24 to RGBA32, or YVU24 to BGRA32.<br>
1464 * The linear combination is defined by three integer multiplication factors for each source channel with 64 as denominator, plus one bias (translation) parameter for each channel (NOTE(review): the bias parameters are plain 8 bit values without a denominator tag in their names - confirm in the implementation how they are scaled and applied).<br>
1465 * Beware: As this function applies integer multiplication factors (with 6 bits precision), the conversion result has an accuracy of +/- 2 color intensities.<br>
1466 * The transformation is based on the following pattern:
1467 * <pre>
1468 * t0 = f00 * s0 + f01 * s1 + f02 * s2 + b0
1469 * t1 = f10 * s0 + f11 * s1 + f12 * s2 + b1
1470 * t2 = f20 * s0 + f21 * s1 + f22 * s2 + b2
1471 * t3 = valueChannel3
1472 * </pre>
1473 * With t target, s source, f factor, and b bias.
1474 * @param source The pointer to the 16 source pixels (with 3 channels = 48 bytes) to convert, must be valid
1475 * @param target The pointer to the 16 target pixels (with 4 channels = 64 bytes) receiving the converted pixel data, must be valid
1476 * @param factorChannel00_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the first target channel, with range [-127, 127]
1477 * @param factorChannel10_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the second target channel, with range [-127, 127]
1478 * @param factorChannel20_64_s_16x8 The multiplication factor (8 identical factors) for the first source channel and for the third target channel, with range [-127, 127]
1479 * @param factorChannel01_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the first target channel, with range [-127, 127]
1480 * @param factorChannel11_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the second target channel, with range [-127, 127]
1481 * @param factorChannel21_64_s_16x8 The multiplication factor (8 identical factors) for the second source channel and for the third target channel, with range [-127, 127]
1482 * @param factorChannel02_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the first target channel, with range [-127, 127]
1483 * @param factorChannel12_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the second target channel, with range [-127, 127]
1484 * @param factorChannel22_64_s_16x8 The multiplication factor (8 identical factors) for the third source channel and for the third target channel, with range [-127, 127]
1485 * @param biasChannel0_u_8x8 The bias (translation) value for the first target channel, with range [0, 128]
1486 * @param biasChannel1_u_8x8 The bias (translation) value for the second target channel, with range [0, 128]
1487 * @param biasChannel2_u_8x8 The bias (translation) value for the third target channel, with range [0, 128]
1488 * @param channelValue3_u_8x16 The constant value for the fourth target channel, with range [0, 255]
1489 */
1490 static OCEAN_FORCE_INLINE void convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8, const uint8x16_t& channelValue3_u_8x16);
1491
1492 /**
1493 * Converts 8 pixels with 4 channels per pixel to 8 pixels with one channel per pixel by a linear combination of the four channels.
1494 * Thus, this function can be used to e.g., convert RGBA32 to Y8, or ARGB32 to Y8, or RGB32 to Y8.<br>
1495 * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
1496 * Beware: As this function applies integer multiplication factors (with 7 bits precision), the conversion result has an accuracy of +/- 2 color intensities.
1497 * @param source The pointer to the 8 source pixels (with 4 channels = 32 bytes) to convert, must be valid
1498 * @param target The pointer to the 8 target pixels (with 1 channel = 8 bytes) receiving the converted pixel data, must be valid
1499 * @param factorChannel0_128_u_8x8 The multiplication factor (8 identical factors) for the first channel, with range [0, 127]
1500 * @param factorChannel1_128_u_8x8 The multiplication factor (8 identical factors) for the second channel, with range [0, 127 - factorChannel0 - factorChannel2 - factorChannel3]
1501 * @param factorChannel2_128_u_8x8 The multiplication factor (8 identical factors) for the third channel, with range [0, 127 - factorChannel0 - factorChannel1 - factorChannel3]
1502 * @param factorChannel3_128_u_8x8 The multiplication factor (8 identical factors) for the fourth channel, with range [0, 127 - factorChannel0 - factorChannel1 - factorChannel2]
1503 * @tparam tUseFactorChannel0 True, if the value(s) of factorChannel0 is not zero; False, if the value(s) of factorChannel0 is zero
1504 * @tparam tUseFactorChannel1 True, if the value(s) of factorChannel1 is not zero; False, if the value(s) of factorChannel1 is zero
1505 * @tparam tUseFactorChannel2 True, if the value(s) of factorChannel2 is not zero; False, if the value(s) of factorChannel2 is zero
1506 * @tparam tUseFactorChannel3 True, if the value(s) of factorChannel3 is not zero; False, if the value(s) of factorChannel3 is zero
1507 */
1508 template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2, bool tUseFactorChannel3>
1509 static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel0_128_u_8x8, const uint8x8_t& factorChannel1_128_u_8x8, const uint8x8_t& factorChannel2_128_u_8x8, const uint8x8_t& factorChannel3_128_u_8x8);
1510
1511 /**
1512 * Converts 8 pixels with 4 channels per pixel to 8 pixels with two channels per pixel by a linear combination of the four channels.
1513 * Thus, this function can be used to e.g., convert RGBA32 to YA16, or ARGB32 to AY16.
1514 * The linear combination is defined by one integer multiplication factor for each channel with 128 as denominator.<br>
1515 * Beware: As this function applies integer multiplication factors (with 7 bits precision) the conversion result has an accuracy of +/- 2 color intensities.
1516 * @param source The pointer to the 8 source pixels (with 4 channels = 32 bytes) to convert, must be valid
1517 * @param target The pointer to the 8 target pixels (with 2 channels = 16 bytes) receiving the converted pixel data, must be valid
1518 * @param factorChannel00_128_u_8x8 The multiplication factor (8 identical factors) for the first target and first source channel, with range [0, 127]
1519 * @param factorChannel10_128_u_8x8 The multiplication factor (8 identical factors) for the second target and first source channel, with range [0, 127]
1520 * @param factorChannel01_128_u_8x8 The multiplication factor (8 identical factors) for the first target and second source channel, with range [0, 127 - factorChannel00 - factorChannel02 - factorChannel03]
1521 * @param factorChannel11_128_u_8x8 The multiplication factor (8 identical factors) for the second target and second source channel, with range [0, 127 - factorChannel10 - factorChannel12 - factorChannel13]
1522 * @param factorChannel02_128_u_8x8 The multiplication factor (8 identical factors) for the first target and third source channel, with range [0, 127 - factorChannel00 - factorChannel01 - factorChannel03]
1523 * @param factorChannel12_128_u_8x8 The multiplication factor (8 identical factors) for the second target and third source channel, with range [0, 127 - factorChannel10 - factorChannel11 - factorChannel13]
1524 * @param factorChannel03_128_u_8x8 The multiplication factor (8 identical factors) for the first target and fourth source channel, with range [0, 127 - factorChannel00 - factorChannel01 - factorChannel02]
1525 * @param factorChannel13_128_u_8x8 The multiplication factor (8 identical factors) for the second target and fourth source channel, with range [0, 127 - factorChannel10 - factorChannel11 - factorChannel12]
1526 */
1527 static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel00_128_u_8x8, const uint8x8_t& factorChannel10_128_u_8x8, const uint8x8_t& factorChannel01_128_u_8x8, const uint8x8_t& factorChannel11_128_u_8x8, const uint8x8_t& factorChannel02_128_u_8x8, const uint8x8_t& factorChannel12_128_u_8x8, const uint8x8_t& factorChannel03_128_u_8x8, const uint8x8_t& factorChannel13_128_u_8x8);
1528
1529#endif // OCEAN_HARDWARE_NEON_VERSION >= 10
1530
1531};
1532
1533#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1534
1535template <>
1536inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 2u>(const uint8_t* const sourceFrame, uint8_t* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
1537{
1538 ocean_assert(sourceFrame != nullptr);
1539 ocean_assert(targetFrames != nullptr);
1540
1541 ocean_assert(width != 0u && height != 0u);
1542 ocean_assert(channels == 2u);
1543
1544 constexpr unsigned int tChannels = 2u;
1545
1546 bool allTargetFramesContinuous = true;
1547
1548 if (targetFramesPaddingElements != nullptr)
1549 {
1550 for (unsigned int n = 0u; n < tChannels; ++n)
1551 {
1552 if (targetFramesPaddingElements[n] != 0u)
1553 {
1554 allTargetFramesContinuous = false;
1555 break;
1556 }
1557 }
1558 }
1559
1560 const uint8_t* source = sourceFrame;
1561 uint8_t* target0 = targetFrames[0];
1562 uint8_t* target1 = targetFrames[1];
1563
1564 constexpr unsigned int tBlockSize = 16u;
1565
1566 uint8x16x2_t source_8x16x2;
1567
1568 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1569 {
1570 const unsigned int pixels = width * height;
1571 const unsigned int blocks = pixels / tBlockSize;
1572 const unsigned int remaining = pixels % tBlockSize;
1573
1574 for (unsigned int n = 0u; n < blocks; ++n)
1575 {
1576 source_8x16x2 = vld2q_u8(source);
1577
1578 vst1q_u8(target0, source_8x16x2.val[0]);
1579 vst1q_u8(target1, source_8x16x2.val[1]);
1580
1581 source += tBlockSize * tChannels;
1582
1583 target0 += tBlockSize;
1584 target1 += tBlockSize;
1585 }
1586
1587 for (unsigned int n = 0u; n < remaining; ++n)
1588 {
1589 target0[n] = source[n * tChannels + 0u];
1590 target1[n] = source[n * tChannels + 1u];
1591 }
1592 }
1593 else
1594 {
1595 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[0];
1596 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[1];
1597
1598 const unsigned int blocks = width / tBlockSize;
1599 const unsigned int remaining = width % tBlockSize;
1600
1601 for (unsigned int y = 0u; y < height; ++y)
1602 {
1603 for (unsigned int n = 0u; n < blocks; ++n)
1604 {
1605 source_8x16x2 = vld2q_u8(source);
1606
1607 vst1q_u8(target0, source_8x16x2.val[0]);
1608 vst1q_u8(target1, source_8x16x2.val[1]);
1609
1610 source += tBlockSize * tChannels;
1611
1612 target0 += tBlockSize;
1613 target1 += tBlockSize;
1614 }
1615
1616 for (unsigned int n = 0u; n < remaining; ++n)
1617 {
1618 target0[n] = source[n * tChannels + 0u];
1619 target1[n] = source[n * tChannels + 1u];
1620 }
1621
1622 source += remaining * tChannels + sourceFramePaddingElements;
1623 target0 += remaining + targetFrame0PaddingElements;
1624 target1 += remaining + targetFrame1PaddingElements;
1625 }
1626 }
1627}
1628
1629template <>
1630inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 3u>(const uint8_t* const sourceFrame, uint8_t* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
1631{
1632 ocean_assert(sourceFrame != nullptr);
1633 ocean_assert(targetFrames != nullptr);
1634
1635 ocean_assert(width != 0u && height != 0u);
1636 ocean_assert(channels == 3u);
1637
1638 constexpr unsigned int tChannels = 3u;
1639
1640 bool allTargetFramesContinuous = true;
1641
1642 if (targetFramesPaddingElements != nullptr)
1643 {
1644 for (unsigned int n = 0u; n < tChannels; ++n)
1645 {
1646 if (targetFramesPaddingElements[n] != 0u)
1647 {
1648 allTargetFramesContinuous = false;
1649 break;
1650 }
1651 }
1652 }
1653
1654 const uint8_t* source = sourceFrame;
1655 uint8_t* target0 = targetFrames[0];
1656 uint8_t* target1 = targetFrames[1];
1657 uint8_t* target2 = targetFrames[2];
1658
1659 constexpr unsigned int tBlockSize = 16u;
1660
1661 uint8x16x3_t source_8x16x3;
1662
1663 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1664 {
1665 const unsigned int pixels = width * height;
1666 const unsigned int blocks = pixels / tBlockSize;
1667 const unsigned int remaining = pixels % tBlockSize;
1668
1669 for (unsigned int n = 0u; n < blocks; ++n)
1670 {
1671 source_8x16x3 = vld3q_u8(source);
1672
1673 vst1q_u8(target0, source_8x16x3.val[0]);
1674 vst1q_u8(target1, source_8x16x3.val[1]);
1675 vst1q_u8(target2, source_8x16x3.val[2]);
1676
1677 source += tBlockSize * tChannels;
1678
1679 target0 += tBlockSize;
1680 target1 += tBlockSize;
1681 target2 += tBlockSize;
1682 }
1683
1684 for (unsigned int n = 0u; n < remaining; ++n)
1685 {
1686 target0[n] = source[n * tChannels + 0u];
1687 target1[n] = source[n * tChannels + 1u];
1688 target2[n] = source[n * tChannels + 2u];
1689 }
1690 }
1691 else
1692 {
1693 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[0];
1694 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[1];
1695 const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[2];
1696
1697 const unsigned int blocks = width / tBlockSize;
1698 const unsigned int remaining = width % tBlockSize;
1699
1700 for (unsigned int y = 0u; y < height; ++y)
1701 {
1702 for (unsigned int n = 0u; n < blocks; ++n)
1703 {
1704 source_8x16x3 = vld3q_u8(source);
1705
1706 vst1q_u8(target0, source_8x16x3.val[0]);
1707 vst1q_u8(target1, source_8x16x3.val[1]);
1708 vst1q_u8(target2, source_8x16x3.val[2]);
1709
1710 source += tBlockSize * tChannels;
1711
1712 target0 += tBlockSize;
1713 target1 += tBlockSize;
1714 target2 += tBlockSize;
1715 }
1716
1717 for (unsigned int n = 0u; n < remaining; ++n)
1718 {
1719 target0[n] = source[n * tChannels + 0u];
1720 target1[n] = source[n * tChannels + 1u];
1721 target2[n] = source[n * tChannels + 2u];
1722 }
1723
1724 source += remaining * tChannels + sourceFramePaddingElements;
1725 target0 += remaining + targetFrame0PaddingElements;
1726 target1 += remaining + targetFrame1PaddingElements;
1727 target2 += remaining + targetFrame2PaddingElements;
1728 }
1729 }
1730}
1731
1732template <>
1733inline void FrameChannels::separateTo1Channel<uint8_t, uint8_t, 4u>(const uint8_t* const sourceFrame, uint8_t* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
1734{
1735 ocean_assert(sourceFrame != nullptr);
1736 ocean_assert(targetFrames != nullptr);
1737
1738 ocean_assert(width != 0u && height != 0u);
1739 ocean_assert(channels == 4u);
1740
1741 constexpr unsigned int tChannels = 4u;
1742
1743 bool allTargetFramesContinuous = true;
1744
1745 if (targetFramesPaddingElements != nullptr)
1746 {
1747 for (unsigned int n = 0u; n < tChannels; ++n)
1748 {
1749 if (targetFramesPaddingElements[n] != 0u)
1750 {
1751 allTargetFramesContinuous = false;
1752 break;
1753 }
1754 }
1755 }
1756
1757 const uint8_t* source = sourceFrame;
1758 uint8_t* target0 = targetFrames[0];
1759 uint8_t* target1 = targetFrames[1];
1760 uint8_t* target2 = targetFrames[2];
1761 uint8_t* target3 = targetFrames[3];
1762
1763 constexpr unsigned int tBlockSize = 16u;
1764
1765 uint8x16x4_t source_8x16x4;
1766
1767 if (allTargetFramesContinuous && sourceFramePaddingElements == 0u)
1768 {
1769 const unsigned int pixels = width * height;
1770 const unsigned int blocks = pixels / tBlockSize;
1771 const unsigned int remaining = pixels % tBlockSize;
1772
1773 for (unsigned int n = 0u; n < blocks; ++n)
1774 {
1775 source_8x16x4 = vld4q_u8(source);
1776
1777 vst1q_u8(target0, source_8x16x4.val[0]);
1778 vst1q_u8(target1, source_8x16x4.val[1]);
1779 vst1q_u8(target2, source_8x16x4.val[2]);
1780 vst1q_u8(target3, source_8x16x4.val[3]);
1781
1782 source += tBlockSize * tChannels;
1783
1784 target0 += tBlockSize;
1785 target1 += tBlockSize;
1786 target2 += tBlockSize;
1787 target3 += tBlockSize;
1788 }
1789
1790 for (unsigned int n = 0u; n < remaining; ++n)
1791 {
1792 target0[n] = source[n * tChannels + 0u];
1793 target1[n] = source[n * tChannels + 1u];
1794 target2[n] = source[n * tChannels + 2u];
1795 target3[n] = source[n * tChannels + 3u];
1796 }
1797 }
1798 else
1799 {
1800 const unsigned int targetFrame0PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[0];
1801 const unsigned int targetFrame1PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[1];
1802 const unsigned int targetFrame2PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[2];
1803 const unsigned int targetFrame3PaddingElements = targetFramesPaddingElements == nullptr ? 0u : targetFramesPaddingElements[3];
1804
1805 const unsigned int blocks = width / tBlockSize;
1806 const unsigned int remaining = width % tBlockSize;
1807
1808 for (unsigned int y = 0u; y < height; ++y)
1809 {
1810 for (unsigned int n = 0u; n < blocks; ++n)
1811 {
1812 source_8x16x4 = vld4q_u8(source);
1813
1814 vst1q_u8(target0, source_8x16x4.val[0]);
1815 vst1q_u8(target1, source_8x16x4.val[1]);
1816 vst1q_u8(target2, source_8x16x4.val[2]);
1817 vst1q_u8(target3, source_8x16x4.val[3]);
1818
1819 source += tBlockSize * tChannels;
1820
1821 target0 += tBlockSize;
1822 target1 += tBlockSize;
1823 target2 += tBlockSize;
1824 target3 += tBlockSize;
1825 }
1826
1827 for (unsigned int n = 0u; n < remaining; ++n)
1828 {
1829 target0[n] = source[n * tChannels + 0u];
1830 target1[n] = source[n * tChannels + 1u];
1831 target2[n] = source[n * tChannels + 2u];
1832 target3[n] = source[n * tChannels + 3u];
1833 }
1834
1835 source += remaining * tChannels + sourceFramePaddingElements;
1836 target0 += remaining + targetFrame0PaddingElements;
1837 target1 += remaining + targetFrame1PaddingElements;
1838 target2 += remaining + targetFrame2PaddingElements;
1839 target3 += remaining + targetFrame3PaddingElements;
1840 }
1841 }
1842}
1843
1844#endif // OCEAN_HARDWARE_NEON_VERSION
1845
1846template <typename TSource, typename TTarget, unsigned int tChannels>
1847void FrameChannels::separateTo1Channel(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
1848{
1849 ocean_assert(sourceFrame != nullptr);
1850 ocean_assert(targetFrames != nullptr);
1851
1852 ocean_assert(width != 0u && height != 0u);
1853
1854 ocean_assert(tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME || tChannels == channels);
1855
1856 if constexpr (tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME)
1857 {
1858 separateTo1ChannelRuntime<TSource, TTarget>(sourceFrame, targetFrames, width, height, channels, sourceFramePaddingElements, targetFramesPaddingElements);
1859 return;
1860 }
1861
1862#ifdef OCEAN_DEBUG
1863 for (unsigned int c = 0u; c < tChannels; ++c)
1864 {
1865 ocean_assert(targetFrames[c] != nullptr);
1866 }
1867#endif
1868
1869 if (sourceFramePaddingElements == 0u && targetFramesPaddingElements == nullptr)
1870 {
1871 for (unsigned int n = 0u; n < width * height; ++n)
1872 {
1873 for (unsigned int c = 0u; c < tChannels; ++c)
1874 {
1875 targetFrames[c][n] = TTarget(sourceFrame[n * tChannels + c]);
1876 }
1877 }
1878 }
1879 else if (targetFramesPaddingElements == nullptr)
1880 {
1881 ocean_assert(sourceFramePaddingElements != 0u);
1882
1883 const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
1884
1885 for (unsigned int y = 0u; y < height; ++y)
1886 {
1887 const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
1888
1889 const unsigned int targetRowOffset = y * width;
1890
1891 for (unsigned int x = 0u; x < width; ++x)
1892 {
1893 for (unsigned int c = 0u; c < tChannels; ++c)
1894 {
1895 *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * tChannels + c));
1896 }
1897 }
1898 }
1899 }
1900 else
1901 {
1902 const unsigned int sourceFrameStrideElements = width * tChannels + sourceFramePaddingElements;
1903
1904 Indices32 targetFrameStrideElements(tChannels);
1905
1906 for (unsigned int c = 0u; c < tChannels; ++c)
1907 {
1908 targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
1909 }
1910
1911 for (unsigned int y = 0u; y < height; ++y)
1912 {
1913 const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
1914
1915 for (unsigned int x = 0u; x < width; ++x)
1916 {
1917 for (unsigned int c = 0u; c < tChannels; ++c)
1918 {
1919 *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * tChannels + c));
1920 }
1921 }
1922 }
1923 }
1924}
1925
1926template <typename TSource, typename TTarget>
1927void FrameChannels::separateTo1Channel(const TSource* const sourceFrame, const std::initializer_list<TTarget*>& targetFrames, const unsigned int width, const unsigned int height, const unsigned int sourceFramePaddingElements, const std::initializer_list<const unsigned int>& targetFramesPaddingElements)
1928{
1929 ocean_assert(targetFrames.size() >= 1);
1930 ocean_assert(targetFramesPaddingElements.size() == 0 || targetFrames.size() == targetFramesPaddingElements.size());
1931
1932 if (targetFrames.size() == 2)
1933 {
1934 separateTo1Channel<TSource, TTarget, 2u>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
1935 }
1936 else if (targetFrames.size() == 3)
1937 {
1938 separateTo1Channel<TSource, TTarget, 3u>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
1939 }
1940 else if (targetFrames.size() == 4)
1941 {
1942 separateTo1Channel<TSource, TTarget, 4u>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
1943 }
1944 else
1945 {
1946 separateTo1Channel<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrame, targetFrames.begin(), width, height, (unsigned int)(targetFrames.size()), sourceFramePaddingElements, targetFramesPaddingElements.size() == 0 ? nullptr : targetFramesPaddingElements.begin());
1947 }
1948}
1949
1950#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
1951
1952template <>
1953inline void FrameChannels::zipChannels<uint8_t, uint8_t, 2u>(const uint8_t* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
1954{
1955 ocean_assert(sourceFrames != nullptr);
1956 ocean_assert(targetFrame != nullptr);
1957
1958 ocean_assert(width != 0u && height != 0u);
1959 ocean_assert(channels == 2u);
1960
1961 constexpr unsigned int tChannels = 2u;
1962
1963 bool allSourceFramesContinuous = true;
1964
1965 if (sourceFramesPaddingElements != nullptr)
1966 {
1967 for (unsigned int n = 0u; n < tChannels; ++n)
1968 {
1969 if (sourceFramesPaddingElements[n] != 0u)
1970 {
1971 allSourceFramesContinuous = false;
1972 break;
1973 }
1974 }
1975 }
1976
1977 const uint8_t* source0 = sourceFrames[0];
1978 const uint8_t* source1 = sourceFrames[1];
1979 uint8_t* target = targetFrame;
1980
1981 constexpr unsigned int tBlockSize = 16u;
1982
1983 uint8x16x2_t source_8x16x2;
1984
1985 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
1986 {
1987 const unsigned int pixels = width * height;
1988 const unsigned int blocks = pixels / tBlockSize;
1989 const unsigned int remaining = pixels % tBlockSize;
1990
1991 for (unsigned int n = 0u; n < blocks; ++n)
1992 {
1993 source_8x16x2.val[0] = vld1q_u8(source0);
1994 source_8x16x2.val[1] = vld1q_u8(source1);
1995
1996 vst2q_u8(target, source_8x16x2);
1997
1998 source0 += tBlockSize;
1999 source1 += tBlockSize;
2000
2001 target += tBlockSize * tChannels;
2002 }
2003
2004 for (unsigned int n = 0u; n < remaining; ++n)
2005 {
2006 target[n * tChannels + 0u] = source0[n];
2007 target[n * tChannels + 1u] = source1[n];
2008 }
2009 }
2010 else
2011 {
2012 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2013 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2014
2015 const unsigned int blocks = width / tBlockSize;
2016 const unsigned int remaining = width % tBlockSize;
2017
2018 for (unsigned int y = 0u; y < height; ++y)
2019 {
2020 for (unsigned int n = 0u; n < blocks; ++n)
2021 {
2022 source_8x16x2.val[0] = vld1q_u8(source0);
2023 source_8x16x2.val[1] = vld1q_u8(source1);
2024
2025 vst2q_u8(target, source_8x16x2);
2026
2027 source0 += tBlockSize;
2028 source1 += tBlockSize;
2029
2030 target += tBlockSize * tChannels;
2031 }
2032
2033 for (unsigned int n = 0u; n < remaining; ++n)
2034 {
2035 target[n * tChannels + 0u] = source0[n];
2036 target[n * tChannels + 1u] = source1[n];
2037 }
2038
2039 source0 += remaining + sourceFrame0PaddingElements;
2040 source1 += remaining + sourceFrame1PaddingElements;
2041 target += remaining * tChannels + targetFramePaddingElements;
2042 }
2043 }
2044}
2045
2046template <>
2047inline void FrameChannels::zipChannels<uint8_t, uint8_t, 3u>(const uint8_t* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2048{
2049 ocean_assert(sourceFrames != nullptr);
2050 ocean_assert(targetFrame != nullptr);
2051
2052 ocean_assert(width != 0u && height != 0u);
2053 ocean_assert(channels == 3u);
2054
2055 constexpr unsigned int tChannels = 3u;
2056
2057 bool allSourceFramesContinuous = true;
2058
2059 if (sourceFramesPaddingElements != nullptr)
2060 {
2061 for (unsigned int n = 0u; n < tChannels; ++n)
2062 {
2063 if (sourceFramesPaddingElements[n] != 0u)
2064 {
2065 allSourceFramesContinuous = false;
2066 break;
2067 }
2068 }
2069 }
2070
2071 const uint8_t* source0 = sourceFrames[0];
2072 const uint8_t* source1 = sourceFrames[1];
2073 const uint8_t* source2 = sourceFrames[2];
2074 uint8_t* target = targetFrame;
2075
2076 constexpr unsigned int tBlockSize = 16u;
2077
2078 uint8x16x3_t source_8x16x3;
2079
2080 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2081 {
2082 const unsigned int pixels = width * height;
2083 const unsigned int blocks = pixels / tBlockSize;
2084 const unsigned int remaining = pixels % tBlockSize;
2085
2086 for (unsigned int n = 0u; n < blocks; ++n)
2087 {
2088 source_8x16x3.val[0] = vld1q_u8(source0);
2089 source_8x16x3.val[1] = vld1q_u8(source1);
2090 source_8x16x3.val[2] = vld1q_u8(source2);
2091
2092 vst3q_u8(target, source_8x16x3);
2093
2094 source0 += tBlockSize;
2095 source1 += tBlockSize;
2096 source2 += tBlockSize;
2097
2098 target += tBlockSize * tChannels;
2099 }
2100
2101 for (unsigned int n = 0u; n < remaining; ++n)
2102 {
2103 target[n * tChannels + 0u] = source0[n];
2104 target[n * tChannels + 1u] = source1[n];
2105 target[n * tChannels + 2u] = source2[n];
2106 }
2107 }
2108 else
2109 {
2110 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2111 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2112 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[2];
2113
2114 const unsigned int blocks = width / tBlockSize;
2115 const unsigned int remaining = width % tBlockSize;
2116
2117 for (unsigned int y = 0u; y < height; ++y)
2118 {
2119 for (unsigned int n = 0u; n < blocks; ++n)
2120 {
2121 source_8x16x3.val[0] = vld1q_u8(source0);
2122 source_8x16x3.val[1] = vld1q_u8(source1);
2123 source_8x16x3.val[2] = vld1q_u8(source2);
2124
2125 vst3q_u8(target, source_8x16x3);
2126
2127 source0 += tBlockSize;
2128 source1 += tBlockSize;
2129 source2 += tBlockSize;
2130
2131 target += tBlockSize * tChannels;
2132 }
2133
2134 for (unsigned int n = 0u; n < remaining; ++n)
2135 {
2136 target[n * tChannels + 0u] = source0[n];
2137 target[n * tChannels + 1u] = source1[n];
2138 target[n * tChannels + 2u] = source2[n];
2139 }
2140
2141 source0 += remaining + sourceFrame0PaddingElements;
2142 source1 += remaining + sourceFrame1PaddingElements;
2143 source2 += remaining + sourceFrame2PaddingElements;
2144 target += remaining * tChannels + targetFramePaddingElements;
2145 }
2146 }
2147}
2148
2149template <>
2150inline void FrameChannels::zipChannels<uint8_t, uint8_t, 4u>(const uint8_t* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2151{
2152 ocean_assert(sourceFrames != nullptr);
2153 ocean_assert(targetFrame != nullptr);
2154
2155 ocean_assert(width != 0u && height != 0u);
2156 ocean_assert(channels == 4u);
2157
2158 constexpr unsigned int tChannels = 4u;
2159
2160 bool allSourceFramesContinuous = true;
2161
2162 if (sourceFramesPaddingElements != nullptr)
2163 {
2164 for (unsigned int n = 0u; n < tChannels; ++n)
2165 {
2166 if (sourceFramesPaddingElements[n] != 0u)
2167 {
2168 allSourceFramesContinuous = false;
2169 break;
2170 }
2171 }
2172 }
2173
2174 const uint8_t* source0 = sourceFrames[0];
2175 const uint8_t* source1 = sourceFrames[1];
2176 const uint8_t* source2 = sourceFrames[2];
2177 const uint8_t* source3 = sourceFrames[3];
2178 uint8_t* target = targetFrame;
2179
2180 constexpr unsigned int tBlockSize = 16u;
2181
2182 uint8x16x4_t source_8x16x4;
2183
2184 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2185 {
2186 const unsigned int pixels = width * height;
2187 const unsigned int blocks = pixels / tBlockSize;
2188 const unsigned int remaining = pixels % tBlockSize;
2189
2190 for (unsigned int n = 0u; n < blocks; ++n)
2191 {
2192 source_8x16x4.val[0] = vld1q_u8(source0);
2193 source_8x16x4.val[1] = vld1q_u8(source1);
2194 source_8x16x4.val[2] = vld1q_u8(source2);
2195 source_8x16x4.val[3] = vld1q_u8(source3);
2196
2197 vst4q_u8(target, source_8x16x4);
2198
2199 source0 += tBlockSize;
2200 source1 += tBlockSize;
2201 source2 += tBlockSize;
2202 source3 += tBlockSize;
2203
2204 target += tBlockSize * tChannels;
2205 }
2206
2207 for (unsigned int n = 0u; n < remaining; ++n)
2208 {
2209 target[n * tChannels + 0u] = source0[n];
2210 target[n * tChannels + 1u] = source1[n];
2211 target[n * tChannels + 2u] = source2[n];
2212 target[n * tChannels + 3u] = source3[n];
2213 }
2214 }
2215 else
2216 {
2217 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2218 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2219 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[2];
2220 const unsigned int sourceFrame3PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[3];
2221
2222 const unsigned int blocks = width / tBlockSize;
2223 const unsigned int remaining = width % tBlockSize;
2224
2225 for (unsigned int y = 0u; y < height; ++y)
2226 {
2227 for (unsigned int n = 0u; n < blocks; ++n)
2228 {
2229 source_8x16x4.val[0] = vld1q_u8(source0);
2230 source_8x16x4.val[1] = vld1q_u8(source1);
2231 source_8x16x4.val[2] = vld1q_u8(source2);
2232 source_8x16x4.val[3] = vld1q_u8(source3);
2233
2234 vst4q_u8(target, source_8x16x4);
2235
2236 source0 += tBlockSize;
2237 source1 += tBlockSize;
2238 source2 += tBlockSize;
2239 source3 += tBlockSize;
2240
2241 target += tBlockSize * tChannels;
2242 }
2243
2244 for (unsigned int n = 0u; n < remaining; ++n)
2245 {
2246 target[n * tChannels + 0u] = source0[n];
2247 target[n * tChannels + 1u] = source1[n];
2248 target[n * tChannels + 2u] = source2[n];
2249 target[n * tChannels + 3u] = source3[n];
2250 }
2251
2252 source0 += remaining + sourceFrame0PaddingElements;
2253 source1 += remaining + sourceFrame1PaddingElements;
2254 source2 += remaining + sourceFrame2PaddingElements;
2255 source3 += remaining + sourceFrame3PaddingElements;
2256 target += remaining * tChannels + targetFramePaddingElements;
2257 }
2258 }
2259}
2260
2261template <>
2262inline void FrameChannels::zipChannels<float, uint8_t, 2u>(const float* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2263{
2264 ocean_assert(sourceFrames != nullptr);
2265 ocean_assert(targetFrame != nullptr);
2266
2267 ocean_assert(width != 0u && height != 0u);
2268 ocean_assert(channels == 2u);
2269
2270 constexpr unsigned int tChannels = 2u;
2271
2272 bool allSourceFramesContinuous = true;
2273
2274 if (sourceFramesPaddingElements != nullptr)
2275 {
2276 for (unsigned int n = 0u; n < tChannels; ++n)
2277 {
2278 if (sourceFramesPaddingElements[n] != 0u)
2279 {
2280 allSourceFramesContinuous = false;
2281 break;
2282 }
2283 }
2284 }
2285
2286 const float* source0 = sourceFrames[0];
2287 const float* source1 = sourceFrames[1];
2288 uint8_t* target = targetFrame;
2289
2290 constexpr unsigned int tBlockSize = 16u;
2291
2292 uint8x16x2_t target_8x16x2;
2293
2294 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2295 {
2296 const unsigned int pixels = width * height;
2297 const unsigned int blocks = pixels / tBlockSize;
2298 const unsigned int remaining = pixels % tBlockSize;
2299
2300 for (unsigned int n = 0u; n < blocks; ++n)
2301 {
2302 target_8x16x2.val[0] = NEON::cast16ElementsNEON(source0);
2303 target_8x16x2.val[1] = NEON::cast16ElementsNEON(source1);
2304
2305 vst2q_u8(target, target_8x16x2);
2306
2307 source0 += tBlockSize;
2308 source1 += tBlockSize;
2309
2310 target += tBlockSize * tChannels;
2311 }
2312
2313 for (unsigned int n = 0u; n < remaining; ++n)
2314 {
2315 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2316 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2317
2318 target[n * tChannels + 0u] = uint8_t(source0[n]);
2319 target[n * tChannels + 1u] = uint8_t(source1[n]);
2320 }
2321 }
2322 else
2323 {
2324 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2325 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2326
2327 const unsigned int blocks = width / tBlockSize;
2328 const unsigned int remaining = width % tBlockSize;
2329
2330 for (unsigned int y = 0u; y < height; ++y)
2331 {
2332 for (unsigned int n = 0u; n < blocks; ++n)
2333 {
2334 target_8x16x2.val[0] = NEON::cast16ElementsNEON(source0);
2335 target_8x16x2.val[1] = NEON::cast16ElementsNEON(source1);
2336
2337 vst2q_u8(target, target_8x16x2);
2338
2339 source0 += tBlockSize;
2340 source1 += tBlockSize;
2341
2342 target += tBlockSize * tChannels;
2343 }
2344
2345 for (unsigned int n = 0u; n < remaining; ++n)
2346 {
2347 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2348 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2349
2350 target[n * tChannels + 0u] = uint8_t(source0[n]);
2351 target[n * tChannels + 1u] = uint8_t(source1[n]);
2352 }
2353
2354 source0 += remaining + sourceFrame0PaddingElements;
2355 source1 += remaining + sourceFrame1PaddingElements;
2356 target += remaining * tChannels + targetFramePaddingElements;
2357 }
2358 }
2359}
2360
2361template <>
2362inline void FrameChannels::zipChannels<float, uint8_t, 3u>(const float* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2363{
2364 ocean_assert(sourceFrames != nullptr);
2365 ocean_assert(targetFrame != nullptr);
2366
2367 ocean_assert(width != 0u && height != 0u);
2368 ocean_assert(channels == 3u);
2369
2370 constexpr unsigned int tChannels = 3u;
2371
2372 bool allSourceFramesContinuous = true;
2373
2374 if (sourceFramesPaddingElements != nullptr)
2375 {
2376 for (unsigned int n = 0u; n < tChannels; ++n)
2377 {
2378 if (sourceFramesPaddingElements[n] != 0u)
2379 {
2380 allSourceFramesContinuous = false;
2381 break;
2382 }
2383 }
2384 }
2385
2386 const float* source0 = sourceFrames[0];
2387 const float* source1 = sourceFrames[1];
2388 const float* source2 = sourceFrames[2];
2389 uint8_t* target = targetFrame;
2390
2391 constexpr unsigned int tBlockSize = 16u;
2392
2393 uint8x16x3_t target_8x16x3;
2394
2395 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2396 {
2397 const unsigned int pixels = width * height;
2398 const unsigned int blocks = pixels / tBlockSize;
2399 const unsigned int remaining = pixels % tBlockSize;
2400
2401 for (unsigned int n = 0u; n < blocks; ++n)
2402 {
2403 target_8x16x3.val[0] = NEON::cast16ElementsNEON(source0);
2404 target_8x16x3.val[1] = NEON::cast16ElementsNEON(source1);
2405 target_8x16x3.val[2] = NEON::cast16ElementsNEON(source2);
2406
2407 vst3q_u8(target, target_8x16x3);
2408
2409 source0 += tBlockSize;
2410 source1 += tBlockSize;
2411 source2 += tBlockSize;
2412
2413 target += tBlockSize * tChannels;
2414 }
2415
2416 for (unsigned int n = 0u; n < remaining; ++n)
2417 {
2418 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2419 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2420 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2421
2422 target[n * tChannels + 0u] = uint8_t(source0[n]);
2423 target[n * tChannels + 1u] = uint8_t(source1[n]);
2424 target[n * tChannels + 2u] = uint8_t(source2[n]);
2425 }
2426 }
2427 else
2428 {
2429 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2430 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2431 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[2];
2432
2433 const unsigned int blocks = width / tBlockSize;
2434 const unsigned int remaining = width % tBlockSize;
2435
2436 for (unsigned int y = 0u; y < height; ++y)
2437 {
2438 for (unsigned int n = 0u; n < blocks; ++n)
2439 {
2440 target_8x16x3.val[0] = NEON::cast16ElementsNEON(source0);
2441 target_8x16x3.val[1] = NEON::cast16ElementsNEON(source1);
2442 target_8x16x3.val[2] = NEON::cast16ElementsNEON(source2);
2443
2444
2445 vst3q_u8(target, target_8x16x3);
2446
2447 source0 += tBlockSize;
2448 source1 += tBlockSize;
2449 source2 += tBlockSize;
2450
2451 target += tBlockSize * tChannels;
2452 }
2453
2454 for (unsigned int n = 0u; n < remaining; ++n)
2455 {
2456 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2457 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2458 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2459
2460 target[n * tChannels + 0u] = uint8_t(source0[n]);
2461 target[n * tChannels + 1u] = uint8_t(source1[n]);
2462 target[n * tChannels + 2u] = uint8_t(source2[n]);
2463 }
2464
2465 source0 += remaining + sourceFrame0PaddingElements;
2466 source1 += remaining + sourceFrame1PaddingElements;
2467 source2 += remaining + sourceFrame2PaddingElements;
2468 target += remaining * tChannels + targetFramePaddingElements;
2469 }
2470 }
2471}
2472
2473template <>
2474inline void FrameChannels::zipChannels<float, uint8_t, 4u>(const float* const* sourceFrames, uint8_t* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2475{
2476 ocean_assert(sourceFrames != nullptr);
2477 ocean_assert(targetFrame != nullptr);
2478
2479 ocean_assert(width != 0u && height != 0u);
2480 ocean_assert(channels == 4u);
2481
2482 constexpr unsigned int tChannels = 4u;
2483
2484 bool allSourceFramesContinuous = true;
2485
2486 if (sourceFramesPaddingElements != nullptr)
2487 {
2488 for (unsigned int n = 0u; n < tChannels; ++n)
2489 {
2490 if (sourceFramesPaddingElements[n] != 0u)
2491 {
2492 allSourceFramesContinuous = false;
2493 break;
2494 }
2495 }
2496 }
2497
2498 const float* source0 = sourceFrames[0];
2499 const float* source1 = sourceFrames[1];
2500 const float* source2 = sourceFrames[2];
2501 const float* source3 = sourceFrames[3];
2502 uint8_t* target = targetFrame;
2503
2504 constexpr unsigned int tBlockSize = 16u;
2505
2506 uint8x16x4_t target_8x16x4;
2507
2508 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2509 {
2510 const unsigned int pixels = width * height;
2511 const unsigned int blocks = pixels / tBlockSize;
2512 const unsigned int remaining = pixels % tBlockSize;
2513
2514 for (unsigned int n = 0u; n < blocks; ++n)
2515 {
2516 target_8x16x4.val[0] = NEON::cast16ElementsNEON(source0);
2517 target_8x16x4.val[1] = NEON::cast16ElementsNEON(source1);
2518 target_8x16x4.val[2] = NEON::cast16ElementsNEON(source2);
2519 target_8x16x4.val[3] = NEON::cast16ElementsNEON(source3);
2520
2521 vst4q_u8(target, target_8x16x4);
2522
2523 source0 += tBlockSize;
2524 source1 += tBlockSize;
2525 source2 += tBlockSize;
2526 source3 += tBlockSize;
2527
2528 target += tBlockSize * tChannels;
2529 }
2530
2531 for (unsigned int n = 0u; n < remaining; ++n)
2532 {
2533 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2534 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2535 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2536 ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
2537
2538 target[n * tChannels + 0u] = uint8_t(source0[n]);
2539 target[n * tChannels + 1u] = uint8_t(source1[n]);
2540 target[n * tChannels + 2u] = uint8_t(source2[n]);
2541 target[n * tChannels + 3u] = uint8_t(source3[n]);
2542 }
2543 }
2544 else
2545 {
2546 const unsigned int sourceFrame0PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[0];
2547 const unsigned int sourceFrame1PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[1];
2548 const unsigned int sourceFrame2PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[2];
2549 const unsigned int sourceFrame3PaddingElements = sourceFramesPaddingElements == nullptr ? 0u : sourceFramesPaddingElements[3];
2550
2551 const unsigned int blocks = width / tBlockSize;
2552 const unsigned int remaining = width % tBlockSize;
2553
2554 for (unsigned int y = 0u; y < height; ++y)
2555 {
2556 for (unsigned int n = 0u; n < blocks; ++n)
2557 {
2558 target_8x16x4.val[0] = NEON::cast16ElementsNEON(source0);
2559 target_8x16x4.val[1] = NEON::cast16ElementsNEON(source1);
2560 target_8x16x4.val[2] = NEON::cast16ElementsNEON(source2);
2561 target_8x16x4.val[3] = NEON::cast16ElementsNEON(source3);
2562
2563 vst4q_u8(target, target_8x16x4);
2564
2565 source0 += tBlockSize;
2566 source1 += tBlockSize;
2567 source2 += tBlockSize;
2568 source3 += tBlockSize;
2569
2570 target += tBlockSize * tChannels;
2571 }
2572
2573 for (unsigned int n = 0u; n < remaining; ++n)
2574 {
2575 ocean_assert(source0[n] >= 0.0f && source0[n] < 256.0f);
2576 ocean_assert(source1[n] >= 0.0f && source1[n] < 256.0f);
2577 ocean_assert(source2[n] >= 0.0f && source2[n] < 256.0f);
2578 ocean_assert(source3[n] >= 0.0f && source3[n] < 256.0f);
2579
2580 target[n * tChannels + 0u] = uint8_t(source0[n]);
2581 target[n * tChannels + 1u] = uint8_t(source1[n]);
2582 target[n * tChannels + 2u] = uint8_t(source2[n]);
2583 target[n * tChannels + 3u] = uint8_t(source3[n]);
2584 }
2585
2586 source0 += remaining + sourceFrame0PaddingElements;
2587 source1 += remaining + sourceFrame1PaddingElements;
2588 source2 += remaining + sourceFrame2PaddingElements;
2589 source3 += remaining + sourceFrame3PaddingElements;
2590 target += remaining * tChannels + targetFramePaddingElements;
2591 }
2592 }
2593}
2594
2595#endif // defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
2596
2597template <typename TSource, typename TTarget, unsigned int tChannels>
2598void FrameChannels::zipChannels(const TSource* const* sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
2599{
2600 ocean_assert(sourceFrames != nullptr);
2601 ocean_assert(targetFrame != nullptr);
2602
2603 ocean_assert(width != 0u && height != 0u);
2604
2605 ocean_assert(tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME || tChannels == channels);
2606
2607 if constexpr (tChannels == CHANNELS_NOT_KNOWN_AT_COMPILE_TIME)
2608 {
2609 zipChannelsRuntime<TSource, TTarget>(sourceFrames, targetFrame, width, height, channels, sourceFramesPaddingElements, targetFramePaddingElements);
2610 return;
2611 }
2612
2613 bool allSourceFramesContinuous = true;
2614
2615 if (sourceFramesPaddingElements != nullptr)
2616 {
2617 for (unsigned int n = 0u; n < tChannels; ++n)
2618 {
2619 if (sourceFramesPaddingElements[n] != 0u)
2620 {
2621 allSourceFramesContinuous = false;
2622 break;
2623 }
2624 }
2625 }
2626
2627 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
2628 {
2629 for (unsigned int n = 0u; n < width * height; ++n)
2630 {
2631 for (unsigned int c = 0u; c < tChannels; ++c)
2632 {
2633 targetFrame[n * tChannels + c] = TTarget(sourceFrames[c][n]);
2634 }
2635 }
2636 }
2637 else
2638 {
2639 const unsigned int targetFrameStrideElements = width * tChannels + targetFramePaddingElements;
2640
2641 Indices32 sourceFrameStrideElements(tChannels);
2642
2643 for (unsigned int c = 0u; c < tChannels; ++c)
2644 {
2645 if (sourceFramesPaddingElements == nullptr)
2646 {
2647 sourceFrameStrideElements[c] = width;
2648 }
2649 else
2650 {
2651 sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
2652 }
2653 }
2654
2655 for (unsigned int y = 0u; y < height; ++y)
2656 {
2657 TTarget* const targetRow = targetFrame + y * targetFrameStrideElements;
2658
2659 for (unsigned int x = 0u; x < width; ++x)
2660 {
2661 for (unsigned int c = 0u; c < tChannels; ++c)
2662 {
2663 *(targetRow + x * tChannels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x));
2664 }
2665 }
2666 }
2667 }
2668}
2669
2670template <typename TSource, typename TTarget>
2671void FrameChannels::zipChannels(const std::initializer_list<const TSource*>& sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const std::initializer_list<unsigned int>& sourceFramePaddingElements, const unsigned int targetFramePaddingElements)
2672{
2673 ocean_assert(sourceFrames.size() >= 1);
2674 ocean_assert(sourceFramePaddingElements.size() == 0 || sourceFrames.size() == sourceFramePaddingElements.size());
2675
2676 if (sourceFrames.size() == 2)
2677 {
2678 zipChannels<TSource, TTarget, 2u>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2679 }
2680 else if (sourceFrames.size() == 3)
2681 {
2682 zipChannels<TSource, TTarget, 3u>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2683 }
2684 else if (sourceFrames.size() == 4)
2685 {
2686 zipChannels<TSource, TTarget, 4u>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2687 }
2688 else
2689 {
2690 zipChannels<TSource, TTarget, CHANNELS_NOT_KNOWN_AT_COMPILE_TIME>(sourceFrames.begin(), targetFrame, width, height, (unsigned int)(sourceFrames.size()), sourceFramePaddingElements.size() == 0 ? nullptr : sourceFramePaddingElements.begin(), targetFramePaddingElements);
2691 }
2692}
2693
2694template <typename T, unsigned int tSourceChannels>
2695inline void FrameChannels::addFirstChannel(const T* source, const T* sourceNewChannel, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2696{
2697 static_assert(tSourceChannels != 0u, "Invalid channel number!");
2698
2699 ocean_assert(source != nullptr && sourceNewChannel != nullptr && target != nullptr);
2700 ocean_assert(source != target);
2701 ocean_assert(width >= 1u && height >= 1u);
2702
2703 const unsigned int options[3] = {sourcePaddingElements, sourceNewChannelPaddingElements, targetPaddingElements};
2704
2705 const void* sources[2] = {source, sourceNewChannel};
2706
2707 FrameConverter::convertArbitraryPixelFormat(sources, (void**)&target, width, height, conversionFlag, 1u, FrameChannels::addChannelRow<T, tSourceChannels, true>, options, worker);
2708}
2709
2710template <typename T, unsigned int tSourceChannels>
2711inline void FrameChannels::addFirstChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2712{
2713 static_assert(tSourceChannels >= 1u, "Invalid channel number!");
2714
2715 ocean_assert(source != nullptr && target != nullptr);
2716 ocean_assert(width >= 1u && height >= 1u);
2717
2718 const unsigned int targetChannels = tSourceChannels + 1u;
2719
2720 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2721 const unsigned int targetStrideElements = width * targetChannels + targetPaddingElements;
2722
2723 const void* channelValueParameter = (const void*)(&newChannelValue);
2724
2725 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
2726
2727 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::addChannelValueRow<T, tSourceChannels, true>, FrameChannels::reverseRowPixelOrderInPlace<T, targetChannels>, areContinuous, channelValueParameter, worker);
2728}
2729
2730template <typename T, unsigned int tSourceChannels>
2731inline void FrameChannels::addLastChannel(const T* source, const T* sourceNewChannel, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2732{
2733 static_assert(tSourceChannels != 0u, "Invalid channel number!");
2734
2735 ocean_assert(source != nullptr && sourceNewChannel != nullptr && target != nullptr);
2736 ocean_assert(source != target);
2737 ocean_assert(width >= 1u && height >= 1u);
2738
2739 const unsigned int options[3] = {sourcePaddingElements, sourceNewChannelPaddingElements, targetPaddingElements};
2740
2741 const void* sources[2] = {source, sourceNewChannel};
2742
2743 FrameConverter::convertArbitraryPixelFormat(sources, (void**)&target, width, height, conversionFlag, 1u, FrameChannels::addChannelRow<T, tSourceChannels, false>, options, worker);
2744}
2745
2746template <typename T, unsigned int tSourceChannels>
2747inline void FrameChannels::addLastChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2748{
2749 static_assert(tSourceChannels >= 1u, "Invalid channel number!");
2750
2751 ocean_assert(source != nullptr && target != nullptr);
2752 ocean_assert(width >= 1u && height >= 1u);
2753
2754 const unsigned int targetChannels = tSourceChannels + 1u;
2755
2756 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2757 const unsigned int targetStrideElements = width * targetChannels + targetPaddingElements;
2758
2759 const void* channelValueParameter = (const void*)(&newChannelValue);
2760
2761 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
2762
2763 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::addChannelValueRow<T, tSourceChannels, false>, FrameChannels::reverseRowPixelOrderInPlace<T, targetChannels>, areContinuous, channelValueParameter, worker);
2764}
2765
2766template <typename T, unsigned int tSourceChannels>
2767inline void FrameChannels::removeFirstChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2768{
2769 static_assert(tSourceChannels >= 2u && tSourceChannels <= 8u, "Invalid channel number!");
2770
2771 ocean_assert(source != nullptr && target != nullptr);
2772 ocean_assert(width >= 1u && height >= 1u);
2773
2774 const unsigned int shufflePatternMax = 0x07654321u;
2775 const unsigned int mask = 0xFFFFFFFFu >> ((8u - tSourceChannels + 1u) * 4u); // e.g., 0xFF for tChannels == 3u, 0xFFF for tChannels == 4u
2776
2777 const unsigned int shufflePattern = shufflePatternMax & mask;
2778
2779 FrameChannels::shuffleChannels<T, tSourceChannels, tSourceChannels - 1u, shufflePattern>(source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements, worker);
2780}
2781
2782template <typename T, unsigned int tSourceChannels>
2783inline void FrameChannels::removeLastChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2784{
2785 static_assert(tSourceChannels >= 2u && tSourceChannels <= 8u, "Invalid channel number!");
2786
2787 ocean_assert(source != nullptr && target != nullptr);
2788 ocean_assert(width >= 1u && height >= 1u);
2789
2790 const unsigned int shufflePatternMax = 0x76543210u;
2791 const unsigned int mask = 0xFFFFFFFFu >> ((8u - tSourceChannels + 1u) * 4u); // e.g., 0xFF for tChannels == 3u, 0xFFF for tChannels == 4u
2792
2793 const unsigned int shufflePattern = shufflePatternMax & mask;
2794
2795 FrameChannels::shuffleChannels<T, tSourceChannels, tSourceChannels - 1u, shufflePattern>(source, target, width, height, conversionFlag, sourcePaddingElements, targetPaddingElements, worker);
2796}
2797
2798template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tSourceChannelIndex, unsigned int tTargetChannelIndex>
2799inline void FrameChannels::copyChannel(const T* source, T* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2800{
2801 static_assert(tSourceChannels >= 1u, "Invalid number of channels!");
2802 static_assert(tTargetChannels >= 1u, "Invalid number of channels!");
2803
2804 static_assert(tSourceChannelIndex < tSourceChannels, "Invalid channel index!");
2805 static_assert(tTargetChannelIndex < tTargetChannels, "Invalid channel index!");
2806
2807 ocean_assert(source != nullptr && target != nullptr);
2808 ocean_assert(width >= 1u && height >= 1u);
2809
2810 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
2811 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
2812
2813 constexpr RowReversePixelOrderInPlaceFunction<T> reversePixelOrderRowInPlaceFunction = nullptr;
2814
2815 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
2816
2817 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, CONVERT_NORMAL, FrameChannels::copyChannelRow<T, tSourceChannels, tTargetChannels, tSourceChannelIndex, tTargetChannelIndex>, reversePixelOrderRowInPlaceFunction, areContinuous, nullptr, worker);
2818}
2819
2820template <typename T, unsigned int tChannel, unsigned int tChannels>
2821inline void FrameChannels::setChannel(T* frame, const unsigned int width, const unsigned int height, const T value, const unsigned int framePaddingElements, Worker* worker)
2822{
2823 static_assert(tChannels >= 1u, "Invalid channel number!");
2824 static_assert(tChannel < tChannels, "Invalid channel index!");
2825
2826 ocean_assert(frame != nullptr);
2827 ocean_assert(width >= 1u && height >= 1u);
2828
2829 if (worker)
2830 {
2831 worker->executeFunction(Worker::Function::createStatic(&setChannelSubset<T, tChannel, tChannels>, frame, width, value, framePaddingElements, 0u, 0u), 0u, height);
2832 }
2833 else
2834 {
2835 setChannelSubset<T, tChannel, tChannels>(frame, width, value, framePaddingElements, 0u, height);
2836 }
2837}
2838
2839template <typename T, unsigned int tChannels>
2840inline void FrameChannels::reverseChannelOrder(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
2841{
2842 static_assert(tChannels >= 1u, "Invalid channel number!");
2843
2844 ocean_assert(source != nullptr && target != nullptr);
2845 ocean_assert(width >= 1u && height >= 1u);
2846
2847 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
2848 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
2849
2850 constexpr bool areContinuous = false; // even if both images are continuous, we must reverse each line by another
2851
2852 FrameConverter::convertGenericPixelFormat<T>(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::reverseRowChannelOrder<T, tChannels>, FrameChannels::reverseRowPixelOrderInPlace<T, tChannels>, areContinuous, nullptr, worker);
2853}
2854
2855template <typename T, unsigned int tChannels>
2856void FrameChannels::reverseRowPixelOrder(const T* source, T* target, const size_t size)
2857{
2858 static_assert(tChannels >= 1u, "Invalid channel number!");
2859
2860 ocean_assert(source != nullptr && target != nullptr);
2861 ocean_assert(size >= 1);
2862
2863#ifdef OCEAN_DEBUG
2864 const T* const debugSourceStart = source;
2865 const T* const debugSourceEnd = debugSourceStart + size * tChannels;
2866
2867 const T* const debugTargetStart = target;
2868 const T* const debugTargetEnd = debugTargetStart + size * tChannels;
2869#endif
2870
2871 // moving target to the end of the memory block
2872 target += size * tChannels;
2873
2874 const T* const sourceEnd = source + size * tChannels;
2875
2876#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
2877
2878 if constexpr (std::is_same<typename TypeMapper<T>::Type, uint8_t>::value)
2879 {
2880 const size_t blocks16 = size / size_t(16);
2881
2882 switch (tChannels)
2883 {
2884 case 1u:
2885 {
2886 for (size_t n = 0; n < blocks16; ++n)
2887 {
2888 target -= 16u * tChannels;
2889
2890 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2891 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2892
2893 const uint8x16_t source_u_8x16 = vld1q_u8((const uint8_t*)(source));
2894 uint8x16_t revSource_u_8x16 = vrev64q_u8(source_u_8x16);
2895 revSource_u_8x16 = vcombine_u8(vget_high_u8(revSource_u_8x16), vget_low_u8(revSource_u_8x16));
2896
2897 vst1q_u8((uint8_t*)(target), revSource_u_8x16);
2898
2899 source += 16u * tChannels;
2900 }
2901
2902 break;
2903 }
2904
2905 case 2u:
2906 {
2907 for (size_t n = 0; n < blocks16; ++n)
2908 {
2909 target -= 16u * tChannels;
2910
2911 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2912 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2913
2914 const uint8x16_t sourceA_u_8x16 = vld1q_u8((const uint8_t*)(source) + 0);
2915 const uint8x16_t sourceB_u_8x16 = vld1q_u8((const uint8_t*)(source) + 16);
2916
2917 const uint8x16_t revSourceA_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u8(sourceA_u_8x16)));
2918 const uint8x16_t revSourceB_u_8x16 = vreinterpretq_u8_u16(vrev64q_u16(vreinterpretq_u16_u8(sourceB_u_8x16)));
2919
2920 const uint8x16_t targetA_u_8x16 = vcombine_u8(vget_high_u8(revSourceA_u_8x16), vget_low_u8(revSourceA_u_8x16));
2921 const uint8x16_t targetB_u_8x16 = vcombine_u8(vget_high_u8(revSourceB_u_8x16), vget_low_u8(revSourceB_u_8x16));
2922
2923 vst1q_u8((uint8_t*)(target) + 0, targetB_u_8x16);
2924 vst1q_u8((uint8_t*)(target) + 16, targetA_u_8x16);
2925
2926 source += 16u * tChannels;
2927 }
2928
2929 break;
2930 }
2931
2932 case 3u:
2933 {
2934 for (size_t n = 0; n < blocks16; ++n)
2935 {
2936 target -= 16u * tChannels;
2937
2938 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2939 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2940
2941 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)(source));
2942
2943 uint8x16x3_t revSource_u_8x16x3;
2944 revSource_u_8x16x3.val[0] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[0])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[0])));
2945 revSource_u_8x16x3.val[1] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[1])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[1])));
2946 revSource_u_8x16x3.val[2] = vcombine_u8(vrev64_u8(vget_high_u8(source_u_8x16x3.val[2])), vrev64_u8(vget_low_u8(source_u_8x16x3.val[2])));
2947
2948 vst3q_u8((uint8_t*)(target), revSource_u_8x16x3);
2949
2950 source += 16u * tChannels;
2951 }
2952
2953 break;
2954 }
2955
2956 case 4u:
2957 {
2958 for (size_t n = 0; n < blocks16; ++n)
2959 {
2960 target -= 16u * tChannels;
2961
2962 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
2963 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
2964
2965 const uint8x16_t sourceA_u_8x16 = vld1q_u8((const uint8_t*)(source) + 0);
2966 const uint8x16_t sourceB_u_8x16 = vld1q_u8((const uint8_t*)(source) + 16);
2967 const uint8x16_t sourceC_u_8x16 = vld1q_u8((const uint8_t*)(source) + 32);
2968 const uint8x16_t sourceD_u_8x16 = vld1q_u8((const uint8_t*)(source) + 48);
2969
2970 const uint8x16_t revSourceA_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceA_u_8x16)));
2971 const uint8x16_t revSourceB_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceB_u_8x16)));
2972 const uint8x16_t revSourceC_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceC_u_8x16)));
2973 const uint8x16_t revSourceD_u_8x16 = vreinterpretq_u8_u32(vrev64q_u32(vreinterpretq_u32_u8(sourceD_u_8x16)));
2974
2975 const uint8x16_t targetA_u_8x16 = vcombine_u8(vget_high_u8(revSourceA_u_8x16), vget_low_u8(revSourceA_u_8x16));
2976 const uint8x16_t targetB_u_8x16 = vcombine_u8(vget_high_u8(revSourceB_u_8x16), vget_low_u8(revSourceB_u_8x16));
2977 const uint8x16_t targetC_u_8x16 = vcombine_u8(vget_high_u8(revSourceC_u_8x16), vget_low_u8(revSourceC_u_8x16));
2978 const uint8x16_t targetD_u_8x16 = vcombine_u8(vget_high_u8(revSourceD_u_8x16), vget_low_u8(revSourceD_u_8x16));
2979
2980 vst1q_u8((uint8_t*)(target) + 0, targetD_u_8x16);
2981 vst1q_u8((uint8_t*)(target) + 16, targetC_u_8x16);
2982 vst1q_u8((uint8_t*)(target) + 32, targetB_u_8x16);
2983 vst1q_u8((uint8_t*)(target) + 48, targetA_u_8x16);
2984
2985 source += 16u * tChannels;
2986 }
2987
2988 break;
2989 }
2990
2991 default:
2992 break;
2993 }
2994 }
2995
2996#endif // OCEAN_HARDWARE_NEON_VERSION
2997
2998 while (source != sourceEnd)
2999 {
3000 ocean_assert(source < sourceEnd);
3001
3002 for (unsigned int n = 0u; n < tChannels; ++n)
3003 {
3004 ocean_assert(source + tChannels - n - 1u >= debugSourceStart);
3005 ocean_assert(source + tChannels - n - 1u < debugSourceEnd);
3006
3007 ocean_assert(target > debugTargetStart && target <= debugTargetEnd);
3008
3009 *--target = source[tChannels - n - 1u];
3010 }
3011
3012 source += tChannels;
3013 }
3014}
3015
3016template <typename T, unsigned int tChannels>
3017void FrameChannels::reverseRowPixelOrderInPlace(T* data, const size_t size)
3018{
3019 static_assert(tChannels >= 1u, "Invalid channel number!");
3020
3021 ocean_assert(data != nullptr);
3022 ocean_assert(size >= 1);
3023
3024 typedef typename DataType<T, tChannels>::Type PixelType;
3025
3026 size_t n = 0;
3027
3028#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3029
3030 if constexpr (std::is_same<typename TypeMapper<T>::Type, uint8_t>::value)
3031 {
3032 if (size >= 32)
3033 {
3034 const size_t blocks32 = size / size_t(32);
3035
3036 uint8_t* left = (uint8_t*)(data);
3037 uint8_t* right = (uint8_t*)(data) + (size - 16u) * tChannels;
3038
3039 switch (tChannels)
3040 {
3041 case 1u:
3042 {
3043 for (size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3044 {
3045 const uint8x16_t left_u_8x16 = vld1q_u8(left);
3046 const uint8x16_t right_u_8x16 = vld1q_u8(right);
3047
3048 uint8x16_t revLeft_u_8x16 = vrev64q_u8(left_u_8x16);
3049 revLeft_u_8x16 = vcombine_u8(vget_high_u8(revLeft_u_8x16), vget_low_u8(revLeft_u_8x16));
3050
3051 uint8x16_t revRight_u_8x16 = vrev64q_u8(right_u_8x16);
3052 revRight_u_8x16 = vcombine_u8(vget_high_u8(revRight_u_8x16), vget_low_u8(revRight_u_8x16));
3053
3054 vst1q_u8(left, revRight_u_8x16);
3055 vst1q_u8(right, revLeft_u_8x16);
3056
3057 left += 16u * tChannels;
3058 right -= 16u * tChannels;
3059 }
3060
3061 n += blocks32 * 16u;
3062
3063 break;
3064 }
3065
3066 case 2u:
3067 {
3068 for (size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3069 {
3070 const uint8x16x2_t left_u_8x16x2 = vld2q_u8(left);
3071 const uint8x16x2_t right_u_8x16x2 = vld2q_u8(right);
3072
3073 uint8x16x2_t revLeft_u_8x16x2;
3074 revLeft_u_8x16x2.val[0] = vrev64q_u8(left_u_8x16x2.val[0]);
3075 revLeft_u_8x16x2.val[1] = vrev64q_u8(left_u_8x16x2.val[1]);
3076 revLeft_u_8x16x2.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x2.val[0]), vget_low_u8(revLeft_u_8x16x2.val[0]));
3077 revLeft_u_8x16x2.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x2.val[1]), vget_low_u8(revLeft_u_8x16x2.val[1]));
3078
3079 uint8x16x2_t revRight_u_8x16x2;
3080 revRight_u_8x16x2.val[0] = vrev64q_u8(right_u_8x16x2.val[0]);
3081 revRight_u_8x16x2.val[1] = vrev64q_u8(right_u_8x16x2.val[1]);
3082 revRight_u_8x16x2.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x2.val[0]), vget_low_u8(revRight_u_8x16x2.val[0]));
3083 revRight_u_8x16x2.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x2.val[1]), vget_low_u8(revRight_u_8x16x2.val[1]));
3084
3085 vst2q_u8(left, revRight_u_8x16x2);
3086 vst2q_u8(right, revLeft_u_8x16x2);
3087
3088 left += 16u * tChannels;
3089 right -= 16u * tChannels;
3090 }
3091
3092 n += blocks32 * 16u;
3093
3094 break;
3095 }
3096
3097 case 3u:
3098 {
3099 for (size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3100 {
3101 const uint8x16x3_t left_u_8x16x3 = vld3q_u8(left);
3102 const uint8x16x3_t right_u_8x16x3 = vld3q_u8(right);
3103
3104 uint8x16x3_t revLeft_u_8x16x3;
3105 revLeft_u_8x16x3.val[0] = vrev64q_u8(left_u_8x16x3.val[0]);
3106 revLeft_u_8x16x3.val[1] = vrev64q_u8(left_u_8x16x3.val[1]);
3107 revLeft_u_8x16x3.val[2] = vrev64q_u8(left_u_8x16x3.val[2]);
3108 revLeft_u_8x16x3.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[0]), vget_low_u8(revLeft_u_8x16x3.val[0]));
3109 revLeft_u_8x16x3.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[1]), vget_low_u8(revLeft_u_8x16x3.val[1]));
3110 revLeft_u_8x16x3.val[2] = vcombine_u8(vget_high_u8(revLeft_u_8x16x3.val[2]), vget_low_u8(revLeft_u_8x16x3.val[2]));
3111
3112 uint8x16x3_t revRight_u_8x16x3;
3113 revRight_u_8x16x3.val[0] = vrev64q_u8(right_u_8x16x3.val[0]);
3114 revRight_u_8x16x3.val[1] = vrev64q_u8(right_u_8x16x3.val[1]);
3115 revRight_u_8x16x3.val[2] = vrev64q_u8(right_u_8x16x3.val[2]);
3116 revRight_u_8x16x3.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[0]), vget_low_u8(revRight_u_8x16x3.val[0]));
3117 revRight_u_8x16x3.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[1]), vget_low_u8(revRight_u_8x16x3.val[1]));
3118 revRight_u_8x16x3.val[2] = vcombine_u8(vget_high_u8(revRight_u_8x16x3.val[2]), vget_low_u8(revRight_u_8x16x3.val[2]));
3119
3120 vst3q_u8(left, revRight_u_8x16x3);
3121 vst3q_u8(right, revLeft_u_8x16x3);
3122
3123 left += 16u * tChannels;
3124 right -= 16u * tChannels;
3125 }
3126
3127 n += blocks32 * 16u;
3128
3129 break;
3130 }
3131
3132 case 4u:
3133 {
3134 for (size_t nBlock = 0; nBlock < blocks32; ++nBlock)
3135 {
3136 const uint8x16x4_t left_u_8x16x4 = vld4q_u8(left);
3137 const uint8x16x4_t right_u_8x16x4 = vld4q_u8(right);
3138
3139 uint8x16x4_t revLeft_u_8x16x4;
3140 revLeft_u_8x16x4.val[0] = vrev64q_u8(left_u_8x16x4.val[0]);
3141 revLeft_u_8x16x4.val[1] = vrev64q_u8(left_u_8x16x4.val[1]);
3142 revLeft_u_8x16x4.val[2] = vrev64q_u8(left_u_8x16x4.val[2]);
3143 revLeft_u_8x16x4.val[3] = vrev64q_u8(left_u_8x16x4.val[3]);
3144 revLeft_u_8x16x4.val[0] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[0]), vget_low_u8(revLeft_u_8x16x4.val[0]));
3145 revLeft_u_8x16x4.val[1] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[1]), vget_low_u8(revLeft_u_8x16x4.val[1]));
3146 revLeft_u_8x16x4.val[2] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[2]), vget_low_u8(revLeft_u_8x16x4.val[2]));
3147 revLeft_u_8x16x4.val[3] = vcombine_u8(vget_high_u8(revLeft_u_8x16x4.val[3]), vget_low_u8(revLeft_u_8x16x4.val[3]));
3148
3149 uint8x16x4_t revRight_u_8x16x4;
3150 revRight_u_8x16x4.val[0] = vrev64q_u8(right_u_8x16x4.val[0]);
3151 revRight_u_8x16x4.val[1] = vrev64q_u8(right_u_8x16x4.val[1]);
3152 revRight_u_8x16x4.val[2] = vrev64q_u8(right_u_8x16x4.val[2]);
3153 revRight_u_8x16x4.val[3] = vrev64q_u8(right_u_8x16x4.val[3]);
3154 revRight_u_8x16x4.val[0] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[0]), vget_low_u8(revRight_u_8x16x4.val[0]));
3155 revRight_u_8x16x4.val[1] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[1]), vget_low_u8(revRight_u_8x16x4.val[1]));
3156 revRight_u_8x16x4.val[2] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[2]), vget_low_u8(revRight_u_8x16x4.val[2]));
3157 revRight_u_8x16x4.val[3] = vcombine_u8(vget_high_u8(revRight_u_8x16x4.val[3]), vget_low_u8(revRight_u_8x16x4.val[3]));
3158
3159 vst4q_u8(left, revRight_u_8x16x4);
3160 vst4q_u8(right, revLeft_u_8x16x4);
3161
3162 left += 16u * tChannels;
3163 right -= 16u * tChannels;
3164 }
3165
3166 n += blocks32 * 16u;
3167
3168 break;
3169 }
3170
3171 default:
3172 break;
3173 }
3174 }
3175 }
3176
3177#endif
3178
3179 PixelType intermediate;
3180
3181 PixelType* const pixels = (PixelType*)(data);
3182
3183 while (n < size / 2)
3184 {
3185 intermediate = pixels[n];
3186
3187 pixels[n] = pixels[size - n - 1];
3188 pixels[size - n - 1] = intermediate;
3189
3190 ++n;
3191 }
3192}
3193
3194template <typename T, unsigned int tChannels>
3195void FrameChannels::reverseRowChannelOrder(const T* source, T* target, const size_t size, const void* /*options*/)
3196{
3197 ocean_assert(source != nullptr && target != nullptr);
3198 ocean_assert(source != target);
3199 ocean_assert(size >= 1);
3200
3201#ifdef OCEAN_DEBUG
3202 const T* const debugSourceStart = source;
3203 const T* const debugSourceEnd = debugSourceStart + size * tChannels;
3204
3205 const T* const debugTargetStart = target;
3206 const T* const debugTargetEnd = debugTargetStart + size * tChannels;
3207#endif
3208
3209 if constexpr (tChannels == 1)
3210 {
3211 // we actually copy the one channel
3212
3213 memcpy(target, source, sizeof(T) * size);
3214 return;
3215 }
3216
3217 const T* const sourceEnd = source + size * tChannels;
3218
3219#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
3220
3221 if ((std::is_same<typename TypeMapper<T>::Type, uint8_t>::value))
3222 {
3223 const size_t blocks16 = size / size_t(16);
3224
3225 switch (tChannels)
3226 {
3227 case 1u:
3228 ocean_assert(false && "This should have been handled above!");
3229 break;
3230
3231 case 2u:
3232 {
3233 for (size_t n = 0; n < blocks16; ++n)
3234 {
3235 SSE::reverseChannelOrder2Channel8Bit32Elements((const uint8_t*)source, (uint8_t*)target);
3236
3237 source += 16u * tChannels;
3238 target += 16u * tChannels;
3239 }
3240
3241 break;
3242 }
3243
3244 case 3u:
3245 {
3246 for (size_t n = 0; n < blocks16; ++n)
3247 {
3248 SSE::reverseChannelOrder3Channel8Bit48Elements((const uint8_t*)source, (uint8_t*)target);
3249
3250 source += 16u * tChannels;
3251 target += 16u * tChannels;
3252 }
3253
3254 break;
3255 }
3256
3257 case 4u:
3258 {
3259 for (size_t n = 0; n < blocks16; ++n)
3260 {
3261 SSE::reverseChannelOrder4Channel8Bit64Elements((const uint8_t*)source, (uint8_t*)target);
3262
3263 source += 16u * tChannels;
3264 target += 16u * tChannels;
3265 }
3266
3267 break;
3268 }
3269
3270 default:
3271 break;
3272 }
3273 }
3274
3275#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3276
3277 if ((std::is_same<typename TypeMapper<T>::Type, uint8_t>::value))
3278 {
3279 const size_t blocks16 = size / size_t(16);
3280
3281 switch (tChannels)
3282 {
3283 case 1u:
3284 ocean_assert(false && "This should have been handled above!");
3285 break;
3286
3287 case 2u:
3288 {
3289 for (size_t n = 0; n < blocks16; ++n)
3290 {
3291 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3292 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3293
3294 const uint8x16_t sourceA_u_8x16 = vld1q_u8((const uint8_t*)source + 0);
3295 const uint8x16_t sourceB_u_8x16 = vld1q_u8((const uint8_t*)source + 16);
3296
3297 const uint8x16_t revSourceA_u_8x16 = vrev16q_u8(sourceA_u_8x16);
3298 const uint8x16_t revSourceB_u_8x16 = vrev16q_u8(sourceB_u_8x16);
3299
3300 vst1q_u8((uint8_t*)target + 0, revSourceA_u_8x16);
3301 vst1q_u8((uint8_t*)target + 16, revSourceB_u_8x16);
3302
3303 source += 16u * tChannels;
3304 target += 16u * tChannels;
3305 }
3306
3307 break;
3308 }
3309
3310 case 3u:
3311 {
3312 for (size_t n = 0; n < blocks16; ++n)
3313 {
3314 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3315 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3316
3317 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)source);
3318
3319 uint8x16x3_t revSource_u_8x16x3;
3320 revSource_u_8x16x3.val[0] = source_u_8x16x3.val[2];
3321 revSource_u_8x16x3.val[1] = source_u_8x16x3.val[1];
3322 revSource_u_8x16x3.val[2] = source_u_8x16x3.val[0];
3323
3324 vst3q_u8((uint8_t*)target, revSource_u_8x16x3);
3325
3326 source += 16u * tChannels;
3327 target += 16u * tChannels;
3328 }
3329
3330 break;
3331 }
3332
3333 case 4u:
3334 {
3335 for (size_t n = 0; n < blocks16; ++n)
3336 {
3337 ocean_assert(source >= debugSourceStart && source + 16u * tChannels <= debugSourceEnd);
3338 ocean_assert(target >= debugTargetStart && target + 16u * tChannels <= debugTargetEnd);
3339
3340 const uint8x16_t sourceA_u_8x16 = vld1q_u8((const uint8_t*)source + 0);
3341 const uint8x16_t sourceB_u_8x16 = vld1q_u8((const uint8_t*)source + 16);
3342 const uint8x16_t sourceC_u_8x16 = vld1q_u8((const uint8_t*)source + 32);
3343 const uint8x16_t sourceD_u_8x16 = vld1q_u8((const uint8_t*)source + 48);
3344
3345 const uint8x16_t revSourceA_u_8x16 = vrev32q_u8(sourceA_u_8x16);
3346 const uint8x16_t revSourceB_u_8x16 = vrev32q_u8(sourceB_u_8x16);
3347 const uint8x16_t revSourceC_u_8x16 = vrev32q_u8(sourceC_u_8x16);
3348 const uint8x16_t revSourceD_u_8x16 = vrev32q_u8(sourceD_u_8x16);
3349
3350 vst1q_u8((uint8_t*)target + 0, revSourceA_u_8x16);
3351 vst1q_u8((uint8_t*)target + 16, revSourceB_u_8x16);
3352 vst1q_u8((uint8_t*)target + 32, revSourceC_u_8x16);
3353 vst1q_u8((uint8_t*)target + 48, revSourceD_u_8x16);
3354
3355 source += 16u * tChannels;
3356 target += 16u * tChannels;
3357 }
3358
3359 break;
3360 }
3361
3362 default:
3363 break;
3364 }
3365 }
3366
3367#endif // OCEAN_HARDWARE_NEON_VERSION
3368
3369 while (source != sourceEnd)
3370 {
3371 ocean_assert(source < sourceEnd);
3372
3373 ocean_assert(source >= debugSourceStart && source + tChannels <= debugSourceEnd);
3374 ocean_assert(target >= debugTargetStart && target + tChannels <= debugTargetEnd);
3375
3376 for (unsigned int n = 0u; n < tChannels; ++n)
3377 {
3378 target[n] = source[tChannels - n - 1u];
3379 }
3380
3381 source += tChannels;
3382 target += tChannels;
3383 }
3384}
3385
3386template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
3387inline void FrameChannels::shuffleRowChannels(const T* source, T* target, const size_t size, const void* /*unusedOptions*/)
3388{
3389 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u, "Invalid channel number!");
3390 static_assert(tTargetChannels >= 1u && tTargetChannels <= 8u, "Invalid channel number!");
3391
3392 static_assert(tSourceChannels != 1u || tTargetChannels != 1u, "Invalid channel number!");
3393
3394 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels, "Invalid shuffle pattern!");
3395 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels, "Invalid shuffle pattern!");
3396 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels, "Invalid shuffle pattern!");
3397 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels, "Invalid shuffle pattern!");
3398 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels, "Invalid shuffle pattern!");
3399 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels, "Invalid shuffle pattern!");
3400 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels, "Invalid shuffle pattern!");
3401 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels, "Invalid shuffle pattern!");
3402
3403 ocean_assert(source != nullptr && target != nullptr);
3404 ocean_assert(size != 0);
3405
3406 const T* const sourceEnd = source + size * tSourceChannels;
3407
3408#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
3409
3410 if ((std::is_same<typename TypeMapper<T>::Type, uint8_t>::value))
3411 {
3412 const size_t blocks16 = size / size_t(16);
3413
3414 switch (tSourceChannels | ((tTargetChannels) << 4u))
3415 {
3416 // 4 -> 4
3417 case (4u | (4u << 4u)):
3418 {
3419 // the following shuffle patterns are known during compile time
3420
3421 constexpr unsigned int offset1 = 0x04040404u;
3422 constexpr unsigned int offset2 = 0x08080808u;
3423 constexpr unsigned int offset3 = 0x0C0C0C0Cu;
3424
3425 // converting shufflePattern16 to shufflePattern16
3426 const unsigned int shufflePattern0 = ((tShufflePattern & 0xF000u) << 12u) | ((tShufflePattern & 0x0F00u) << 8u) | ((tShufflePattern & 0x00F0u) << 4u) | ((tShufflePattern & 0x000Fu) << 0u);
3427
3428 const unsigned int shufflePattern1 = shufflePattern0 + offset1;
3429 const unsigned int shufflePattern2 = shufflePattern0 + offset2;
3430 const unsigned int shufflePattern3 = shufflePattern0 + offset3;
3431
3432 const __m128i shufflePattern128 = SSE::set128i((((unsigned long long)shufflePattern3) << 32ull) | (unsigned long long)shufflePattern2, (((unsigned long long)shufflePattern1) << 32ull) | (unsigned long long)shufflePattern0);
3433
3434 for (size_t n = 0; n < blocks16; ++n)
3435 {
3436 SSE::store128i(_mm_shuffle_epi8(SSE::load128i((const uint8_t*)source + 0), shufflePattern128), (uint8_t*)target + 0);
3437 SSE::store128i(_mm_shuffle_epi8(SSE::load128i((const uint8_t*)source + 16), shufflePattern128), (uint8_t*)target + 16);
3438 SSE::store128i(_mm_shuffle_epi8(SSE::load128i((const uint8_t*)source + 32), shufflePattern128), (uint8_t*)target + 32);
3439 SSE::store128i(_mm_shuffle_epi8(SSE::load128i((const uint8_t*)source + 48), shufflePattern128), (uint8_t*)target + 48);
3440
3441 source += 16u * tSourceChannels;
3442 target += 16u * tTargetChannels;
3443 }
3444
3445 break;
3446 }
3447
3448 default:
3449 // we do not have a NEON-based optimization
3450 break;
3451 }
3452 }
3453
3454#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3455
3456 if ((std::is_same<typename TypeMapper<T>::Type, uint8_t>::value))
3457 {
3458 const size_t blocks16 = size / size_t(16);
3459
3460 switch (tSourceChannels | ((tTargetChannels) << 4u))
3461 {
3462 // 1 -> 3
3463 case (1u | (3u << 4u)):
3464 {
3465 static_assert(tSourceChannels != 1u || tShufflePattern == 0u, "Invalid shuffle patter!");
3466
3467 for (size_t n = 0; n < blocks16; ++n)
3468 {
3469 const uint8x16_t source_u_8x16 = vld1q_u8((const uint8_t*)source);
3470
3471 uint8x16x3_t target_u_8x16x3;
3472
3473 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3474 {
3475 target_u_8x16x3.val[nT] = source_u_8x16;
3476 }
3477
3478 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3479
3480 source += 16u * tSourceChannels;
3481 target += 16u * tTargetChannels;
3482 }
3483
3484 break;
3485 }
3486
3487 // 2 -> 1
3488 case (2u | (1u << 4u)):
3489 {
3490 for (size_t n = 0; n < blocks16; ++n)
3491 {
3492 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((const uint8_t*)source);
3493
3494 constexpr unsigned int sourceChannel = tShufflePattern & 0x00000001u; // possible index values {0, 1}
3495 static_assert(sourceChannel <= 1u, "Invalid shuffle pattern!");
3496 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3497
3498 const uint8x16_t target_u_8x16 = source_u_8x16x2.val[sourceChannel];
3499
3500 vst1q_u8((uint8_t*)target, target_u_8x16);
3501
3502 source += 16u * tSourceChannels;
3503 target += 16u * tTargetChannels;
3504 }
3505
3506 break;
3507 }
3508
3509 // 2 -> 3
3510 case (2u | (3u << 4u)):
3511 {
3512 for (size_t n = 0; n < blocks16; ++n)
3513 {
3514 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((const uint8_t*)source);
3515
3516 uint8x16x3_t target_u_8x16x3;
3517
3518 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3519 {
3520 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000001u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3521
3522 target_u_8x16x3.val[nT] = source_u_8x16x2.val[(tShufflePattern >> (nT * 4u)) & 0x00000001u]; // possible index values {0, 1}
3523 }
3524
3525 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3526
3527 source += 16u * tSourceChannels;
3528 target += 16u * tTargetChannels;
3529 }
3530
3531 break;
3532 }
3533
3534 // 2 -> 4
3535 case (2u | (4u << 4u)):
3536 {
3537 for (size_t n = 0; n < blocks16; ++n)
3538 {
3539 const uint8x16x2_t source_u_8x16x2 = vld2q_u8((const uint8_t*)source);
3540
3541 uint8x16x4_t target_u_8x16x4;
3542
3543 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3544 {
3545 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000001u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3546
3547 target_u_8x16x4.val[nT] = source_u_8x16x2.val[(tShufflePattern >> (nT * 4u)) & 0x00000001u]; // possible index values {0, 1}
3548 }
3549
3550 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3551
3552 source += 16u * tSourceChannels;
3553 target += 16u * tTargetChannels;
3554 }
3555
3556 break;
3557 }
3558
3559 // 3 -> 1
3560 case (3u | (1u << 4u)):
3561 {
3562 constexpr unsigned int sourceChannel = (tShufflePattern & 0x0000000Fu) <= 2u ? (tShufflePattern & 0x0000000Fu) : 2u; // possible index values {0, 1, 2}
3563 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3564
3565 for (size_t n = 0; n < blocks16; ++n)
3566 {
3567 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)source);
3568
3569 const uint8x16_t target_u_8x16 = source_u_8x16x3.val[sourceChannel];
3570
3571 vst1q_u8((uint8_t*)target, target_u_8x16);
3572
3573 source += 16u * tSourceChannels;
3574 target += 16u * tTargetChannels;
3575 }
3576
3577 break;
3578 }
3579
3580 // 3 -> 2
3581 case (3u | (2u << 4u)):
3582 {
3583 for (size_t n = 0; n < blocks16; ++n)
3584 {
3585 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)source);
3586
3587 uint8x16x2_t target_u_8x16x2;
3588
3589 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3590 {
3591 target_u_8x16x2.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)]; // possible index values {0, 1, 2}
3592 }
3593
3594 vst2q_u8((uint8_t*)target, target_u_8x16x2);
3595
3596 source += 16u * tSourceChannels;
3597 target += 16u * tTargetChannels;
3598 }
3599
3600 break;
3601 }
3602
3603 // 3 -> 3
3604 case (3u | (3u << 4u)):
3605 {
3606 for (size_t n = 0; n < blocks16; ++n)
3607 {
3608 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)source);
3609
3610 uint8x16x3_t target_u_8x16x3;
3611
3612 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3613 {
3614 target_u_8x16x3.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)]; // possible index values {0, 1, 2}
3615 }
3616
3617 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3618
3619 source += 16u * tSourceChannels;
3620 target += 16u * tTargetChannels;
3621 }
3622
3623 break;
3624 }
3625
3626 // 4 -> 1
3627 case (4u | (1u << 4u)):
3628 {
3629 for (size_t n = 0; n < blocks16; ++n)
3630 {
3631 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((const uint8_t*)source);
3632
3633 constexpr unsigned int sourceChannel = tShufflePattern & 0x00000003u; // possible index values {0, 1, 2, 3}
3634 static_assert(sourceChannel <= 3u, "Invalid shuffle pattern!");
3635
3636 ocean_assert(sourceChannel == (tShufflePattern & 0x0000000Fu));
3637
3638 const uint8x16_t target_u_8x16 = source_u_8x16x4.val[sourceChannel];
3639
3640 vst1q_u8((uint8_t*)target, target_u_8x16);
3641
3642 source += 16u * tSourceChannels;
3643 target += 16u * tTargetChannels;
3644 }
3645
3646 break;
3647 }
3648
3649 // 4 -> 2
3650 case (4u | (2u << 4u)):
3651 {
3652 for (size_t n = 0; n < blocks16; ++n)
3653 {
3654 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((const uint8_t*)source);
3655
3656 uint8x16x2_t target_u_8x16x2;
3657
3658 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3659 {
3660 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3661
3662 target_u_8x16x2.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u]; // possible index values {0, 1, 2, 3}
3663 }
3664
3665 vst2q_u8((uint8_t*)target, target_u_8x16x2);
3666
3667 source += 16u * tSourceChannels;
3668 target += 16u * tTargetChannels;
3669 }
3670
3671 break;
3672 }
3673
3674 // 4 -> 3
3675 case (4u | (3u << 4u)):
3676 {
3677 for (size_t n = 0; n < blocks16; ++n)
3678 {
3679 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((const uint8_t*)source);
3680
3681 uint8x16x3_t target_u_8x16x3;
3682
3683 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3684 {
3685 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3686
3687 target_u_8x16x3.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u]; // possible index values {0, 1, 2, 3}
3688 }
3689
3690 vst3q_u8((uint8_t*)target, target_u_8x16x3);
3691
3692 source += 16u * tSourceChannels;
3693 target += 16u * tTargetChannels;
3694 }
3695
3696 break;
3697 }
3698
3699 // 4 -> 4
3700 case (4u | (4u << 4u)):
3701 {
3702 for (size_t n = 0; n < blocks16; ++n)
3703 {
3704 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((const uint8_t*)source);
3705
3706 uint8x16x4_t target_u_8x16x4;
3707
3708 for (unsigned int nT = 0u; nT < tTargetChannels; ++nT)
3709 {
3710 ocean_assert(((tShufflePattern >> (nT * 4u)) & 0x00000003u) == ((tShufflePattern >> (nT * 4u)) & 0x0000000Fu));
3711
3712 target_u_8x16x4.val[nT] = source_u_8x16x4.val[(tShufflePattern >> (nT * 4u)) & 0x00000003u]; // possible index values {0, 1, 2, 3}
3713 }
3714
3715 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3716
3717 source += 16u * tSourceChannels;
3718 target += 16u * tTargetChannels;
3719 }
3720
3721 break;
3722 }
3723
3724 default:
3725 // we do not have a NEON-based optimization
3726 break;
3727 }
3728 }
3729
3730#endif
3731
3732 while (source != sourceEnd)
3733 {
3734 ocean_assert(source < sourceEnd);
3735
3736 for (unsigned int n = 0u; n < tTargetChannels; ++n)
3737 {
3738 target[n] = source[(tShufflePattern >> (n * 4u)) & 0x0000000Fu];
3739 }
3740
3741 source += tSourceChannels;
3742 target += tTargetChannels;
3743 }
3744}
3745
3746template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
3747inline void FrameChannels::shuffleRowChannelsAndSetLastChannelValue(const T* source, T* target, const size_t size, const void* options)
3748{
3749 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u, "Invalid channel number!");
3750 static_assert(tTargetChannels >= 2u && tTargetChannels <= 8u, "Invalid channel number!");
3751
3752 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels, "Invalid shuffle pattern!");
3753 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels, "Invalid shuffle pattern!");
3754 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels, "Invalid shuffle pattern!");
3755 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels, "Invalid shuffle pattern!");
3756 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels, "Invalid shuffle pattern!");
3757 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels, "Invalid shuffle pattern!");
3758 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels, "Invalid shuffle pattern!");
3759 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels, "Invalid shuffle pattern!");
3760
3761 ocean_assert(source != nullptr && target != nullptr);
3762 ocean_assert(size != 0);
3763
3764 ocean_assert(options != nullptr);
3765
3766 const T lastChannelValue = *(const T*)(options);
3767
3768 const T* const sourceEnd = source + size * tSourceChannels;
3769
3770#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
3771
3772 if ((std::is_same<typename TypeMapper<T>::Type, uint8_t>::value))
3773 {
3774 const size_t blocks16 = size / size_t(16);
3775
3776 switch (tSourceChannels | ((tTargetChannels) << 4u))
3777 {
3778 // 1 -> 4
3779 case (1u | (4u << 4u)):
3780 {
3781 ocean_assert(tShufflePattern == 0u);
3782
3783 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3784
3785 uint8x16x4_t target_u_8x16x4;
3786 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3787
3788 for (size_t n = 0; n < blocks16; ++n)
3789 {
3790 const uint8x16_t source_u_8x16 = vld1q_u8((const uint8_t*)source);
3791
3792 for (unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
3793 {
3794 target_u_8x16x4.val[nT] = source_u_8x16;
3795 }
3796
3797 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3798
3799 source += 16u * tSourceChannels;
3800 target += 16u * tTargetChannels;
3801 }
3802
3803 break;
3804 }
3805
3806 // 3 -> 4
3807 case (3u | (4u << 4u)):
3808 {
3809 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3810
3811 uint8x16x4_t target_u_8x16x4;
3812 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3813
3814 for (size_t n = 0; n < blocks16; ++n)
3815 {
3816 const uint8x16x3_t source_u_8x16x3 = vld3q_u8((const uint8_t*)source);
3817
3818 for (unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
3819 {
3820 target_u_8x16x4.val[nT] = source_u_8x16x3.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 2u)]; // possible index values {0, 1, 2}
3821 }
3822
3823 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3824
3825 source += 16u * tSourceChannels;
3826 target += 16u * tTargetChannels;
3827 }
3828
3829 break;
3830 }
3831
3832 // 4 -> 4
3833 case (4u | (4u << 4u)):
3834 {
3835 const uint8x16_t lastChannelValue_u_8x16 = vmovq_n_u8(lastChannelValue);
3836
3837 uint8x16x4_t target_u_8x16x4;
3838 target_u_8x16x4.val[3] = lastChannelValue_u_8x16;
3839
3840 for (size_t n = 0; n < blocks16; ++n)
3841 {
3842 const uint8x16x4_t source_u_8x16x4 = vld4q_u8((const uint8_t*)source);
3843
3844 for (unsigned int nT = 0u; nT < tTargetChannels - 1u; ++nT)
3845 {
3846 target_u_8x16x4.val[nT] = source_u_8x16x4.val[std::min((tShufflePattern >> (nT * 4u)) & 0x0000000Fu, 3u)]; // possible index values {0, 1, 2, 3}
3847 }
3848
3849 vst4q_u8((uint8_t*)target, target_u_8x16x4);
3850
3851 source += 16u * tSourceChannels;
3852 target += 16u * tTargetChannels;
3853 }
3854
3855 break;
3856 }
3857
3858 default:
3859 // we do not have a NEON-based optimization
3860 break;
3861 }
3862 }
3863
3864#endif
3865
3866 while (source != sourceEnd)
3867 {
3868 ocean_assert(source < sourceEnd);
3869
3870 for (unsigned int n = 0u; n < tTargetChannels - 1u; ++n)
3871 {
3872 target[n] = source[(tShufflePattern >> (n * 4u)) & 0x0000000Fu];
3873 target[tTargetChannels - 1u] = lastChannelValue;
3874 }
3875
3876 source += tSourceChannels;
3877 target += tTargetChannels;
3878 }
3879}
3880
3881template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
3882inline void FrameChannels::shuffleChannels(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
3883{
3884 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u, "Invalid channel number!");
3885 static_assert(tTargetChannels >= 1u && tTargetChannels <= 8u, "Invalid channel number!");
3886
3887 static_assert(tSourceChannels != 1u || tTargetChannels != 1u, "Invalid channel number!");
3888
3889 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels, "Invalid shuffle pattern!");
3890 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels, "Invalid shuffle pattern!");
3891 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels, "Invalid shuffle pattern!");
3892 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels, "Invalid shuffle pattern!");
3893 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels, "Invalid shuffle pattern!");
3894 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels, "Invalid shuffle pattern!");
3895 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels, "Invalid shuffle pattern!");
3896 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels, "Invalid shuffle pattern!");
3897
3898 ocean_assert(source != nullptr && target != nullptr);
3899 ocean_assert(width >= 1u && height >= 1u);
3900
3901 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
3902 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
3903
3904 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
3905
3906 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::shuffleRowChannels<T, tSourceChannels, tTargetChannels, tShufflePattern>, FrameChannels::reverseRowPixelOrderInPlace<T, tTargetChannels>, areContinuous, nullptr, worker);
3907}
3908
3909template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tShufflePattern>
3910inline void FrameChannels::shuffleChannelsAndSetLastChannelValue(const T* source, const T newChannelValue, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
3911{
3912 static_assert(tSourceChannels >= 1u && tSourceChannels <= 8u, "Invalid channel number!");
3913 static_assert(tTargetChannels >= 2u && tTargetChannels <= 8u, "Invalid channel number!");
3914
3915 static_assert(((tShufflePattern & 0x0000000Fu) >> 0u) < tSourceChannels, "Invalid shuffle pattern!");
3916 static_assert(((tShufflePattern & 0x000000F0u) >> 4u) < tSourceChannels, "Invalid shuffle pattern!");
3917 static_assert(((tShufflePattern & 0x00000F00u) >> 8u) < tSourceChannels, "Invalid shuffle pattern!");
3918 static_assert(((tShufflePattern & 0x0000F000u) >> 12u) < tSourceChannels, "Invalid shuffle pattern!");
3919 static_assert(((tShufflePattern & 0x000F0000u) >> 16u) < tSourceChannels, "Invalid shuffle pattern!");
3920 static_assert(((tShufflePattern & 0x00F00000u) >> 20u) < tSourceChannels, "Invalid shuffle pattern!");
3921 static_assert(((tShufflePattern & 0x0F000000u) >> 24u) < tSourceChannels, "Invalid shuffle pattern!");
3922 static_assert(((tShufflePattern & 0xF0000000u) >> 28u) < tSourceChannels, "Invalid shuffle pattern!");
3923
3924 ocean_assert(source != nullptr && target != nullptr);
3925 ocean_assert(width >= 1u && height >= 1u);
3926
3927 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
3928 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
3929
3930 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
3931
3932 const T options = newChannelValue;
3933
3934 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::shuffleRowChannelsAndSetLastChannelValue<T, tSourceChannels, tTargetChannels, tShufflePattern>, FrameChannels::reverseRowPixelOrderInPlace<T, tTargetChannels>, areContinuous, &options, worker);
3935}
3936
3937template <unsigned int tChannels>
3938inline void FrameChannels::narrow16BitPerChannelTo8BitPerChannel(const uint16_t* source, uint8_t* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
3939{
3940 static_assert(tChannels >= 1u, "Invalid channel number!");
3941
3942 ocean_assert(source != nullptr && target != nullptr);
3943 ocean_assert(width >= 1u && height >= 1u);
3944
3945 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
3946 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
3947
3948 const bool areContinuous = sourcePaddingElements == 0u && targetPaddingElements == 0u;
3949
3950 FrameConverter::convertGenericPixelFormat(source, target, width, height, sourceStrideElements, targetStrideElements, conversionFlag, FrameChannels::narrowRow16BitPerChannelTo8BitPerChannel<tChannels>, FrameChannels::reverseRowPixelOrderInPlace<uint8_t, tChannels>, areContinuous, nullptr, worker);
3951}
3952
3953template <typename T, unsigned int tChannels, void (*tPixelFunction)(const T*, T*)>
3954void FrameChannels::applyPixelModifier(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, Worker* worker)
3955{
3956 static_assert(tChannels > 0u, "Invalid channel number!");
3957
3958 ocean_assert(source && target);
3959 ocean_assert(width != 0u && height != 0u);
3960
3961 if (worker) {
3962 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::applyPixelModifierSubset<T, tChannels, tPixelFunction>, source, target, width, height, conversionFlag, 0u, 0u), 0u, height);
3963 } else {
3964 applyPixelModifierSubset<T, tChannels, tPixelFunction>(source, target, width, height, conversionFlag, 0u, height);
3965}
3966}
3967
3968template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tPixelFunction)(const TSource*, TTarget*)>
3969void FrameChannels::applyAdvancedPixelModifier(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker* worker)
3970{
3971 static_assert(tSourceChannels > 0u, "Invalid source channel number!");
3972 static_assert(tTargetChannels > 0u, "Invalid target channel number!");
3973
3974 ocean_assert(source && target);
3975 ocean_assert(width != 0u && height != 0u);
3976
3977 if (worker)
3978 {
3979 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::applyAdvancedPixelModifierSubset<TSource, TTarget, tSourceChannels, tTargetChannels, tPixelFunction>, source, target, width, height, sourcePaddingElements, targetPaddingElements, conversionFlag, 0u, 0u), 0u, height);
3980 }
3981 else
3982 {
3983 applyAdvancedPixelModifierSubset<TSource, TTarget, tSourceChannels, tTargetChannels, tPixelFunction>(source, target, width, height, sourcePaddingElements, targetPaddingElements, conversionFlag, 0u, height);
3984 }
3985}
3986
3987template <typename TSource0, typename TSource1, typename TTarget, typename TIntermediate, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
3988void FrameChannels::applyBivariateOperator(const TSource0* source0, const TSource1* source1, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker* worker)
3989{
3990 static_assert(tSourceChannels > 0u, "Invalid source channel number!");
3991 static_assert(tTargetChannels > 0u, "Invalid target channel number!");
3992
3993 ocean_assert(source0 && source1 && target);
3994 ocean_assert(width != 0u && height != 0u);
3995
3996 if (worker)
3997 {
3998 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::applyBivariateOperatorSubset<TSource0, TSource1, TTarget, TIntermediate, tSourceChannels, tTargetChannels, tOperator>, source0, source1, target, width, height, source0PaddingElements, source1PaddingElements, targetPaddingElements, conversionFlag, 0u, 0u), 0u, height);
3999 }
4000 else
4001 {
4002 FrameChannels::applyBivariateOperatorSubset<TSource0, TSource1, TTarget, TIntermediate, tSourceChannels, tTargetChannels, tOperator>(source0, source1, target, width, height, source0PaddingElements, source1PaddingElements, targetPaddingElements, conversionFlag, 0u, height);
4003 }
4004}
4005
4006template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels>
4007void FrameChannels::applyRowOperator(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels>& rowOperatorFunction, Worker* worker)
4008{
4009 static_assert(tSourceChannels > 0u, "Invalid source channel number!");
4010 static_assert(tTargetChannels > 0u, "Invalid target channel number!");
4011
4012 ocean_assert(source != nullptr && target != nullptr);
4013 ocean_assert(width != 0u && height != 0u);
4014
4015 const unsigned int sourceStrideElements = width * tSourceChannels + sourcePaddingElements;
4016 const unsigned int targetStrideElements = width * tTargetChannels + targetPaddingElements;
4017
4018 if (worker)
4019 {
4020 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::applyRowOperatorSubset<TSource, TTarget, tSourceChannels, tTargetChannels>, source, target, width, height, sourceStrideElements, targetStrideElements, rowOperatorFunction, 0u, 0u), 0u, height);
4021 }
4022 else
4023 {
4024 applyRowOperatorSubset<TSource, TTarget, tSourceChannels, tTargetChannels>(source, target, width, height, sourceStrideElements, targetStrideElements, rowOperatorFunction, 0u, height);
4025 }
4026}
4027
4028template <typename T, unsigned int tChannels>
4029inline void FrameChannels::transformGeneric(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
4030{
4031 ocean_assert(source != nullptr && target != nullptr);
4032 ocean_assert(width >= 1u && height >= 1u);
4033
4034 const unsigned int bytesPerRow = width * sizeof(T) * tChannels;
4035
4036 const unsigned int sourceStrideBytes = width * sizeof(T) * tChannels + sizeof(T) * sourcePaddingElements;
4037 const unsigned int targetStrideBytes = width * sizeof(T) * tChannels + sizeof(T) * targetPaddingElements;
4038
4039 typedef typename TypeMapper<T>::Type MappedType;
4040
4041 const RowReversePixelOrderFunction<void> rowReversePixelOrderFunction = (const RowReversePixelOrderFunction<void>)(FrameChannels::reverseRowPixelOrder<MappedType, tChannels>);
4042
4043 if (worker && height > 200u)
4044 {
4045 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::transformGenericSubset, (const uint8_t*)(source), (uint8_t*)(target), width, height, conversionFlag, rowReversePixelOrderFunction, bytesPerRow, sourceStrideBytes, targetStrideBytes, 0u, 0u), 0u, height, 9u, 10u, 20u);
4046 }
4047 else
4048 {
4049 transformGenericSubset((const uint8_t*)(source), (uint8_t*)(target), width, height, conversionFlag, rowReversePixelOrderFunction, bytesPerRow, sourceStrideBytes, targetStrideBytes, 0u, height);
4050 }
4051}
4052
4053template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
4054void FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannel(uint8_t* const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker* worker)
4055{
4056 static_assert(tChannels >= 2u, "Invalid channel number!");
4057 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
4058
4059 ocean_assert(frame != nullptr);
4060 ocean_assert(width >= 1u && height >= 1u);
4061
4062 if (worker && height > 200u)
4063 {
4064 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, frame, width, framePaddingElements, 0u, 0u), 0u, height, 3u, 4u, 20u);
4065 }
4066 else
4067 {
4068 premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(frame, width, framePaddingElements, 0u, height);
4069 }
4070}
4071
4072template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
4073void FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
4074{
4075 static_assert(tChannels >= 2u, "Invalid channel number!");
4076 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
4077
4078 ocean_assert(source != nullptr && target != nullptr);
4079 ocean_assert(width >= 1u && height >= 1u);
4080
4081 if (worker && height > 200u)
4082 {
4083 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, source, target, width, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height, 5u, 6u, 20u);
4084 }
4085 else
4086 {
4087 premultipliedAlphaToStraightAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(source, target, width, sourcePaddingElements, targetPaddingElements, 0u, height);
4088 }
4089}
4090
4091template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
4092void FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannel(uint8_t* const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker* worker)
4093{
4094 static_assert(tChannels >= 2u, "Invalid channel number!");
4095 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
4096
4097 ocean_assert(frame != nullptr);
4098 ocean_assert(width >= 1u && height >= 1u);
4099
4100 if (worker && height > 200u)
4101 {
4102 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, frame, width, framePaddingElements, 0u, 0u), 0u, height, 3u, 4u, 20u);
4103 }
4104 else
4105 {
4106 straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(frame, width, framePaddingElements, 0u, height);
4107 }
4108}
4109
4110template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
4111void FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannel(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker* worker)
4112{
4113 static_assert(tChannels >= 2u, "Invalid channel number!");
4114 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
4115
4116 ocean_assert(source != nullptr && target != nullptr);
4117 ocean_assert(width >= 1u && height >= 1u);
4118
4119 if (worker && height > 200u)
4120 {
4121 worker->executeFunction(Worker::Function::createStatic(&FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>, source, target, width, sourcePaddingElements, targetPaddingElements, 0u, 0u), 0u, height, 5u, 6u, 20u);
4122 }
4123 else
4124 {
4125 straightAlphaToPremultipliedAlpha8BitPerChannelSubset<tChannels, tAlphaChannelIndex>(source, target, width, sourcePaddingElements, targetPaddingElements, 0u, height);
4126 }
4127}
4128
4129template <unsigned int tChannels>
4130void FrameChannels::narrowRow16BitPerChannelTo8BitPerChannel(const uint16_t* source, uint8_t* target, const size_t size, const void* /* unusedParameters */)
4131{
4132 static_assert(tChannels >= 1u, "Invalid channel number!");
4133
4134 ocean_assert(source != nullptr && target != nullptr);
4135 ocean_assert(size > 0);
4136
4137 const uint16_t* const sourceEnd = source + size * tChannels;
4138
4139#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
4140
4141 const size_t blocks8 = size / size_t(8);
4142
4143 switch (tChannels)
4144 {
4145 case 4u:
4146 {
4147 for (size_t n = 0; n < blocks8; ++n)
4148 {
4149 const uint16x8_t sourceA_u_16x8 = vld1q_u16(source + 0);
4150 const uint16x8_t sourceB_u_16x8 = vld1q_u16(source + 8);
4151 const uint16x8_t sourceC_u_16x8 = vld1q_u16(source + 16);
4152 const uint16x8_t sourceD_u_16x8 = vld1q_u16(source + 24);
4153
4154 const uint8x16_t targetAB_u_8x16 = vcombine_u8(vqrshrn_n_u16(sourceA_u_16x8, 8), vqrshrn_n_u16(sourceB_u_16x8, 8)); // narrowing rounded right shift: target = (source + 128) / 256
4155 const uint8x16_t targetCD_u_8x16 = vcombine_u8(vqrshrn_n_u16(sourceC_u_16x8, 8), vqrshrn_n_u16(sourceD_u_16x8, 8));
4156
4157 vst1q_u8(target + 0, targetAB_u_8x16);
4158 vst1q_u8(target + 16, targetCD_u_8x16);
4159
4160 source += 8u * tChannels;
4161 target += 8u * tChannels;
4162 }
4163
4164 break;
4165 }
4166
4167 default:
4168 break;
4169 }
4170
4171#endif
4172
4173 while (source != sourceEnd)
4174 {
4175 ocean_assert(source < sourceEnd);
4176
4177 for (unsigned int n = 0u; n < tChannels; ++n)
4178 {
4179 ocean_assert((uint16_t)(source[n] >> 8u) <= 255u);
4180 target[n] = (uint8_t)(source[n] >> 8u);
4181 }
4182
4183 source += tChannels;
4184 target += tChannels;
4185 }
4186}
4187
4188template <typename T, unsigned int tSourceChannels, bool tAddToFront>
4189void FrameChannels::addChannelRow(const void** sources, void** targets, const unsigned int multipleRowIndex, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const void* options)
4190{
4191 static_assert(tSourceChannels != 0u, "Invalid channel number!");
4192 static_assert(sizeof(size_t) == sizeof(const T*), "Invalid pointer size!");
4193
4194 ocean_assert(sources != nullptr && targets != nullptr);
4195 ocean_assert(width != 0u && height != 0u);
4196 ocean_assert(multipleRowIndex < height);
4197 ocean_assert(options != nullptr);
4198
4199 const T* source = (const T*)(sources[0]);
4200 const T* sourceOneChannel = (const T*)(sources[1]);
4201 ocean_assert(source != nullptr && sourceOneChannel != nullptr);
4202
4203 T* target = (T*)(targets[0]);
4204 ocean_assert(target != nullptr);
4205
4206 const unsigned int* uintOptions = (const unsigned int*)options;
4207 ocean_assert(uintOptions != nullptr);
4208
4209 const unsigned int sourcePaddingElements = uintOptions[0];
4210 const unsigned int sourceOneChannelPaddingElements = uintOptions[1];
4211 const unsigned int targetPaddingElements = uintOptions[2];
4212
4213 const unsigned int targetChannels = tSourceChannels + 1u;
4214
4215 const unsigned int sourceStrideElements = tSourceChannels * width + sourcePaddingElements;
4216 const unsigned int sourceOneChannelStrideElements = width + sourceOneChannelPaddingElements;
4217 const unsigned int targetStrideElements = targetChannels * width + targetPaddingElements;
4218
4219 const bool flipTarget = conversionFlag == CONVERT_FLIPPED || conversionFlag == CONVERT_FLIPPED_AND_MIRRORED;
4220 const bool mirrorTarget = conversionFlag == CONVERT_MIRRORED || conversionFlag == CONVERT_FLIPPED_AND_MIRRORED;
4221
4222 const T* sourceRow = source + sourceStrideElements * multipleRowIndex;
4223 const T* sourceOneChannelRow = sourceOneChannel + sourceOneChannelStrideElements * multipleRowIndex;
4224 T* targetRow = flipTarget ? target + targetStrideElements * (height - multipleRowIndex - 1u) : target + targetStrideElements * multipleRowIndex;
4225
4226 if (mirrorTarget == false)
4227 {
4228 for (unsigned int n = 0u; n < width; ++n)
4229 {
4230 if constexpr (tAddToFront)
4231 {
4232 targetRow[0] = sourceOneChannelRow[0];
4233
4234 for (unsigned int c = 0u; c < tSourceChannels; ++c)
4235 {
4236 targetRow[c + 1u] = sourceRow[c];
4237 }
4238 }
4239 else
4240 {
4241 for (unsigned int c = 0u; c < tSourceChannels; ++c)
4242 {
4243 targetRow[c] = sourceRow[c];
4244 }
4245
4246 targetRow[tSourceChannels] = sourceOneChannelRow[0];
4247 }
4248
4249 sourceRow += tSourceChannels;
4250 sourceOneChannelRow++;
4251
4252 targetRow += targetChannels;
4253 }
4254 }
4255 else
4256 {
4257 targetRow += targetChannels * (width - 1u);
4258
4259 for (unsigned int n = 0u; n < width; ++n)
4260 {
4261 if constexpr (tAddToFront)
4262 {
4263 targetRow[0] = sourceOneChannelRow[0];
4264
4265 for (unsigned int c = 0u; c < tSourceChannels; ++c)
4266 {
4267 targetRow[c + 1u] = sourceRow[c];
4268 }
4269 }
4270 else
4271 {
4272 for (unsigned int c = 0u; c < tSourceChannels; ++c)
4273 {
4274 targetRow[c] = sourceRow[c];
4275 }
4276
4277 targetRow[tSourceChannels] = sourceOneChannelRow[0];
4278 }
4279
4280 sourceRow += tSourceChannels;
4281 sourceOneChannelRow++;
4282
4283 targetRow -= targetChannels;
4284 }
4285 }
4286}
4287
4288template <typename T, unsigned int tSourceChannels, bool tAddToFront>
4289void FrameChannels::addChannelValueRow(const T* source, T* target, const size_t size, const void* channelValueParameter)
4290{
4291 static_assert(tSourceChannels != 0u, "Invalid channel number!");
4292
4293 ocean_assert(source != nullptr && target != nullptr);
4294 ocean_assert(size > 0);
4295 ocean_assert(channelValueParameter != nullptr);
4296
4297 const T& channelValue = *((const T*)channelValueParameter);
4298
4299 const unsigned int targetChannels = tSourceChannels + 1u;
4300
4301 for (size_t n = 0; n < size; ++n)
4302 {
4303 if constexpr (tAddToFront)
4304 {
4305 target[0] = channelValue;
4306
4307 for (unsigned int c = 0u; c < tSourceChannels; ++c)
4308 {
4309 target[c + 1u] = source[c];
4310 }
4311 }
4312 else
4313 {
4314 for (unsigned int c = 0u; c < tSourceChannels; ++c)
4315 {
4316 target[c] = source[c];
4317 }
4318
4319 target[tSourceChannels] = channelValue;
4320 }
4321
4322 source += tSourceChannels;
4323 target += targetChannels;
4324 }
4325}
4326
4327template <typename T, unsigned int tSourceChannels, unsigned int tTargetChannels, unsigned int tSourceChannelIndex, unsigned int tTargetChannelIndex>
4328void FrameChannels::copyChannelRow(const T* source, T* target, const size_t size, const void* /*unusedParameters*/)
4329{
4330 static_assert(tSourceChannels != 0u, "Invalid channel number!");
4331 static_assert(tTargetChannels != 0u, "Invalid channel number!");
4332
4333 static_assert(tSourceChannelIndex < tSourceChannels, "Invalid channel number!");
4334 static_assert(tTargetChannelIndex < tTargetChannels, "Invalid channel number!");
4335
4336 ocean_assert(source != nullptr && target != nullptr);
4337 ocean_assert(size > 0);
4338
4339 for (size_t n = 0; n < size; ++n)
4340 {
4341 target[tTargetChannelIndex] = source[tSourceChannelIndex];
4342
4343 source += tSourceChannels;
4344 target += tTargetChannels;
4345 }
4346}
4347
4348template <typename TSource, typename TTarget>
4349void FrameChannels::separateTo1ChannelRuntime(const TSource* const sourceFrame, TTarget* const* const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int* targetFramesPaddingElements)
4350{
4351 ocean_assert(sourceFrame != nullptr);
4352 ocean_assert(targetFrames != nullptr);
4353
4354 ocean_assert(width != 0u && height != 0u);
4355 ocean_assert(channels != 0u);
4356
4357#ifdef OCEAN_DEBUG
4358 for (unsigned int c = 0u; c < channels; ++c)
4359 {
4360 ocean_assert(targetFrames[c] != nullptr);
4361 }
4362#endif
4363
4364 if (sourceFramePaddingElements == 0u && targetFramesPaddingElements == nullptr)
4365 {
4366 for (unsigned int n = 0u; n < width * height; ++n)
4367 {
4368 for (unsigned int c = 0u; c < channels; ++c)
4369 {
4370 targetFrames[c][n] = TTarget(sourceFrame[n * channels + c]);
4371 }
4372 }
4373 }
4374 else if (targetFramesPaddingElements == nullptr)
4375 {
4376 ocean_assert(sourceFramePaddingElements != 0u);
4377
4378 const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
4379
4380 for (unsigned int y = 0u; y < height; ++y)
4381 {
4382 const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
4383
4384 const unsigned int targetRowOffset = y * width;
4385
4386 for (unsigned int x = 0u; x < width; ++x)
4387 {
4388 for (unsigned int c = 0u; c < channels; ++c)
4389 {
4390 *(targetFrames[c] + targetRowOffset + x) = TTarget(*(sourceRow + x * channels + c));
4391 }
4392 }
4393 }
4394 }
4395 else
4396 {
4397 const unsigned int sourceFrameStrideElements = width * channels + sourceFramePaddingElements;
4398
4399 Indices32 targetFrameStrideElements(channels);
4400
4401 for (unsigned int c = 0u; c < channels; ++c)
4402 {
4403 targetFrameStrideElements[c] = width + targetFramesPaddingElements[c];
4404 }
4405
4406 for (unsigned int y = 0u; y < height; ++y)
4407 {
4408 const TSource* const sourceRow = sourceFrame + y * sourceFrameStrideElements;
4409
4410 for (unsigned int x = 0u; x < width; ++x)
4411 {
4412 for (unsigned int c = 0u; c < channels; ++c)
4413 {
4414 *(targetFrames[c] + y * targetFrameStrideElements[c] + x) = TTarget(*(sourceRow + x * channels + c));
4415 }
4416 }
4417 }
4418 }
4419}
4420
4421template <typename TSource, typename TTarget>
4422void FrameChannels::zipChannelsRuntime(const TSource* const* sourceFrames, TTarget* const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int* sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
4423{
4424 ocean_assert(sourceFrames != nullptr);
4425 ocean_assert(targetFrame != nullptr);
4426
4427 ocean_assert(width != 0u && height != 0u);
4428 ocean_assert(channels != 0u);
4429
4430 bool allSourceFramesContinuous = true;
4431
4432 if (sourceFramesPaddingElements != nullptr)
4433 {
4434 for (unsigned int n = 0u; n < channels; ++n)
4435 {
4436 if (sourceFramesPaddingElements[n] != 0u)
4437 {
4438 allSourceFramesContinuous = false;
4439 break;
4440 }
4441 }
4442 }
4443
4444 if (allSourceFramesContinuous && targetFramePaddingElements == 0u)
4445 {
4446 for (unsigned int n = 0u; n < width * height; ++n)
4447 {
4448 for (unsigned int c = 0u; c < channels; ++c)
4449 {
4450 targetFrame[n * channels + c] = TTarget(sourceFrames[c][n]);
4451 }
4452 }
4453 }
4454 else
4455 {
4456 const unsigned int targetFrameStrideElements = width * channels + targetFramePaddingElements;
4457
4458 Indices32 sourceFrameStrideElements(channels);
4459
4460 for (unsigned int c = 0u; c < channels; ++c)
4461 {
4462 if (sourceFramesPaddingElements == nullptr)
4463 {
4464 sourceFrameStrideElements[c] = width;
4465 }
4466 else
4467 {
4468 sourceFrameStrideElements[c] = width + sourceFramesPaddingElements[c];
4469 }
4470 }
4471
4472 for (unsigned int y = 0u; y < height; ++y)
4473 {
4474 TTarget* const targetRow = targetFrame + y * targetFrameStrideElements;
4475
4476 for (unsigned int x = 0u; x < width; ++x)
4477 {
4478 for (unsigned int c = 0u; c < channels; ++c)
4479 {
4480 *(targetRow + x * channels + c) = TTarget(*(sourceFrames[c] + y * sourceFrameStrideElements[c] + x));
4481 }
4482 }
4483 }
4484 }
4485}
4486
4487template <typename T, unsigned int tChannel, unsigned int tChannels>
4488void FrameChannels::setChannelSubset(T* frame, const unsigned int width, const T value, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
4489{
4490 static_assert(tChannels >= 1u, "Invalid channel number!");
4491 static_assert(tChannel < tChannels, "Invalid channel index!");
4492
4493 ocean_assert(frame != nullptr);
4494
4495 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
4496
4497 frame += firstRow * frameStrideElements + tChannel;
4498
4499 for (unsigned int n = 0u; n < numberRows; ++n)
4500 {
4501 for (unsigned int x = 0u; x < width; ++x)
4502 {
4503 frame[x * tChannels] = value;
4504 }
4505
4506 frame += frameStrideElements;
4507 }
4508}
4509
4510template <typename T, unsigned int tChannels, void (*tPixelFunction)(const T*, T*)>
4511void FrameChannels::applyPixelModifierSubset(const T* source, T* target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
4512{
4513 static_assert(tChannels >= 1u, "Invalid channel number");
4514
4515 ocean_assert(source && target);
4516 ocean_assert(source != target);
4517
4518 ocean_assert(numberRows > 0u);
4519 ocean_assert(firstRow + numberRows <= height);
4520
4521 const unsigned int widthElements = width * tChannels;
4522 const unsigned int targetBlockSize = widthElements * numberRows;
4523
4524 switch (conversionFlag)
4525 {
4526 case CONVERT_NORMAL:
4527 {
4528 source += firstRow * widthElements;
4529 target += firstRow * widthElements;
4530
4531 const T* const targetEnd = target + targetBlockSize;
4532
4533 while (target != targetEnd)
4534 {
4535 tPixelFunction(source, target);
4536
4537 source += tChannels;
4538 target += tChannels;
4539 }
4540
4541 break;
4542 }
4543
4544 case CONVERT_FLIPPED:
4545 {
4546 source += firstRow * widthElements;
4547 target += width * height * tChannels - (firstRow + 1u) * widthElements;
4548
4549 const T* const targetEnd = target - targetBlockSize;
4550
4551 while (target != targetEnd)
4552 {
4553 const T* const targetRowEnd = target + widthElements;
4554
4555 while (target != targetRowEnd)
4556 {
4557 tPixelFunction(source, target);
4558
4559 source += tChannels;
4560 target += tChannels;
4561 }
4562
4563 target -= (widthElements << 1); // width * tChannels * 2
4564 }
4565
4566 break;
4567 }
4568
4569 case CONVERT_MIRRORED:
4570 {
4571 source += firstRow * widthElements;
4572 target += (firstRow + 1u) * widthElements;
4573
4574 const T* const targetEnd = target + targetBlockSize;
4575
4576 while (target != targetEnd)
4577 {
4578 const T* const targetRowEnd = target - widthElements;
4579
4580 while (target != targetRowEnd)
4581 {
4582 tPixelFunction(source, target -= tChannels);
4583
4584 source += tChannels;
4585 }
4586
4587 target += widthElements << 1; // width * tChannels * 2;
4588 }
4589
4590 break;
4591 }
4592
4594 {
4595 source += firstRow * widthElements;
4596 target += width * height * tChannels - firstRow * widthElements;
4597
4598 const T* const targetEnd = target - targetBlockSize;
4599
4600 while (target != targetEnd)
4601 {
4602 tPixelFunction(source, target -= tChannels);
4603
4604 source += tChannels;
4605 }
4606
4607 break;
4608 }
4609
4610 // default: this case is not handled
4611 }
4612}
4613
4614template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tPixelFunction)(const TSource*, TTarget*)>
4615void FrameChannels::applyAdvancedPixelModifierSubset(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
4616{
4617 static_assert(tSourceChannels >= 1u, "Invalid source channel number");
4618 static_assert(tTargetChannels >= 1u, "Invalid target channel number");
4619
4620 ocean_assert(source && target);
4621 ocean_assert((void*)source != (void*)target);
4622
4623 ocean_assert(numberRows != 0u);
4624 ocean_assert(firstRow + numberRows <= height);
4625
4626 const unsigned int sourceWidthElements = width * tSourceChannels;
4627 const unsigned int targetWidthElements = width * tTargetChannels;
4628
4629 const unsigned int sourceStrideElements = sourceWidthElements + sourcePaddingElements;
4630 const unsigned int targetStrideElements = targetWidthElements + targetPaddingElements;
4631
4632 switch (conversionFlag)
4633 {
4634 case CONVERT_NORMAL:
4635 {
4636 for (unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4637 {
4638 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4639 TTarget* targetPixel = target + rowIndex * targetStrideElements;
4640
4641 for (unsigned int x = 0u; x < width; ++x)
4642 {
4643 tPixelFunction(sourcePixel, targetPixel);
4644
4645 sourcePixel += tSourceChannels;
4646 targetPixel += tTargetChannels;
4647 }
4648 }
4649
4650 break;
4651 }
4652
4653 case CONVERT_FLIPPED:
4654 {
4655 for (unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4656 {
4657 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4658 TTarget* targetPixel = target + (height - rowIndex - 1u) * targetStrideElements;
4659
4660 for (unsigned int x = 0u; x < width; ++x)
4661 {
4662 tPixelFunction(sourcePixel, targetPixel);
4663
4664 sourcePixel += tSourceChannels;
4665 targetPixel += tTargetChannels;
4666 }
4667 }
4668
4669 break;
4670 }
4671
4672 case CONVERT_MIRRORED:
4673 {
4674 for (unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4675 {
4676 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4677
4678 TTarget* const targetRowBegin = target + rowIndex * targetStrideElements;
4679 TTarget* targetPixel = targetRowBegin + targetWidthElements - tTargetChannels;
4680
4681 for (unsigned int x = 0u; x < width; ++x)
4682 {
4683 ocean_assert(targetPixel >= targetRowBegin);
4684 tPixelFunction(sourcePixel, targetPixel);
4685
4686 sourcePixel += tSourceChannels;
4687 targetPixel -= tTargetChannels;
4688 }
4689 }
4690
4691 break;
4692 }
4693
4695 {
4696 for (unsigned int rowIndex = firstRow; rowIndex < firstRow + numberRows; ++rowIndex)
4697 {
4698 const TSource* sourcePixel = source + rowIndex * sourceStrideElements;
4699
4700 TTarget* const targetRowBegin = target + (height - rowIndex - 1u) * targetStrideElements;
4701 TTarget* targetPixel = targetRowBegin + targetWidthElements - tTargetChannels;
4702
4703 for (unsigned int x = 0u; x < width; ++x)
4704 {
4705 ocean_assert(targetPixel >= targetRowBegin);
4706 tPixelFunction(sourcePixel, targetPixel);
4707
4708 sourcePixel += tSourceChannels;
4709 targetPixel -= tTargetChannels;
4710 }
4711 }
4712
4713 break;
4714 }
4715
4716 // default: this case is not handled
4717 }
4718}
4719
4720template <typename TSource0, typename TSource1, typename TTarget, typename TIntermediate, unsigned int tSourceChannels, unsigned int tTargetChannels, void (*tOperator)(const TSource0*, const TSource1*, TTarget*)>
4721void FrameChannels::applyBivariateOperatorSubset(const TSource0* source0, const TSource1* source1, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
4722{
4723 static_assert(tSourceChannels >= 1u, "Invalid source channel number");
4724 static_assert(tTargetChannels >= 1u, "Invalid target channel number");
4725 static_assert(tOperator, "Invalid operator function");
4726
4727 ocean_assert(source0 != nullptr && source1 != nullptr && target != nullptr);
4728 ocean_assert((const void*)(source0) != (const void*)(target));
4729 ocean_assert((const void*)(source1) != (const void*)(target));
4730
4731 ocean_assert(numberRows != 0u);
4732 ocean_assert(firstRow + numberRows <= height);
4733
4734 const unsigned int source0StrideElements = width * tSourceChannels + source0PaddingElements;
4735 const unsigned int source1StrideElements = width * tSourceChannels + source1PaddingElements;
4736
4737 const unsigned int targetWidthElements = width * tTargetChannels;
4738
4739 const unsigned int targetStrideElements = targetWidthElements + targetPaddingElements;
4740
4741 switch (conversionFlag)
4742 {
4743 case CONVERT_NORMAL:
4744 {
4745 for (unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4746 {
4747 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4748 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4749
4750 TTarget* rowTarget = target + rowIndex * targetStrideElements;
4751 const TTarget* const rowTargetEnd = rowTarget + targetWidthElements;
4752
4753 while (rowTarget != rowTargetEnd)
4754 {
4755 ocean_assert(rowTarget < rowTargetEnd);
4756
4757 tOperator(rowSource0, rowSource1, rowTarget);
4758
4759 rowSource0 += tSourceChannels;
4760 rowSource1 += tSourceChannels;
4761
4762 rowTarget += tTargetChannels;
4763 }
4764 }
4765
4766 return;
4767 }
4768
4769 case CONVERT_FLIPPED:
4770 {
4771 for (unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4772 {
4773 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4774 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4775
4776 TTarget* rowTarget = target + (height - rowIndex - 1u) * targetStrideElements;
4777 const TTarget* const rowTargetEnd = rowTarget + targetWidthElements;
4778
4779 while (rowTarget != rowTargetEnd)
4780 {
4781 ocean_assert(rowTarget < rowTargetEnd);
4782
4783 tOperator(rowSource0, rowSource1, rowTarget);
4784
4785 rowSource0 += tSourceChannels;
4786 rowSource1 += tSourceChannels;
4787
4788 rowTarget += tTargetChannels;
4789 }
4790 }
4791
4792 return;
4793 }
4794
4795 case CONVERT_MIRRORED:
4796 {
4797 for (unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4798 {
4799 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4800 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4801
4802 TTarget* rowTarget = target + rowIndex * targetStrideElements + targetWidthElements - tTargetChannels;
4803 const TTarget* const rowTargetEnd = rowTarget - targetWidthElements;
4804
4805 while (rowTarget != rowTargetEnd)
4806 {
4807 ocean_assert(rowTarget > rowTargetEnd);
4808
4809 tOperator(rowSource0, rowSource1, rowTarget);
4810
4811 rowSource0 += tSourceChannels;
4812 rowSource1 += tSourceChannels;
4813
4814 rowTarget -= tTargetChannels;
4815 }
4816 }
4817
4818 return;
4819 }
4820
4822 {
4823 for (unsigned int rowIndex = firstRow; rowIndex < (firstRow + numberRows); ++rowIndex)
4824 {
4825 const TSource0* rowSource0 = source0 + rowIndex * source0StrideElements;
4826 const TSource1* rowSource1 = source1 + rowIndex * source1StrideElements;
4827
4828 TTarget* rowTarget = target + (height - rowIndex - 1u) * targetStrideElements + targetWidthElements - tTargetChannels;
4829 const TTarget* const rowTargetEnd = rowTarget - targetWidthElements;
4830
4831 while (rowTarget != rowTargetEnd)
4832 {
4833 ocean_assert(rowTarget > rowTargetEnd);
4834
4835 tOperator(rowSource0, rowSource1, rowTarget);
4836
4837 rowSource0 += tSourceChannels;
4838 rowSource1 += tSourceChannels;
4839
4840 rowTarget -= tTargetChannels;
4841 }
4842 }
4843
4844 return;
4845 }
4846
4847 default:
4848 ocean_assert(false && "This should never happen!");
4849 break;
4850 }
4851}
4852
4853template <typename TSource, typename TTarget, unsigned int tSourceChannels, unsigned int tTargetChannels>
4854void FrameChannels::applyRowOperatorSubset(const TSource* source, TTarget* target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const RowOperatorFunction<TSource, TTarget, tSourceChannels, tTargetChannels> rowOperatorFunction, const unsigned int firstRow, const unsigned int numberRows)
4855{
4856 static_assert(tSourceChannels >= 1u, "Invalid source channel number");
4857 static_assert(tTargetChannels >= 1u, "Invalid target channel number");
4858
4859 ocean_assert(source != nullptr && target != nullptr);
4860 ocean_assert((const void*)source != (const void*)target);
4861
4862 ocean_assert(width * tSourceChannels <= sourceStrideElements);
4863 ocean_assert(width * tTargetChannels <= targetStrideElements);
4864
4865 ocean_assert(rowOperatorFunction != nullptr);
4866
4867 ocean_assert(numberRows != 0u);
4868 ocean_assert(firstRow + numberRows <= height);
4869
4870 for (unsigned int y = firstRow; y < firstRow + numberRows; ++y)
4871 {
4872 rowOperatorFunction(source + y * sourceStrideElements, target + y * targetStrideElements, width, height, y, sourceStrideElements, targetStrideElements);
4873 }
4874}
4875
4876template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2>
4877void FrameChannels::convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* channelMultiplicationFactors_128)
4878{
4879 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2, "Invalid channel factors!");
4880
4881 ocean_assert(channelMultiplicationFactors_128 != nullptr);
4882 const unsigned int* channelFactors_128 = reinterpret_cast<const unsigned int*>(channelMultiplicationFactors_128);
4883 ocean_assert(channelFactors_128 != nullptr);
4884
4885 const unsigned int factorChannel0_128 = channelFactors_128[0];
4886 const unsigned int factorChannel1_128 = channelFactors_128[1];
4887 const unsigned int factorChannel2_128 = channelFactors_128[2];
4888
4889 ocean_assert(factorChannel0_128 <= 128u && factorChannel1_128 <= 128u && factorChannel2_128 <= 128u);
4890 ocean_assert(factorChannel0_128 + factorChannel1_128 + factorChannel2_128 == 128u);
4891
4892 ocean_assert(tUseFactorChannel0 == (factorChannel0_128 != 0u));
4893 ocean_assert(tUseFactorChannel1 == (factorChannel1_128 != 0u));
4894 ocean_assert(tUseFactorChannel2 == (factorChannel2_128 != 0u));
4895
4896 ocean_assert(source != nullptr && target != nullptr && size >= 1);
4897
4898 const uint8_t* const targetEnd = target + size;
4899
4900#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
4901
4902 constexpr size_t blockSize = 16;
4903 const size_t blocks = size / blockSize;
4904
4905 const __m128i multiplicationFactors0_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel0_128));
4906 const __m128i multiplicationFactors1_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel1_128));
4907 const __m128i multiplicationFactors2_128_u_16x8 = _mm_set1_epi16(int16_t(factorChannel2_128));
4908
4909 for (size_t n = 0; n < blocks; ++n)
4910 {
4911 convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(source, target, multiplicationFactors0_128_u_16x8, multiplicationFactors1_128_u_16x8, multiplicationFactors2_128_u_16x8);
4912
4913 source += blockSize * size_t(3);
4914 target += blockSize;
4915 }
4916
4917#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
4918
4919 constexpr size_t blockSize = 8;
4920 const size_t blocks = size / blockSize;
4921
4922 const uint8x8_t factorChannel0_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel0_128);
4923 const uint8x8_t factorChannel1_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel1_128);
4924 const uint8x8_t factorChannel2_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel2_128);
4925
4926 for (size_t n = 0; n < blocks; ++n)
4927 {
4928 convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON<tUseFactorChannel0, tUseFactorChannel1, tUseFactorChannel2>(source, target, factorChannel0_128_u_8x8, factorChannel1_128_u_8x8, factorChannel2_128_u_8x8);
4929
4930 source += blockSize * size_t(3);
4931 target += blockSize;
4932 }
4933
4934#endif
4935
4936 while (target != targetEnd)
4937 {
4938 ocean_assert(target < targetEnd);
4939
4940 const unsigned int channel0 = tUseFactorChannel0 ? (source[0] * factorChannel0_128) : 0u;
4941 const unsigned int channel1 = tUseFactorChannel1 ? (source[1] * factorChannel1_128) : 0u;
4942 const unsigned int channel2 = tUseFactorChannel2 ? (source[2] * factorChannel2_128) : 0u;
4943
4944 *target++ = (uint8_t)((channel0 + channel1 + channel2 + 64u) >> 7u);
4945 source += 3;
4946 }
4947}
4948
4949template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2, bool tUseFactorChannel3>
4950void FrameChannels::convertRow4ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t* source, uint8_t* target, const size_t size, const void* channelMultiplicationFactors_128)
4951{
4952 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2 || tUseFactorChannel3, "Invalid channel factors!");
4953
4954 ocean_assert(channelMultiplicationFactors_128 != nullptr);
4955 const unsigned int* channelFactors_128 = reinterpret_cast<const unsigned int*>(channelMultiplicationFactors_128);
4956 ocean_assert(channelFactors_128 != nullptr);
4957
4958 const unsigned int factorChannel0_128 = channelFactors_128[0];
4959 const unsigned int factorChannel1_128 = channelFactors_128[1];
4960 const unsigned int factorChannel2_128 = channelFactors_128[2];
4961 const unsigned int factorChannel3_128 = channelFactors_128[3];
4962
4963 ocean_assert(factorChannel0_128 <= 127u && factorChannel1_128 <= 127u && factorChannel2_128 <= 127u && factorChannel3_128 <= 127u);
4964 ocean_assert(factorChannel0_128 + factorChannel1_128 + factorChannel2_128 + factorChannel3_128 == 128u);
4965
4966 ocean_assert(tUseFactorChannel0 == (factorChannel0_128 != 0u));
4967 ocean_assert(tUseFactorChannel1 == (factorChannel1_128 != 0u));
4968 ocean_assert(tUseFactorChannel2 == (factorChannel2_128 != 0u));
4969 ocean_assert(tUseFactorChannel3 == (factorChannel3_128 != 0u));
4970
4971 ocean_assert(source != nullptr && target != nullptr && size >= 1);
4972
4973 const uint8_t* const targetEnd = target + size;
4974
4975#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
4976
4977 constexpr size_t blockSize = 16;
4978 const size_t blocks = size / blockSize;
4979
4980 const __m128i m128_multiplicationFactors = _mm_set1_epi32(int(factorChannel0_128 | (factorChannel1_128 << 8u) | (factorChannel2_128 << 16u) | (factorChannel3_128 << 24u)));
4981
4982 for (size_t n = 0; n < blocks; ++n)
4983 {
4984 convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(source, target, m128_multiplicationFactors);
4985
4986 source += blockSize * size_t(4);
4987 target += blockSize;
4988 }
4989
4990#elif defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
4991
4992 constexpr size_t blockSize = 8;
4993 const size_t blocks = size / blockSize;
4994
4995 const uint8x8_t factorChannel0_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel0_128);
4996 const uint8x8_t factorChannel1_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel1_128);
4997 const uint8x8_t factorChannel2_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel2_128);
4998 const uint8x8_t factorChannel3_128_u_8x8 = vdup_n_u8((uint8_t)factorChannel3_128);
4999
5000 for (size_t n = 0; n < blocks; ++n)
5001 {
5002 convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON<tUseFactorChannel0, tUseFactorChannel1, tUseFactorChannel2, tUseFactorChannel3>(source, target, factorChannel0_128_u_8x8, factorChannel1_128_u_8x8, factorChannel2_128_u_8x8, factorChannel3_128_u_8x8);
5003
5004 source += blockSize * size_t(4);
5005 target += blockSize;
5006 }
5007
5008#endif
5009
5010 while (target != targetEnd)
5011 {
5012 ocean_assert(target < targetEnd);
5013
5014 const unsigned int channel0 = tUseFactorChannel0 ? (source[0] * factorChannel0_128) : 0u;
5015 const unsigned int channel1 = tUseFactorChannel1 ? (source[1] * factorChannel1_128) : 0u;
5016 const unsigned int channel2 = tUseFactorChannel2 ? (source[2] * factorChannel2_128) : 0u;
5017 const unsigned int channel3 = tUseFactorChannel3 ? (source[3] * factorChannel3_128) : 0u;
5018
5019 *target++ = (uint8_t)((channel0 + channel1 + channel2 + channel3 + 64u) >> 7u);
5020 source += 4;
5021 }
5022}
5023
5024template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
5025void FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset(uint8_t* const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5026{
5027 static_assert(tChannels >= 2u, "Invalid channel number!");
5028 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
5029
5030 ocean_assert(frame != nullptr);
5031 ocean_assert(width >= 1u);
5032
5033 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
5034
5035 uint8_t* frameRow = frame + frameStrideElements * firstRow;
5036
5037 for (unsigned int y = 0u; y < numberRows; ++y)
5038 {
5039 for (unsigned int x = 0u; x < width; ++x)
5040 {
5041 if (frameRow[tAlphaChannelIndex])
5042 {
5043 const uint8_t alpha_2 = frameRow[tAlphaChannelIndex] / 2u;
5044
5045 for (unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5046 {
5047 if (channelIndex != tAlphaChannelIndex)
5048 {
5049 frameRow[channelIndex] = uint8_t(std::min((frameRow[channelIndex] * 255u + alpha_2) / frameRow[tAlphaChannelIndex], 255u));
5050 }
5051 }
5052 }
5053
5054 frameRow += tChannels;
5055 }
5056
5057 frameRow += framePaddingElements;
5058 }
5059}
5060
5061template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
5062void FrameChannels::premultipliedAlphaToStraightAlpha8BitPerChannelSubset(const uint8_t* const source, uint8_t* target, const unsigned int width, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5063{
5064 static_assert(tChannels >= 2u, "Invalid channel number!");
5065 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
5066
5067 ocean_assert(source != nullptr && target != nullptr);
5068 ocean_assert(width >= 1u);
5069
5070 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
5071 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
5072
5073 const uint8_t* sourceRow = source + sourceStrideElements * firstRow;
5074 uint8_t* targetRow = target + targetStrideElements * firstRow;
5075
5076 for (unsigned int y = 0u; y < numberRows; ++y)
5077 {
5078 for (unsigned int x = 0u; x < width; ++x)
5079 {
5080 if (sourceRow[tAlphaChannelIndex])
5081 {
5082 const uint8_t alpha_2 = sourceRow[tAlphaChannelIndex] / 2u;
5083
5084 for (unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5085 {
5086 if (channelIndex != tAlphaChannelIndex)
5087 {
5088 targetRow[channelIndex] = uint8_t(std::max((sourceRow[channelIndex] * 255u + alpha_2) / sourceRow[tAlphaChannelIndex], 255u));
5089 }
5090 else
5091 {
5092 targetRow[channelIndex] = sourceRow[channelIndex];
5093 }
5094 }
5095 }
5096 else
5097 {
5098 for (unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5099 {
5100 targetRow[channelIndex] = sourceRow[channelIndex];
5101 }
5102 }
5103
5104 sourceRow += tChannels;
5105 targetRow += tChannels;
5106 }
5107
5108 sourceRow += sourcePaddingElements;
5109 targetRow += targetPaddingElements;
5110 }
5111}
5112
5113template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
5114void FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset(uint8_t* const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5115{
5116 static_assert(tChannels >= 2u, "Invalid channel number!");
5117 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
5118
5119 ocean_assert(frame != nullptr);
5120 ocean_assert(width >= 1u);
5121
5122 const unsigned int frameStrideElements = width * tChannels + framePaddingElements;
5123
5124 uint8_t* frameRow = frame + frameStrideElements * firstRow;
5125
5126 for (unsigned int y = 0u; y < numberRows; ++y)
5127 {
5128 for (unsigned int x = 0u; x < width; ++x)
5129 {
5130 for (unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5131 {
5132 if (channelIndex != tAlphaChannelIndex)
5133 {
5134 frameRow[channelIndex] = (frameRow[channelIndex] * frameRow[tAlphaChannelIndex] + 127u) / 255u;
5135 }
5136 }
5137
5138 frameRow += tChannels;
5139 }
5140
5141 frameRow += framePaddingElements;
5142 }
5143}
5144
5145template <unsigned int tChannels, unsigned int tAlphaChannelIndex>
5146void FrameChannels::straightAlphaToPremultipliedAlpha8BitPerChannelSubset(const uint8_t* const source, uint8_t* const target, const unsigned int width, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const unsigned int firstRow, const unsigned int numberRows)
5147{
5148 static_assert(tChannels >= 2u, "Invalid channel number!");
5149 static_assert(tAlphaChannelIndex < tChannels, "Invalid alpha channel index!");
5150
5151 ocean_assert(source != nullptr && target != nullptr);
5152 ocean_assert(width >= 1u);
5153
5154 const unsigned int sourceStrideElements = width * tChannels + sourcePaddingElements;
5155 const unsigned int targetStrideElements = width * tChannels + targetPaddingElements;
5156
5157 const uint8_t* sourceRow = source + sourceStrideElements * firstRow;
5158 uint8_t* targetRow = target + targetStrideElements * firstRow;
5159
5160 for (unsigned int y = 0u; y < numberRows; ++y)
5161 {
5162 for (unsigned int x = 0u; x < width; ++x)
5163 {
5164 for (unsigned int channelIndex = 0u; channelIndex < tChannels; ++channelIndex)
5165 {
5166 if (channelIndex != tAlphaChannelIndex)
5167 {
5168 targetRow[channelIndex] = (sourceRow[channelIndex] * sourceRow[tAlphaChannelIndex] + 127u) / 255u;
5169 }
5170 else
5171 {
5172 targetRow[channelIndex] = sourceRow[channelIndex];
5173 }
5174 }
5175
5176 sourceRow += tChannels;
5177 targetRow += tChannels;
5178 }
5179
5180 sourceRow += sourcePaddingElements;
5181 targetRow += targetPaddingElements;
5182 }
5183}
5184
5185#if defined(OCEAN_HARDWARE_SSE_VERSION) && OCEAN_HARDWARE_SSE_VERSION >= 41
5186
5187OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactors0_128_u_16x8, const __m128i& multiplicationFactors1_128_u_16x8, const __m128i& multiplicationFactors2_128_u_16x8)
5188{
5189 ocean_assert(source != nullptr && target != nullptr);
5190
5191 // the documentation of this function is designed for RGB24 to Y8 conversion
5192 // however, in general this function can be used to apply a linear combination on the four source channels
5193 // to create one output channel
5194
5195 // precise color space conversion:
5196 // Y = 0.299 * R + 0.587 * G + 0.114 * B
5197
5198 // approximation:
5199 // Y = (38 * R + 75 * G + 15 * B) / 128
5200
5201 // we expect the following input pattern (for here RGB24):
5202 // FEDC BA98 7654 3210 FEDC BA98 7654 3210 FEDC BA98 7654 3210
5203 // BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR
5204
5205 // we store eight 16 bit values holding 64 for rounding purpose:
5206 const __m128i constant64_u_16x8 = _mm_set1_epi32(0x00400040);
5207
5208 const __m128i sourceA_u_8x16 = _mm_loadu_si128((const __m128i*)source + 0);
5209 const __m128i sourceB_u_8x16 = _mm_loadu_si128((const __m128i*)source + 1);
5210 const __m128i sourceC_u_8x16 = _mm_loadu_si128((const __m128i*)source + 2);
5211
5212 __m128i channel0_u_8x16;
5213 __m128i channel1_u_8x16;
5214 __m128i channel2_u_8x16;
5215 SSE::deInterleave3Channel8Bit48Elements(sourceA_u_8x16, sourceB_u_8x16, sourceC_u_8x16, channel0_u_8x16, channel1_u_8x16, channel2_u_8x16);
5216
5217 // now we need 16 bit values instead of 8 bit values
5218
5219 const __m128i channel0_low_u_8x16 = SSE::removeHighBits16_8(channel0_u_8x16);
5220 const __m128i channel1_low_u_8x16 = SSE::removeHighBits16_8(channel1_u_8x16);
5221 const __m128i channel2_low_u_8x16 = SSE::removeHighBits16_8(channel2_u_8x16);
5222
5223 const __m128i channel0_high_u_8x16 = _mm_srli_epi16(channel0_u_8x16, 8);
5224 const __m128i channel1_high_u_8x16 = _mm_srli_epi16(channel1_u_8x16, 8);
5225 const __m128i channel2_high_u_8x16 = _mm_srli_epi16(channel2_u_8x16, 8);
5226
5227 // we multiply each channel with the corresponding multiplication factors
5228
5229 const __m128i result0_low_u_8x16 = _mm_mullo_epi16(channel0_low_u_8x16, multiplicationFactors0_128_u_16x8);
5230 const __m128i result0_high_u_8x16 = _mm_mullo_epi16(channel0_high_u_8x16, multiplicationFactors0_128_u_16x8);
5231
5232 const __m128i result1_low_u_8x16 = _mm_mullo_epi16(channel1_low_u_8x16, multiplicationFactors1_128_u_16x8);
5233 const __m128i result1_high_u_8x16 = _mm_mullo_epi16(channel1_high_u_8x16, multiplicationFactors1_128_u_16x8);
5234
5235 const __m128i result2_low_u_8x16 = _mm_mullo_epi16(channel2_low_u_8x16, multiplicationFactors2_128_u_16x8);
5236 const __m128i result2_high_u_8x16 = _mm_mullo_epi16(channel2_high_u_8x16, multiplicationFactors2_128_u_16x8);
5237
5238 // we sum up all results and add 64 for rounding purpose
5239 const __m128i result128_low_u_8x16 = _mm_adds_epu16(_mm_adds_epu16(result0_low_u_8x16, result1_low_u_8x16), _mm_adds_epu16(result2_low_u_8x16, constant64_u_16x8));
5240 const __m128i result128_high_u_8x16 = _mm_adds_epu16(_mm_adds_epu16(result0_high_u_8x16, result1_high_u_8x16), _mm_adds_epu16(result2_high_u_8x16, constant64_u_16x8));
5241
5242 // we shift the multiplication results by 7 bits (= 128)
5243 const __m128i result_low_u_8x16 = _mm_srli_epi16(result128_low_u_8x16, 7);
5244 const __m128i result_high_u_8x16 = _mm_srli_epi16(result128_high_u_8x16, 7);
5245
5246 // finally, we have to get rid of the upper zero bits by combining two 128 bit registers to one:
5247 const __m128i result_u_8x16 = _mm_or_si128(result_low_u_8x16, _mm_slli_epi16(result_high_u_8x16, 8));
5248
5249 // and we can store the result
5250 _mm_storeu_si128((__m128i*)target, result_u_8x16);
5251}
5252
5253OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& factorChannel00_128_s_16x8, const __m128i& factorChannel10_128_s_16x8, const __m128i& factorChannel20_128_s_16x8, const __m128i& factorChannel01_128_s_16x8, const __m128i& factorChannel11_128_s_16x8, const __m128i& factorChannel21_128_s_16x8, const __m128i& factorChannel02_128_s_16x8, const __m128i& factorChannel12_128_s_16x8, const __m128i& factorChannel22_128_s_16x8, const __m128i& biasChannel0_s_16x8, const __m128i& biasChannel1_s_16x8, const __m128i& biasChannel2_s_16x8)
5254{
5255 ocean_assert(source != nullptr && target != nullptr);
5256
5257 // the documentation of this function designed for RGB24 to YUV24 conversion
5258
5259 // precise color space conversion:
5260 // | Y | | 0.2578125 0.5039063 0.09765625 16.0 | | R |
5261 // | U | = | -0.1484375 -0.2890625 0.4375 128.0 | * | G |
5262 // | V | | 0.4375 -0.3671875 -0.0703125 128.0 | | B |
5263 // | 1 |
5264
5265 // approximation:
5266 // Y = ( 33 * R + 64 * G + 13 * B) / 128 + 16
5267 // U = (-19 * R - 37 * G + 56 * B) / 128 + 128
5268 // V = ( 56 * R - 47 * G - 9 * B) / 128 + 128
5269
5270 // we expect the following input pattern (for here RGB24):
5271 // FEDC BA98 7654 3210 FEDC BA98 7654 3210 FEDC BA98 7654 3210
5272 // BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR
5273
5274 const __m128i sourceA_u_8x16 = _mm_loadu_si128((const __m128i*)source + 0);
5275 const __m128i sourceB_u_8x16 = _mm_loadu_si128((const __m128i*)source + 1);
5276 const __m128i sourceC_u_8x16 = _mm_loadu_si128((const __m128i*)source + 2);
5277
5278 __m128i channel0_u_8x16;
5279 __m128i channel1_u_8x16;
5280 __m128i channel2_u_8x16;
5281 SSE::deInterleave3Channel8Bit48Elements(sourceA_u_8x16, sourceB_u_8x16, sourceC_u_8x16, channel0_u_8x16, channel1_u_8x16, channel2_u_8x16);
5282
5283 // now we need 16 bit values instead of 8 bit values
5284
5285 const __m128i channel0_low_u_8x16 = SSE::removeHighBits16_8(channel0_u_8x16);
5286 const __m128i channel1_low_u_8x16 = SSE::removeHighBits16_8(channel1_u_8x16);
5287 const __m128i channel2_low_u_8x16 = SSE::removeHighBits16_8(channel2_u_8x16);
5288
5289 const __m128i channel0_high_u_8x16 = _mm_srli_epi16(channel0_u_8x16, 8);
5290 const __m128i channel1_high_u_8x16 = _mm_srli_epi16(channel1_u_8x16, 8);
5291 const __m128i channel2_high_u_8x16 = _mm_srli_epi16(channel2_u_8x16, 8);
5292
5293 // we multiply each channel with the corresponding multiplication factors
5294
5295 __m128i result0_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel00_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel01_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel02_128_s_16x8));
5296 __m128i result1_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel10_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel11_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel12_128_s_16x8));
5297 __m128i result2_low_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_low_u_8x16, factorChannel20_128_s_16x8), _mm_mullo_epi16(channel1_low_u_8x16, factorChannel21_128_s_16x8)), _mm_mullo_epi16(channel2_low_u_8x16, factorChannel22_128_s_16x8));
5298
5299 __m128i result0_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel00_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel01_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel02_128_s_16x8));
5300 __m128i result1_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel10_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel11_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel12_128_s_16x8));
5301 __m128i result2_high_u_8x16 = _mm_add_epi16(_mm_add_epi16(_mm_mullo_epi16(channel0_high_u_8x16, factorChannel20_128_s_16x8), _mm_mullo_epi16(channel1_high_u_8x16, factorChannel21_128_s_16x8)), _mm_mullo_epi16(channel2_high_u_8x16, factorChannel22_128_s_16x8));
5302
5303 // we normalize the result by 128 and add the bias
5304
5305 result0_low_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result0_low_u_8x16, 7), biasChannel0_s_16x8);
5306 result1_low_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result1_low_u_8x16, 7), biasChannel1_s_16x8);
5307 result2_low_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result2_low_u_8x16, 7), biasChannel2_s_16x8);
5308
5309 result0_high_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result0_high_u_8x16, 7), biasChannel0_s_16x8);
5310 result1_high_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result1_high_u_8x16, 7), biasChannel1_s_16x8);
5311 result2_high_u_8x16 = _mm_add_epi16(SSE::divideByRightShiftSigned16Bit(result2_high_u_8x16, 7), biasChannel2_s_16x8);
5312
5313 // from here, we need values within the range [0, 255], so that we clamp the results
5314
5315 const __m128i constant255_s_16x8 = _mm_set1_epi16(255);
5316
5317 result0_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result0_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5318 result1_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result1_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5319 result2_low_u_8x16 = _mm_min_epi16(_mm_max_epi16(result2_low_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5320
5321 result0_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result0_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5322 result1_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result1_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5323 result2_high_u_8x16 = _mm_min_epi16(_mm_max_epi16(result2_high_u_8x16, _mm_setzero_si128()), constant255_s_16x8);
5324
5325 // finally, we have to get rid of the upper zero bits by combining two 128 bit registers to one:
5326 const __m128i result0_u_8x16 = _mm_or_si128(result0_low_u_8x16, _mm_slli_epi16(result0_high_u_8x16, 8));
5327 const __m128i result1_u_8x16 = _mm_or_si128(result1_low_u_8x16, _mm_slli_epi16(result1_high_u_8x16, 8));
5328 const __m128i result2_u_8x16 = _mm_or_si128(result2_low_u_8x16, _mm_slli_epi16(result2_high_u_8x16, 8));
5329
5330 __m128i resultA_u_8x16;
5331 __m128i resultB_u_8x16;
5332 __m128i resultC_u_8x16;
5333 SSE::interleave3Channel8Bit48Elements(result0_u_8x16, result1_u_8x16, result2_u_8x16, resultA_u_8x16, resultB_u_8x16, resultC_u_8x16);
5334
5335 // and we can store the result
5336 _mm_storeu_si128((__m128i*)target + 0, resultA_u_8x16);
5337 _mm_storeu_si128((__m128i*)target + 1, resultB_u_8x16);
5338 _mm_storeu_si128((__m128i*)target + 2, resultC_u_8x16);
5339}
5340
5341OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& factorChannel00_1024_s_16x8, const __m128i& factorChannel10_1024_s_16x8, const __m128i& factorChannel20_1024_s_16x8, const __m128i& factorChannel01_1024_s_16x8, const __m128i& factorChannel11_1024_s_16x8, const __m128i& factorChannel21_1024_s_16x8, const __m128i& factorChannel02_1024_s_16x8, const __m128i& factorChannel12_1024_s_16x8, const __m128i& factorChannel22_1024_s_16x8, const __m128i& biasChannel0_1024_s_32x4, const __m128i& biasChannel1_1024_s_32x4, const __m128i& biasChannel2_1024_s_32x4)
5342{
5343 ocean_assert(source != nullptr && target != nullptr);
5344
5345 // the documentation of this function designed for RGB24 to YUV24 conversion
5346
5347 /// precise color space conversion:
5348 // | R | | 1.1639404296875 0.0 1.595947265625 -222.904296875 | | Y |
5349 // | G | = | 1.1639404296875 -0.3909912109375 -0.81298828125 135.486328125 | * | U |
5350 // | B | | 1.1639404296875 2.0179443359375 0.0 -276.919921875 | | V |
5351 // | 1 |
5352
5353 // approximation:
5354 // | R | | 1192 0 1634 -223 | | Y |
5355 // | G | = | 1192 -400 -833 135 | * | U |
5356 // | B | | 1192 2066 0 -277 | | V |
5357 // | 1 |
5358
5359 // we expect the following input pattern (for here RGB24):
5360 // FEDC BA98 7654 3210 FEDC BA98 7654 3210 FEDC BA98 7654 3210
5361 // BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR BGRB GRBG RBGR
5362
5363 const __m128i sourceA_u_8x16 = _mm_loadu_si128((const __m128i*)source + 0);
5364 const __m128i sourceB_u_8x16 = _mm_loadu_si128((const __m128i*)source + 1);
5365 const __m128i sourceC_u_8x16 = _mm_loadu_si128((const __m128i*)source + 2);
5366
5367 __m128i channel0_u_8x16;
5368 __m128i channel1_u_8x16;
5369 __m128i channel2_u_8x16;
5370 SSE::deInterleave3Channel8Bit48Elements(sourceA_u_8x16, sourceB_u_8x16, sourceC_u_8x16, channel0_u_8x16, channel1_u_8x16, channel2_u_8x16);
5371
5372
5373 // now we need 16 bit values instead of 8 bit values
5374
5375 const __m128i channel0_low_u_16x8 = SSE::removeHighBits16_8(channel0_u_8x16);
5376 const __m128i channel1_low_u_16x8 = SSE::removeHighBits16_8(channel1_u_8x16);
5377 const __m128i channel2_low_u_16x8 = SSE::removeHighBits16_8(channel2_u_8x16);
5378
5379 const __m128i channel0_high_u_16x8 = _mm_srli_epi16(channel0_u_8x16, 8);
5380 const __m128i channel1_high_u_16x8 = _mm_srli_epi16(channel1_u_8x16, 8);
5381 const __m128i channel2_high_u_16x8 = _mm_srli_epi16(channel2_u_8x16, 8);
5382
5383
5384 // we multiply each channel with the corresponding multiplication factors (int16_t * int16_t = int32_t), and we normalize the result by 1024
5385
5386 __m128i result0_low_A_s_32x4;
5387 __m128i result0_low_B_s_32x4;
5388 __m128i result0_high_A_s_32x4;
5389 __m128i result0_high_B_s_32x4;
5390
5391 SSE::multiplyInt8x16ToInt32x8(channel0_low_u_16x8, factorChannel00_1024_s_16x8, result0_low_A_s_32x4, result0_low_B_s_32x4);
5392 SSE::multiplyInt8x16ToInt32x8(channel0_high_u_16x8, factorChannel00_1024_s_16x8, result0_high_A_s_32x4, result0_high_B_s_32x4);
5393
5394 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_low_u_16x8, factorChannel01_1024_s_16x8, result0_low_A_s_32x4, result0_low_B_s_32x4);
5395 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_high_u_16x8, factorChannel01_1024_s_16x8, result0_high_A_s_32x4, result0_high_B_s_32x4);
5396
5397 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_low_u_16x8, factorChannel02_1024_s_16x8, result0_low_A_s_32x4, result0_low_B_s_32x4);
5398 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_high_u_16x8, factorChannel02_1024_s_16x8, result0_high_A_s_32x4, result0_high_B_s_32x4);
5399
5400 result0_low_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result0_low_A_s_32x4, biasChannel0_1024_s_32x4), 10);
5401 result0_low_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result0_low_B_s_32x4, biasChannel0_1024_s_32x4), 10);
5402 result0_high_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result0_high_A_s_32x4, biasChannel0_1024_s_32x4), 10);
5403 result0_high_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result0_high_B_s_32x4, biasChannel0_1024_s_32x4), 10);
5404
5405
5406 __m128i result1_low_A_s_32x4;
5407 __m128i result1_low_B_s_32x4;
5408 __m128i result1_high_A_s_32x4;
5409 __m128i result1_high_B_s_32x4;
5410
5411 SSE::multiplyInt8x16ToInt32x8(channel0_low_u_16x8, factorChannel10_1024_s_16x8, result1_low_A_s_32x4, result1_low_B_s_32x4);
5412 SSE::multiplyInt8x16ToInt32x8(channel0_high_u_16x8, factorChannel10_1024_s_16x8, result1_high_A_s_32x4, result1_high_B_s_32x4);
5413
5414 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_low_u_16x8, factorChannel11_1024_s_16x8, result1_low_A_s_32x4, result1_low_B_s_32x4);
5415 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_high_u_16x8, factorChannel11_1024_s_16x8, result1_high_A_s_32x4, result1_high_B_s_32x4);
5416
5417 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_low_u_16x8, factorChannel12_1024_s_16x8, result1_low_A_s_32x4, result1_low_B_s_32x4);
5418 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_high_u_16x8, factorChannel12_1024_s_16x8, result1_high_A_s_32x4, result1_high_B_s_32x4);
5419
5420 result1_low_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result1_low_A_s_32x4, biasChannel1_1024_s_32x4), 10);
5421 result1_low_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result1_low_B_s_32x4, biasChannel1_1024_s_32x4), 10);
5422 result1_high_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result1_high_A_s_32x4, biasChannel1_1024_s_32x4), 10);
5423 result1_high_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result1_high_B_s_32x4, biasChannel1_1024_s_32x4), 10);
5424
5425
5426 __m128i result2_low_A_s_32x4;
5427 __m128i result2_low_B_s_32x4;
5428 __m128i result2_high_A_s_32x4;
5429 __m128i result2_high_B_s_32x4;
5430
5431 SSE::multiplyInt8x16ToInt32x8(channel0_low_u_16x8, factorChannel20_1024_s_16x8, result2_low_A_s_32x4, result2_low_B_s_32x4);
5432 SSE::multiplyInt8x16ToInt32x8(channel0_high_u_16x8, factorChannel20_1024_s_16x8, result2_high_A_s_32x4, result2_high_B_s_32x4);
5433
5434 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_low_u_16x8, factorChannel21_1024_s_16x8, result2_low_A_s_32x4, result2_low_B_s_32x4);
5435 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel1_high_u_16x8, factorChannel21_1024_s_16x8, result2_high_A_s_32x4, result2_high_B_s_32x4);
5436
5437 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_low_u_16x8, factorChannel22_1024_s_16x8, result2_low_A_s_32x4, result2_low_B_s_32x4);
5438 SSE::multiplyInt8x16ToInt32x8AndAccumulate(channel2_high_u_16x8, factorChannel22_1024_s_16x8, result2_high_A_s_32x4, result2_high_B_s_32x4);
5439
5440 result2_low_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result2_low_A_s_32x4, biasChannel2_1024_s_32x4), 10);
5441 result2_low_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result2_low_B_s_32x4, biasChannel2_1024_s_32x4), 10);
5442 result2_high_A_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result2_high_A_s_32x4, biasChannel2_1024_s_32x4), 10);
5443 result2_high_B_s_32x4 = SSE::divideByRightShiftSigned32Bit(_mm_add_epi32(result2_high_B_s_32x4, biasChannel2_1024_s_32x4), 10);
5444
5445
5446 // now we have int32_t values with 0x0000 or 0xFFFF in the high 16 bits
5447 // thus we can merge 8 int32_t values to 8 int16_t values
5448
5449 const __m128i mask_0000FFFF_32x4 = _mm_set1_epi32(0x0000FFFF);
5450
5451 __m128i result0_A_s_16x8 = _mm_or_si128(_mm_and_si128(result0_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result0_high_A_s_32x4, 16));
5452 __m128i result0_B_s_16x8 = _mm_or_si128(_mm_and_si128(result0_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result0_high_B_s_32x4, 16));
5453
5454 __m128i result1_A_s_16x8 = _mm_or_si128(_mm_and_si128(result1_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result1_high_A_s_32x4, 16));
5455 __m128i result1_B_s_16x8 = _mm_or_si128(_mm_and_si128(result1_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result1_high_B_s_32x4, 16));
5456
5457 __m128i result2_A_s_16x8 = _mm_or_si128(_mm_and_si128(result2_low_A_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result2_high_A_s_32x4, 16));
5458 __m128i result2_B_s_16x8 = _mm_or_si128(_mm_and_si128(result2_low_B_s_32x4, mask_0000FFFF_32x4), _mm_slli_epi32(result2_high_B_s_32x4, 16));
5459
5460
5461 // we combine 16 int16_t values to 16 uint8_t values (saturated)
5462
5463 const __m128i result0_u_8x16 = _mm_packus_epi16(result0_A_s_16x8, result0_B_s_16x8);
5464 const __m128i result1_u_8x16 = _mm_packus_epi16(result1_A_s_16x8, result1_B_s_16x8);
5465 const __m128i result2_u_8x16 = _mm_packus_epi16(result2_A_s_16x8, result2_B_s_16x8);
5466
5467 __m128i resultA_u_8x16;
5468 __m128i resultB_u_8x16;
5469 __m128i resultC_u_8x16;
5470 SSE::interleave3Channel8Bit48Elements(result0_u_8x16, result1_u_8x16, result2_u_8x16, resultA_u_8x16, resultB_u_8x16, resultC_u_8x16);
5471
5472 // and we can store the result
5473 _mm_storeu_si128((__m128i*)target + 0, resultA_u_8x16);
5474 _mm_storeu_si128((__m128i*)target + 1, resultB_u_8x16);
5475 _mm_storeu_si128((__m128i*)target + 2, resultC_u_8x16);
5476}
5477
5478OCEAN_FORCE_INLINE void FrameChannels::convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactors0123_128_s_32x4)
5479{
5480 ocean_assert(source != nullptr && target != nullptr);
5481
5482 // the documentation of this function is designed for RGBA32 to Y8 conversion
5483 // however, in general this function can be used to apply a linear combination on the four source channels
5484 // to create one output channel
5485
5486 // precise color space conversion:
5487 // Y = 0.299 * R + 0.587 * G + 0.114 * B
5488
5489 // approximation:
5490 // Y = (38 * R + 75 * G + 15 * B) / 128
5491
5492 // we expect the following input pattern (for here RGBA32):
5493 // FEDC BA98 7654 3210
5494 // ABGR ABGR ABGR ABGR
5495
5496 // we calculate:
5497 // (int16_t)((uint8_t)R * (signed char)38) + (int16_t)((uint8_t)G * (signed char)75) for the first 16 bits
5498 // (int16_t)((uint8_t)B * (signed char)15) + (int16_t)((uint8_t)A * (signed char)0) for the second 16 bits
5499
5500 // we store eight 16 bit values holding 64 for rounding purpose:
5501 // FE DC BA 98 76 54 32 10
5502 // 64 64 64 64 64 64 64 64
5503 const __m128i constant64_u_8x16 = _mm_set1_epi32(0x00400040);
5504
5505 const __m128i pixelsA_u_8x16 = _mm_loadu_si128((const __m128i*)source + 0);
5506 const __m128i pixelsB_u_8x16 = _mm_loadu_si128((const __m128i*)source + 1);
5507 const __m128i pixelsC_u_8x16 = _mm_loadu_si128((const __m128i*)source + 2);
5508 const __m128i pixelsD_u_8x16 = _mm_loadu_si128((const __m128i*)source + 3);
5509
5510 // we get the following pattern
5511 // FE DC BA 98 76 54 32 10
5512 // 0b gr 0b gr 0b gr 0b gr
5513 const __m128i intermediateResults0_u_16x8 = _mm_maddubs_epi16(pixelsA_u_8x16, multiplicationFactors0123_128_s_32x4);
5514 const __m128i intermediateResults1_u_16x8 = _mm_maddubs_epi16(pixelsB_u_8x16, multiplicationFactors0123_128_s_32x4);
5515 const __m128i intermediateResults2_u_16x8 = _mm_maddubs_epi16(pixelsC_u_8x16, multiplicationFactors0123_128_s_32x4);
5516 const __m128i intermediateResults3_u_16x8 = _mm_maddubs_epi16(pixelsD_u_8x16, multiplicationFactors0123_128_s_32x4);
5517
5518 // now we sum the pairs of neighboring 16 bit intermediate results
5519 __m128i grayA_u_16x8 = _mm_hadd_epi16(intermediateResults0_u_16x8, intermediateResults1_u_16x8);
5520 __m128i grayB_u_16x8 = _mm_hadd_epi16(intermediateResults2_u_16x8, intermediateResults3_u_16x8);
5521
5522 // we add 64 for rounding purpose
5523 grayA_u_16x8 = _mm_add_epi16(grayA_u_16x8, constant64_u_8x16);
5524 grayB_u_16x8 = _mm_add_epi16(grayB_u_16x8, constant64_u_8x16);
5525
5526 // we shift the multiplication results by 7 bits (= 128)
5527 grayA_u_16x8 = _mm_srli_epi16(grayA_u_16x8, 7);
5528 grayB_u_16x8 = _mm_srli_epi16(grayB_u_16x8, 7);
5529
5530 // now we have the following pattern (in two 128 bit registers):
5531 // FEDCBA9876543210
5532 // 0Y0Y0Y0Y0Y0Y0Y0Y
5533
5534 // finally, we have to get rid of the upper zero bits by combining two 128 bit registers to one:
5535 const __m128i gray_u_8x16 = _mm_packus_epi16(grayA_u_16x8, grayB_u_16x8);
5536
5537 // and we can store the result
5538 _mm_storeu_si128((__m128i*)target, gray_u_8x16);
5539}
5540
5541void FrameChannels::convert4ChannelsTo2Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t* const source, uint8_t* const target, const __m128i& multiplicationFactorsChannel0_0123_128_s_16x8, const __m128i& multiplicationFactorsChannel1_0123_128_s_16x8)
5542{
5543 ocean_assert(source != nullptr && target != nullptr);
5544
5545 // the documentation of this function is designed for RGBA32 to YA16 conversion
5546 // however, in general this function can be used to apply a linear combination on the four source channels
5547 // to create one output channel
5548
5549 // precise color space conversion:
5550 // Y = 0.299 * R + 0.587 * G + 0.114 * B + 0.0 * A
5551 // A = 0.0 * R + 0.0 * G + 0.0 * B + 1.0 * A
5552
5553 // approximation:
5554 // Y = (38 * R + 75 * G + 15 * B + 0 * A) / 128
5555 // A = (128 * A) / 128
5556
5557 // we expect the following input pattern (for here RGBA32):
5558 // FEDC BA98 7654 3210
5559 // ABGR ABGR ABGR ABGR
5560
5561 // we store eight 16 bit values holding 64 for rounding purpose:
5562 // FE DC BA 98 76 54 32 10
5563 // 64 64 64 64 64 64 64 64
5564 const __m128i constant64_u_8x16 = _mm_set1_epi32(0x00400040);
5565
5566 const __m128i pixelsA_u_8x16 = _mm_loadu_si128((const __m128i*)source + 0);
5567 const __m128i pixelsB_u_8x16 = _mm_loadu_si128((const __m128i*)source + 1);
5568 const __m128i pixelsC_u_8x16 = _mm_loadu_si128((const __m128i*)source + 2);
5569 const __m128i pixelsD_u_8x16 = _mm_loadu_si128((const __m128i*)source + 3);
5570
5571 // we convert the 8 bit values to 16 bit values
5572
5573 const __m128i pixelsA_u_16x8 = _mm_unpacklo_epi8(pixelsA_u_8x16, _mm_setzero_si128());
5574 const __m128i pixelsB_u_16x8 = _mm_unpackhi_epi8(pixelsA_u_8x16, _mm_setzero_si128());
5575
5576 const __m128i pixelsC_u_16x8 = _mm_unpacklo_epi8(pixelsB_u_8x16, _mm_setzero_si128());
5577 const __m128i pixelsD_u_16x8 = _mm_unpackhi_epi8(pixelsB_u_8x16, _mm_setzero_si128());
5578
5579 const __m128i pixelsE_u_16x8 = _mm_unpacklo_epi8(pixelsC_u_8x16, _mm_setzero_si128());
5580 const __m128i pixelsF_u_16x8 = _mm_unpackhi_epi8(pixelsC_u_8x16, _mm_setzero_si128());
5581
5582 const __m128i pixelsG_u_16x8 = _mm_unpacklo_epi8(pixelsD_u_8x16, _mm_setzero_si128());
5583 const __m128i pixelsH_u_16x8 = _mm_unpackhi_epi8(pixelsD_u_8x16, _mm_setzero_si128());
5584
5585 // now we have the following pattern
5586 // FE DC BA 98 76 54 32 10
5587 // 0a 0b 0g 0r 0a 0b 0g 0r
5588
5589 const __m128i intermediateResultsChannel0_0_u_32x4 = _mm_madd_epi16(pixelsA_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8); // r * f00 + g * f01 | b * f02 + a * f03 | ...
5590 const __m128i intermediateResultsChannel0_1_u_32x4 = _mm_madd_epi16(pixelsB_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5591 const __m128i intermediateResultsChannel0_2_u_32x4 = _mm_madd_epi16(pixelsC_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5592 const __m128i intermediateResultsChannel0_3_u_32x4 = _mm_madd_epi16(pixelsD_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5593 const __m128i intermediateResultsChannel0_4_u_32x4 = _mm_madd_epi16(pixelsE_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5594 const __m128i intermediateResultsChannel0_5_u_32x4 = _mm_madd_epi16(pixelsF_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5595 const __m128i intermediateResultsChannel0_6_u_32x4 = _mm_madd_epi16(pixelsG_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5596 const __m128i intermediateResultsChannel0_7_u_32x4 = _mm_madd_epi16(pixelsH_u_16x8, multiplicationFactorsChannel0_0123_128_s_16x8);
5597
5598 const __m128i resultsChannel0_A_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_0_u_32x4, intermediateResultsChannel0_1_u_32x4); // r * f00 + g * f01 + b * f02 + a * f03 | ...
5599 const __m128i resultsChannel0_B_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_2_u_32x4, intermediateResultsChannel0_3_u_32x4);
5600 const __m128i resultsChannel0_C_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_4_u_32x4, intermediateResultsChannel0_5_u_32x4);
5601 const __m128i resultsChannel0_D_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel0_6_u_32x4, intermediateResultsChannel0_7_u_32x4);
5602
5603
5604 const __m128i intermediateResultsChannel1_0_u_32x4 = _mm_madd_epi16(pixelsA_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8); // r * f10 + g * f11 | b * f12 + a * f13 | ...
5605 const __m128i intermediateResultsChannel1_1_u_32x4 = _mm_madd_epi16(pixelsB_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5606 const __m128i intermediateResultsChannel1_2_u_32x4 = _mm_madd_epi16(pixelsC_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5607 const __m128i intermediateResultsChannel1_3_u_32x4 = _mm_madd_epi16(pixelsD_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5608 const __m128i intermediateResultsChannel1_4_u_32x4 = _mm_madd_epi16(pixelsE_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5609 const __m128i intermediateResultsChannel1_5_u_32x4 = _mm_madd_epi16(pixelsF_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5610 const __m128i intermediateResultsChannel1_6_u_32x4 = _mm_madd_epi16(pixelsG_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5611 const __m128i intermediateResultsChannel1_7_u_32x4 = _mm_madd_epi16(pixelsH_u_16x8, multiplicationFactorsChannel1_0123_128_s_16x8);
5612
5613 const __m128i resultsChannel1_A_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_0_u_32x4, intermediateResultsChannel1_1_u_32x4); // r * f10 + g * f11 + b * f12 + a * f13 | ...
5614 const __m128i resultsChannel1_B_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_2_u_32x4, intermediateResultsChannel1_3_u_32x4);
5615 const __m128i resultsChannel1_C_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_4_u_32x4, intermediateResultsChannel1_5_u_32x4);
5616 const __m128i resultsChannel1_D_u_32x4 = _mm_hadd_epi32(intermediateResultsChannel1_6_u_32x4, intermediateResultsChannel1_7_u_32x4);
5617
5618 // now we interleave the results of first and second channel (as both results fit into 16 bit)
5619
5620 __m128i resultA_u_16x8 = _mm_or_si128(resultsChannel0_A_u_32x4, _mm_slli_epi32(resultsChannel1_A_u_32x4, 16));
5621 __m128i resultB_u_16x8 = _mm_or_si128(resultsChannel0_B_u_32x4, _mm_slli_epi32(resultsChannel1_B_u_32x4, 16));
5622 __m128i resultC_u_16x8 = _mm_or_si128(resultsChannel0_C_u_32x4, _mm_slli_epi32(resultsChannel1_C_u_32x4, 16));
5623 __m128i resultD_u_16x8 = _mm_or_si128(resultsChannel0_D_u_32x4, _mm_slli_epi32(resultsChannel1_D_u_32x4, 16));
5624
5625 // we add 64 for rounding purpose
5626 resultA_u_16x8 = _mm_add_epi16(resultA_u_16x8, constant64_u_8x16);
5627 resultB_u_16x8 = _mm_add_epi16(resultB_u_16x8, constant64_u_8x16);
5628 resultC_u_16x8 = _mm_add_epi16(resultC_u_16x8, constant64_u_8x16);
5629 resultD_u_16x8 = _mm_add_epi16(resultD_u_16x8, constant64_u_8x16);
5630
5631 // we shift the multiplication results by 7 bits (= 128)
5632 resultA_u_16x8 = _mm_srli_epi16(resultA_u_16x8, 7);
5633 resultB_u_16x8 = _mm_srli_epi16(resultB_u_16x8, 7);
5634 resultC_u_16x8 = _mm_srli_epi16(resultC_u_16x8, 7);
5635 resultD_u_16x8 = _mm_srli_epi16(resultD_u_16x8, 7);
5636
5637 // now we have the following pattern (in two 128 bit registers):
5638 // FEDCBA9876543210
5639 // 0A0Y0A0Y0A0Y0A0Y
5640
5641 // finally, we have to get rid of the upper zero bits by combining two 128 bit registers to one:
5642 const __m128i resultAB_u_8x16 = _mm_packus_epi16(resultA_u_16x8, resultB_u_16x8);
5643 const __m128i resultCD_u_8x16 = _mm_packus_epi16(resultC_u_16x8, resultD_u_16x8);
5644
5645 // and we can store the result
5646 _mm_storeu_si128((__m128i*)target + 0, resultAB_u_8x16);
5647 _mm_storeu_si128((__m128i*)target + 1, resultCD_u_8x16);
5648}
5649
5650#endif // OCEAN_HARDWARE_SSE_VERSION
5651
5652#if defined(OCEAN_HARDWARE_NEON_VERSION) && OCEAN_HARDWARE_NEON_VERSION >= 10
5653
5654template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2>
5655void FrameChannels::convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel0_128_u_8x8, const uint8x8_t& factorChannel1_128_u_8x8, const uint8x8_t& factorChannel2_128_u_8x8)
5656{
5657 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2, "Invalid multiplication factors!");
5658
5659 ocean_assert(source != nullptr && target != nullptr);
5660
5661 // the documentation of this function designed for RGB24 to Y8 conversion
5662
5663 // precise color space conversion:
5664 // Y = 0.299 * R + 0.587 * G + 0.114 * B
5665
5666 // approximation:
5667 // Y = (38 * R + 75 * G + 15 * B) / 128
5668
5669 // we expect the following input pattern (for here RGB24):
5670 // FEDC BA98 7654 3210
5671 // RBGR BGRB GRBG RBGR
5672
5673 // we load 8 pixels (= 3 * 8 values) and directly deinterleave the 3 channels so that we receive the following patterns:
5674 // source_u_8x8x3.val[0]: R R R R R R R R
5675 // source_u_8x8x3.val[1]: G G G G G G G G
5676 // source_u_8x8x3.val[2]: B B B B B B B B
5677
5678 uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5679
5680 uint16x8_t intermediateResults_u_16x8;
5681
5682 // we multiply the first channel with the specified factor (unless zero)
5683
5684 if constexpr (tUseFactorChannel0)
5685 {
5686 intermediateResults_u_16x8 = vmull_u8(source_u_8x8x3.val[0], factorChannel0_128_u_8x8);
5687 }
5688 else
5689 {
5690 intermediateResults_u_16x8 = vdupq_n_u16(0u);
5691 }
5692
5693 // we multiply the second channel with the specified factor (unless zero) and accumulate the results
5694
5695 if constexpr (tUseFactorChannel1)
5696 {
5697 intermediateResults_u_16x8 = vmlal_u8(intermediateResults_u_16x8, source_u_8x8x3.val[1], factorChannel1_128_u_8x8);
5698 }
5699
5700 // we multiply the third channel with the specified factor (unless zero) and accumulate the results
5701
5702 if constexpr (tUseFactorChannel2)
5703 {
5704 intermediateResults_u_16x8 = vmlal_u8(intermediateResults_u_16x8, source_u_8x8x3.val[2], factorChannel2_128_u_8x8);
5705 }
5706
5707 // we shift the 16 bit values by 7 bits (= 128), apply rounding, and narrow the 16 bit integers to 8 bit integers within one operation
5708 uint8x8_t results_u_8x8 = vqrshrn_n_u16(intermediateResults_u_16x8, 7); // results_u_8x8 = (intermediateResults_u_16x8 + 2^6) >> 2^7
5709
5710 // and we can store the result
5711 vst1_u8(target, results_u_8x8);
5712}
5713
5714OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8)
5715{
5716 ocean_assert(source != nullptr && target != nullptr);
5717
5718 // the documentation of this function designed for YUV24 to RGB24 conversion
5719
5720 // precise color space conversion:
5721 // | R | | 1 0.0 1.370705 -175.45024 | | Y |
5722 // | G | = | 1 -0.3376335 -0.698001 132.561152 | * | U |
5723 // | B | | 1 1.732446 0.0 -221.753088 | | V |
5724 // | 1 |
5725
5726 // approximation:
5727 // R = 64 * Y + 0 * (U - 128) + 88 * (V - 128)
5728 // G = 64 * Y - 22 * (U - 128) - 45 * (V - 128)
5729 // B = 64 * Y + 111 * (U - 128) + 0 * (V - 128)
5730
5731 // we load 8 pixels (= 3 * 8 values) and directly deinterleave the 3 channels so that we receive the following patterns:
5732 // source_u_8x8x3.val[0]: R R R R R R R R
5733 // source_u_8x8x3.val[1]: G G G G G G G G
5734 // source_u_8x8x3.val[2]: B B B B B B B B
5735
5736 const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5737
5738 // Y' = Y - bias0, U' = U - bias1, V' = V - bias2
5739 const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[0], biasChannel0_u_8x8));
5740 const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[1], biasChannel1_u_8x8));
5741 const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(source_u_8x8x3.val[2], biasChannel2_u_8x8));
5742
5743 // now we apply the 3x3 matrix multiplication
5744
5745 int16x8_t intermediateResults0_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel00_64_s_16x8);
5746 int16x8_t intermediateResults1_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel10_64_s_16x8);
5747 int16x8_t intermediateResults2_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel20_64_s_16x8);
5748
5749 intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, vmulq_s16(source1_s_16x8, factorChannel01_64_s_16x8)); // intermediateResults0 = saturated(intermediateResults0 + source10_low * factorChannel01)
5750 intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, vmulq_s16(source1_s_16x8, factorChannel11_64_s_16x8));
5751 intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, vmulq_s16(source1_s_16x8, factorChannel21_64_s_16x8));
5752
5753 intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, vmulq_s16(source2_s_16x8, factorChannel02_64_s_16x8));
5754 intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, vmulq_s16(source2_s_16x8, factorChannel12_64_s_16x8));
5755 intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, vmulq_s16(source2_s_16x8, factorChannel22_64_s_16x8));
5756
5757 uint8x8x3_t results_u_8x8x3;
5758
5759 // saturated narrow signed to unsigned, normalized by 2^6
5760 results_u_8x8x3.val[0] = vqrshrun_n_s16(intermediateResults0_s_16x8, 6);
5761 results_u_8x8x3.val[1] = vqrshrun_n_s16(intermediateResults1_s_16x8, 6);
5762 results_u_8x8x3.val[2] = vqrshrun_n_s16(intermediateResults2_s_16x8, 6);
5763
5764 // and we can store the result
5765 vst3_u8(target, results_u_8x8x3);
5766}
5767
5768OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8)
5769{
5770 ocean_assert(source != nullptr && target != nullptr);
5771
5772 // the documentation of this function designed for YUV24 to RGB24 conversion
5773
5774 // precise color space conversion:
5775 // | R | | 1 0.0 1.370705 -175.45024 | | Y |
5776 // | G | = | 1 -0.3376335 -0.698001 132.561152 | * | U |
5777 // | B | | 1 1.732446 0.0 -221.753088 | | V |
5778 // | 1 |
5779
5780 // approximation:
5781 // R = 64 * Y + 0 * (U - 128) + 88 * (V - 128)
5782 // G = 64 * Y - 22 * (U - 128) - 45 * (V - 128)
5783 // B = 64 * Y + 111 * (U - 128) + 0 * (V - 128)
5784
5785 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
5786
5787 // Y' = Y - bias0, U' = U - bias1, V' = V - bias2
5788 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
5789 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
5790 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
5791
5792 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
5793 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
5794 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
5795
5796 // now we mulitply apply the 3x3 matrix multiplication
5797
5798 int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_64_s_16x8);
5799 int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_64_s_16x8);
5800 int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_64_s_16x8);
5801
5802 int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_64_s_16x8);
5803 int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_64_s_16x8);
5804 int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_64_s_16x8);
5805
5806 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel01_64_s_16x8)); // intermediateResults0 = saturated(intermediateResults0 + source10_low * factorChannel01)
5807 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel11_64_s_16x8));
5808 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel21_64_s_16x8));
5809
5810 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel01_64_s_16x8));
5811 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel11_64_s_16x8));
5812 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel21_64_s_16x8));
5813
5814 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel02_64_s_16x8));
5815 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel12_64_s_16x8));
5816 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel22_64_s_16x8));
5817
5818 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel02_64_s_16x8));
5819 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel12_64_s_16x8));
5820 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel22_64_s_16x8));
5821
5822 uint8x16x3_t results_u_8x16x3;
5823
5824 // saturated narrow signed to unsigned, normalized by 2^6
5825 results_u_8x16x3.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 6));
5826 results_u_8x16x3.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 6));
5827 results_u_8x16x3.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 6));
5828
5829 // and we can store the result
5830 vst3q_u8(target, results_u_8x16x3);
5831}
5832
5833OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_128_s_16x8, const int16x8_t& factorChannel10_128_s_16x8, const int16x8_t& factorChannel20_128_s_16x8, const int16x8_t& factorChannel01_128_s_16x8, const int16x8_t& factorChannel11_128_s_16x8, const int16x8_t& factorChannel21_128_s_16x8, const int16x8_t& factorChannel02_128_s_16x8, const int16x8_t& factorChannel12_128_s_16x8, const int16x8_t& factorChannel22_128_s_16x8, const int16x8_t& biasChannel0_128_s_16x8, const int16x8_t& biasChannel1_128_s_16x8, const int16x8_t& biasChannel2_128_s_16x8)
5834{
5835 ocean_assert(source != nullptr && target != nullptr);
5836
5837 // the documentation of this function designed for RGB24 to YUV24 conversion
5838
5839 // precise color space conversion:
5840 // | Y | | 0.2578125 0.5039063 0.09765625 16.0 | | R |
5841 // | U | = | -0.1484375 -0.2890625 0.4375 128.0 | * | G |
5842 // | V | | 0.4375 -0.3671875 -0.0703125 128.0 | | B |
5843 // | 1 |
5844
5845 // approximation:
5846 // Y = ( 33 * R + 64 * G + 13 * B) / 128 + 16
5847 // U = (-19 * R - 37 * G + 56 * B) / 128 + 128
5848 // V = ( 56 * R - 47 * G - 9 * B) / 128 + 128
5849
5850 // we load 8 pixels (= 3 * 8 values) and directly deinterleave the 3 channels so that we receive the following patterns:
5851 // source_u_8x8x3.val[0]: R R R R R R R R
5852 // source_u_8x8x3.val[1]: G G G G G G G G
5853 // source_u_8x8x3.val[2]: B B B B B B B B
5854
5855 const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5856
5857 const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[0]));
5858 const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[1]));
5859 const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[2]));
5860
5861 int16x8_t intermediateResults0_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel00_128_s_16x8);
5862 int16x8_t intermediateResults1_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel10_128_s_16x8);
5863 int16x8_t intermediateResults2_s_16x8 = vmulq_s16(source0_s_16x8, factorChannel20_128_s_16x8);
5864
5865 intermediateResults0_s_16x8 = vmlaq_s16(intermediateResults0_s_16x8, source1_s_16x8, factorChannel01_128_s_16x8);
5866 intermediateResults1_s_16x8 = vmlaq_s16(intermediateResults1_s_16x8, source1_s_16x8, factorChannel11_128_s_16x8);
5867 intermediateResults2_s_16x8 = vmlaq_s16(intermediateResults2_s_16x8, source1_s_16x8, factorChannel21_128_s_16x8);
5868
5869 intermediateResults0_s_16x8 = vmlaq_s16(intermediateResults0_s_16x8, source2_s_16x8, factorChannel02_128_s_16x8);
5870 intermediateResults1_s_16x8 = vmlaq_s16(intermediateResults1_s_16x8, source2_s_16x8, factorChannel12_128_s_16x8);
5871 intermediateResults2_s_16x8 = vmlaq_s16(intermediateResults2_s_16x8, source2_s_16x8, factorChannel22_128_s_16x8);
5872
5873 // now we add the bias values (saturated)
5874
5875 intermediateResults0_s_16x8 = vqaddq_s16(intermediateResults0_s_16x8, biasChannel0_128_s_16x8);
5876 intermediateResults1_s_16x8 = vqaddq_s16(intermediateResults1_s_16x8, biasChannel1_128_s_16x8);
5877 intermediateResults2_s_16x8 = vqaddq_s16(intermediateResults2_s_16x8, biasChannel2_128_s_16x8);
5878
5879 uint8x8x3_t results_u_8x8x3;
5880
5881 // saturated narrow signed to unsigned
5882 results_u_8x8x3.val[0] = vqrshrun_n_s16(intermediateResults0_s_16x8, 7);
5883 results_u_8x8x3.val[1] = vqrshrun_n_s16(intermediateResults1_s_16x8, 7);
5884 results_u_8x8x3.val[2] = vqrshrun_n_s16(intermediateResults2_s_16x8, 7);
5885
5886 // and we can store the result
5887 vst3_u8(target, results_u_8x8x3);
5888}
5889
5890OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x4_t& factorChannel00_1024_s_16x4, const int16x4_t& factorChannel10_1024_s_16x4, const int16x4_t& factorChannel20_1024_s_16x4, const int16x4_t& factorChannel01_1024_s_16x4, const int16x4_t& factorChannel11_1024_s_16x4, const int16x4_t& factorChannel21_1024_s_16x4, const int16x4_t& factorChannel02_1024_s_16x4, const int16x4_t& factorChannel12_1024_s_16x4, const int16x4_t& factorChannel22_1024_s_16x4, const int32x4_t& biasChannel0_1024_s_32x4, const int32x4_t& biasChannel1_1024_s_32x4, const int32x4_t& biasChannel2_1024_s_32x4)
5891{
5892 ocean_assert(source != nullptr && target != nullptr);
5893
5894 // the documentation of this function designed for YUV24 to RGB24 conversion
5895
5896 // precise color space conversion:
5897 // | R | | 1.1639404296875 0.0 1.595947265625 -222.904296875 | | Y |
5898 // | G | = | 1.1639404296875 -0.3909912109375 -0.81298828125 135.486328125 | * | U |
5899 // | B | | 1.1639404296875 2.0179443359375 0.0 -276.919921875 | | V |
5900 // | 1 |
5901
5902 // approximation:
5903 // | R | | 1192 0 1634 -223 | | Y |
5904 // | G | = | 1192 -400 -833 135 | * | U |
5905 // | B | | 1192 2066 0 -277 | | V |
5906 // | 1 |
5907
5908 // we load 8 pixels (= 3 * 8 values) and directly deinterleave the 3 channels so that we receive the following patterns:
5909 // source_u_8x8x3.val[0]: R R R R R R R R
5910 // source_u_8x8x3.val[1]: G G G G G G G G
5911 // source_u_8x8x3.val[2]: B B B B B B B B
5912
5913 const uint8x8x3_t source_u_8x8x3 = vld3_u8(source);
5914
5915 const int16x8_t source0_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[0]));
5916 const int16x8_t source1_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[1]));
5917 const int16x8_t source2_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(source_u_8x8x3.val[2]));
5918
5919 const int16x4_t source0_low_s_16x4 = vget_low_s16(source0_s_16x8);
5920 const int16x4_t source0_high_s_16x4 = vget_high_s16(source0_s_16x8);
5921
5922 int32x4_t intermediateResults0_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel00_1024_s_16x4);
5923 int32x4_t intermediateResults0_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel00_1024_s_16x4);
5924
5925 int32x4_t intermediateResults1_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel10_1024_s_16x4);
5926 int32x4_t intermediateResults1_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel10_1024_s_16x4);
5927
5928 int32x4_t intermediateResults2_low_s_32x4 = vmull_s16(source0_low_s_16x4, factorChannel20_1024_s_16x4);
5929 int32x4_t intermediateResults2_high_s_32x4 = vmull_s16(source0_high_s_16x4, factorChannel20_1024_s_16x4);
5930
5931
5932 const int16x4_t source1_low_s_16x4 = vget_low_s16(source1_s_16x8);
5933 const int16x4_t source1_high_s_16x4 = vget_high_s16(source1_s_16x8);
5934
5935 intermediateResults0_low_s_32x4 = vmlal_s16(intermediateResults0_low_s_32x4, source1_low_s_16x4, factorChannel01_1024_s_16x4);
5936 intermediateResults0_high_s_32x4 = vmlal_s16(intermediateResults0_high_s_32x4, source1_high_s_16x4, factorChannel01_1024_s_16x4);
5937
5938 intermediateResults1_low_s_32x4 = vmlal_s16(intermediateResults1_low_s_32x4, source1_low_s_16x4, factorChannel11_1024_s_16x4);
5939 intermediateResults1_high_s_32x4 = vmlal_s16(intermediateResults1_high_s_32x4, source1_high_s_16x4, factorChannel11_1024_s_16x4);
5940
5941 intermediateResults2_low_s_32x4 = vmlal_s16(intermediateResults2_low_s_32x4, source1_low_s_16x4, factorChannel21_1024_s_16x4);
5942 intermediateResults2_high_s_32x4 = vmlal_s16(intermediateResults2_high_s_32x4, source1_high_s_16x4, factorChannel21_1024_s_16x4);
5943
5944
5945 const int16x4_t source2_low_s_16x4 = vget_low_s16(source2_s_16x8);
5946 const int16x4_t source2_high_s_16x4 = vget_high_s16(source2_s_16x8);
5947
5948 intermediateResults0_low_s_32x4 = vmlal_s16(intermediateResults0_low_s_32x4, source2_low_s_16x4, factorChannel02_1024_s_16x4);
5949 intermediateResults0_high_s_32x4 = vmlal_s16(intermediateResults0_high_s_32x4, source2_high_s_16x4, factorChannel02_1024_s_16x4);
5950
5951 intermediateResults1_low_s_32x4 = vmlal_s16(intermediateResults1_low_s_32x4, source2_low_s_16x4, factorChannel12_1024_s_16x4);
5952 intermediateResults1_high_s_32x4 = vmlal_s16(intermediateResults1_high_s_32x4, source2_high_s_16x4, factorChannel12_1024_s_16x4);
5953
5954 intermediateResults2_low_s_32x4 = vmlal_s16(intermediateResults2_low_s_32x4, source2_low_s_16x4, factorChannel22_1024_s_16x4);
5955 intermediateResults2_high_s_32x4 = vmlal_s16(intermediateResults2_high_s_32x4, source2_high_s_16x4, factorChannel22_1024_s_16x4);
5956
5957
5958 // now we add the bias values (saturated)
5959
5960 intermediateResults0_low_s_32x4 = vaddq_s32(intermediateResults0_low_s_32x4, biasChannel0_1024_s_32x4);
5961 intermediateResults0_high_s_32x4 = vaddq_s32(intermediateResults0_high_s_32x4, biasChannel0_1024_s_32x4);
5962
5963 intermediateResults1_low_s_32x4 = vaddq_s32(intermediateResults1_low_s_32x4, biasChannel1_1024_s_32x4);
5964 intermediateResults1_high_s_32x4 = vaddq_s32(intermediateResults1_high_s_32x4, biasChannel1_1024_s_32x4);
5965
5966 intermediateResults2_low_s_32x4 = vaddq_s32(intermediateResults2_low_s_32x4, biasChannel2_1024_s_32x4);
5967 intermediateResults2_high_s_32x4 = vaddq_s32(intermediateResults2_high_s_32x4, biasChannel2_1024_s_32x4);
5968
5969
5970 uint8x8x3_t results_u_8x8x3;
5971
5972 // saturated narrow signed to unsigned
5973 results_u_8x8x3.val[0] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_high_s_32x4, 10)));
5974 results_u_8x8x3.val[1] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_high_s_32x4, 10)));
5975 results_u_8x8x3.val[2] = vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_low_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_high_s_32x4, 10)));
5976
5977 // and we can store the result
5978 vst3_u8(target, results_u_8x8x3);
5979}
5980
5981OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x4_t& factorChannel00_1024_s_16x4, const int16x4_t& factorChannel10_1024_s_16x4, const int16x4_t& factorChannel20_1024_s_16x4, const int16x4_t& factorChannel01_1024_s_16x4, const int16x4_t& factorChannel11_1024_s_16x4, const int16x4_t& factorChannel21_1024_s_16x4, const int16x4_t& factorChannel02_1024_s_16x4, const int16x4_t& factorChannel12_1024_s_16x4, const int16x4_t& factorChannel22_1024_s_16x4, const int32x4_t& biasChannel0_1024_s_32x4, const int32x4_t& biasChannel1_1024_s_32x4, const int32x4_t& biasChannel2_1024_s_32x4)
5982{
5983 ocean_assert(source != nullptr && target != nullptr);
5984
5985 // the documentation of this function designed for YUV24 to RGB24 conversion
5986
5987 // precise color space conversion:
5988 // | R | | 1.1639404296875 0.0 1.595947265625 -222.904296875 | | Y |
5989 // | G | = | 1.1639404296875 -0.3909912109375 -0.81298828125 135.486328125 | * | U |
5990 // | B | | 1.1639404296875 2.0179443359375 0.0 -276.919921875 | | V |
5991 // | 1 |
5992
5993 // approximation:
5994 // | R | | 1192 0 1634 -223 | | Y |
5995 // | G | = | 1192 -400 -833 135 | * | U |
5996 // | B | | 1192 2066 0 -277 | | V |
5997 // | 1 |
5998
5999 // we load 8 pixels (= 3 * 8 values) and directly deinterleave the 3 channels so that we receive the following patterns:
6000 // source_u_8x8x3.val[0]: R R R R R R R R
6001 // source_u_8x8x3.val[1]: G G G G G G G G
6002 // source_u_8x8x3.val[2]: B B B B B B B B
6003
6004 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
6005
6006 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[0])));
6007 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[1])));
6008 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[2])));
6009
6010 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[0])));
6011 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[1])));
6012 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[2])));
6013
6014 const int16x4_t source0_A_s_16x4 = vget_low_s16(source0_low_s_16x8);
6015 const int16x4_t source0_B_s_16x4 = vget_high_s16(source0_low_s_16x8);
6016 const int16x4_t source0_C_s_16x4 = vget_low_s16(source0_high_s_16x8);
6017 const int16x4_t source0_D_s_16x4 = vget_high_s16(source0_high_s_16x8);
6018
6019 int32x4_t intermediateResults0_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel00_1024_s_16x4);
6020 int32x4_t intermediateResults0_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel00_1024_s_16x4);
6021 int32x4_t intermediateResults0_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel00_1024_s_16x4);
6022 int32x4_t intermediateResults0_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel00_1024_s_16x4);
6023
6024 int32x4_t intermediateResults1_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel10_1024_s_16x4);
6025 int32x4_t intermediateResults1_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel10_1024_s_16x4);
6026 int32x4_t intermediateResults1_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel10_1024_s_16x4);
6027 int32x4_t intermediateResults1_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel10_1024_s_16x4);
6028
6029 int32x4_t intermediateResults2_A_s_32x4 = vmull_s16(source0_A_s_16x4, factorChannel20_1024_s_16x4);
6030 int32x4_t intermediateResults2_B_s_32x4 = vmull_s16(source0_B_s_16x4, factorChannel20_1024_s_16x4);
6031 int32x4_t intermediateResults2_C_s_32x4 = vmull_s16(source0_C_s_16x4, factorChannel20_1024_s_16x4);
6032 int32x4_t intermediateResults2_D_s_32x4 = vmull_s16(source0_D_s_16x4, factorChannel20_1024_s_16x4);
6033
6034
6035 const int16x4_t source1_A_s_16x4 = vget_low_s16(source1_low_s_16x8);
6036 const int16x4_t source1_B_s_16x4 = vget_high_s16(source1_low_s_16x8);
6037 const int16x4_t source1_C_s_16x4 = vget_low_s16(source1_high_s_16x8);
6038 const int16x4_t source1_D_s_16x4 = vget_high_s16(source1_high_s_16x8);
6039
6040 intermediateResults0_A_s_32x4 = vmlal_s16(intermediateResults0_A_s_32x4, source1_A_s_16x4, factorChannel01_1024_s_16x4);
6041 intermediateResults0_B_s_32x4 = vmlal_s16(intermediateResults0_B_s_32x4, source1_B_s_16x4, factorChannel01_1024_s_16x4);
6042 intermediateResults0_C_s_32x4 = vmlal_s16(intermediateResults0_C_s_32x4, source1_C_s_16x4, factorChannel01_1024_s_16x4);
6043 intermediateResults0_D_s_32x4 = vmlal_s16(intermediateResults0_D_s_32x4, source1_D_s_16x4, factorChannel01_1024_s_16x4);
6044
6045 intermediateResults1_A_s_32x4 = vmlal_s16(intermediateResults1_A_s_32x4, source1_A_s_16x4, factorChannel11_1024_s_16x4);
6046 intermediateResults1_B_s_32x4 = vmlal_s16(intermediateResults1_B_s_32x4, source1_B_s_16x4, factorChannel11_1024_s_16x4);
6047 intermediateResults1_C_s_32x4 = vmlal_s16(intermediateResults1_C_s_32x4, source1_C_s_16x4, factorChannel11_1024_s_16x4);
6048 intermediateResults1_D_s_32x4 = vmlal_s16(intermediateResults1_D_s_32x4, source1_D_s_16x4, factorChannel11_1024_s_16x4);
6049
6050 intermediateResults2_A_s_32x4 = vmlal_s16(intermediateResults2_A_s_32x4, source1_A_s_16x4, factorChannel21_1024_s_16x4);
6051 intermediateResults2_B_s_32x4 = vmlal_s16(intermediateResults2_B_s_32x4, source1_B_s_16x4, factorChannel21_1024_s_16x4);
6052 intermediateResults2_C_s_32x4 = vmlal_s16(intermediateResults2_C_s_32x4, source1_C_s_16x4, factorChannel21_1024_s_16x4);
6053 intermediateResults2_D_s_32x4 = vmlal_s16(intermediateResults2_D_s_32x4, source1_D_s_16x4, factorChannel21_1024_s_16x4);
6054
6055
6056 const int16x4_t source2_A_s_16x4 = vget_low_s16(source2_low_s_16x8);
6057 const int16x4_t source2_B_s_16x4 = vget_high_s16(source2_low_s_16x8);
6058 const int16x4_t source2_C_s_16x4 = vget_low_s16(source2_high_s_16x8);
6059 const int16x4_t source2_D_s_16x4 = vget_high_s16(source2_high_s_16x8);
6060
6061 intermediateResults0_A_s_32x4 = vmlal_s16(intermediateResults0_A_s_32x4, source2_A_s_16x4, factorChannel02_1024_s_16x4);
6062 intermediateResults0_B_s_32x4 = vmlal_s16(intermediateResults0_B_s_32x4, source2_B_s_16x4, factorChannel02_1024_s_16x4);
6063 intermediateResults0_C_s_32x4 = vmlal_s16(intermediateResults0_C_s_32x4, source2_C_s_16x4, factorChannel02_1024_s_16x4);
6064 intermediateResults0_D_s_32x4 = vmlal_s16(intermediateResults0_D_s_32x4, source2_D_s_16x4, factorChannel02_1024_s_16x4);
6065
6066 intermediateResults1_A_s_32x4 = vmlal_s16(intermediateResults1_A_s_32x4, source2_A_s_16x4, factorChannel12_1024_s_16x4);
6067 intermediateResults1_B_s_32x4 = vmlal_s16(intermediateResults1_B_s_32x4, source2_B_s_16x4, factorChannel12_1024_s_16x4);
6068 intermediateResults1_C_s_32x4 = vmlal_s16(intermediateResults1_C_s_32x4, source2_C_s_16x4, factorChannel12_1024_s_16x4);
6069 intermediateResults1_D_s_32x4 = vmlal_s16(intermediateResults1_D_s_32x4, source2_D_s_16x4, factorChannel12_1024_s_16x4);
6070
6071 intermediateResults2_A_s_32x4 = vmlal_s16(intermediateResults2_A_s_32x4, source2_A_s_16x4, factorChannel22_1024_s_16x4);
6072 intermediateResults2_B_s_32x4 = vmlal_s16(intermediateResults2_B_s_32x4, source2_B_s_16x4, factorChannel22_1024_s_16x4);
6073 intermediateResults2_C_s_32x4 = vmlal_s16(intermediateResults2_C_s_32x4, source2_C_s_16x4, factorChannel22_1024_s_16x4);
6074 intermediateResults2_D_s_32x4 = vmlal_s16(intermediateResults2_D_s_32x4, source2_D_s_16x4, factorChannel22_1024_s_16x4);
6075
6076
6077 // now we add the bias values (saturated)
6078
6079 intermediateResults0_A_s_32x4 = vaddq_s32(intermediateResults0_A_s_32x4, biasChannel0_1024_s_32x4);
6080 intermediateResults0_B_s_32x4 = vaddq_s32(intermediateResults0_B_s_32x4, biasChannel0_1024_s_32x4);
6081 intermediateResults0_C_s_32x4 = vaddq_s32(intermediateResults0_C_s_32x4, biasChannel0_1024_s_32x4);
6082 intermediateResults0_D_s_32x4 = vaddq_s32(intermediateResults0_D_s_32x4, biasChannel0_1024_s_32x4);
6083
6084 intermediateResults1_A_s_32x4 = vaddq_s32(intermediateResults1_A_s_32x4, biasChannel1_1024_s_32x4);
6085 intermediateResults1_B_s_32x4 = vaddq_s32(intermediateResults1_B_s_32x4, biasChannel1_1024_s_32x4);
6086 intermediateResults1_C_s_32x4 = vaddq_s32(intermediateResults1_C_s_32x4, biasChannel1_1024_s_32x4);
6087 intermediateResults1_D_s_32x4 = vaddq_s32(intermediateResults1_D_s_32x4, biasChannel1_1024_s_32x4);
6088
6089 intermediateResults2_A_s_32x4 = vaddq_s32(intermediateResults2_A_s_32x4, biasChannel2_1024_s_32x4);
6090 intermediateResults2_B_s_32x4 = vaddq_s32(intermediateResults2_B_s_32x4, biasChannel2_1024_s_32x4);
6091 intermediateResults2_C_s_32x4 = vaddq_s32(intermediateResults2_C_s_32x4, biasChannel2_1024_s_32x4);
6092 intermediateResults2_D_s_32x4 = vaddq_s32(intermediateResults2_D_s_32x4, biasChannel2_1024_s_32x4);
6093
6094
6095 uint8x16x3_t results_u_8x16x3;
6096
6097 // saturated narrow signed to unsigned
6098 results_u_8x16x3.val[0] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults0_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults0_D_s_32x4, 10))));
6099
6100 results_u_8x16x3.val[1] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults1_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults1_D_s_32x4, 10))));
6101 results_u_8x16x3.val[2] = vcombine_u8(vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_A_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_B_s_32x4, 10))), vqmovn_u16(vcombine_u16(vqrshrun_n_s32(intermediateResults2_C_s_32x4, 10), vqrshrun_n_s32(intermediateResults2_D_s_32x4, 10))));
6102
6103 // and we can store the result
6104 vst3q_u8(target, results_u_8x16x3);
6105}
6106
6107OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_128_s_16x8, const int16x8_t& factorChannel10_128_s_16x8, const int16x8_t& factorChannel20_128_s_16x8, const int16x8_t& factorChannel01_128_s_16x8, const int16x8_t& factorChannel11_128_s_16x8, const int16x8_t& factorChannel21_128_s_16x8, const int16x8_t& factorChannel02_128_s_16x8, const int16x8_t& factorChannel12_128_s_16x8, const int16x8_t& factorChannel22_128_s_16x8, const int16x8_t& biasChannel0_128_s_16x8, const int16x8_t& biasChannel1_128_s_16x8, const int16x8_t& biasChannel2_128_s_16x8)
6108{
6109 ocean_assert(source != nullptr && target != nullptr);
6110
6111 // the documentation of this function designed for RGB24 to YUV24 conversion
6112
6113 // precise color space conversion:
6114 // | Y | | 0.2578125 0.5039063 0.09765625 16.0 | | R |
6115 // | U | = | -0.1484375 -0.2890625 0.4375 128.0 | * | G |
6116 // | V | | 0.4375 -0.3671875 -0.0703125 128.0 | | B |
6117 // | 1 |
6118
6119 // approximation:
6120 // Y = ( 33 * R + 64 * G + 13 * B) / 128 + 16
6121 // U = (-19 * R - 37 * G + 56 * B) / 128 + 128
6122 // V = ( 56 * R - 47 * G - 9 * B) / 128 + 128
6123
6124 // we load 8 pixels (= 3 * 8 values) and directly deinterleave the 3 channels so that we receive the following patterns:
6125 // source_u_8x8x3.val[0]: R R R R R R R R
6126 // source_u_8x8x3.val[1]: G G G G G G G G
6127 // source_u_8x8x3.val[2]: B B B B B B B B
6128
6129 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
6130
6131 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[0])));
6132 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[1])));
6133 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(source_u_8x16x3.val[2])));
6134
6135 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[0])));
6136 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[1])));
6137 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(source_u_8x16x3.val[2])));
6138
6139
6140 int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_128_s_16x8);
6141 int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_128_s_16x8);
6142 int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_128_s_16x8);
6143
6144 int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_128_s_16x8);
6145 int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_128_s_16x8);
6146 int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_128_s_16x8);
6147
6148
6149 intermediateResults0_low_s_16x8 = vmlaq_s16(intermediateResults0_low_s_16x8, source1_low_s_16x8, factorChannel01_128_s_16x8);
6150 intermediateResults1_low_s_16x8 = vmlaq_s16(intermediateResults1_low_s_16x8, source1_low_s_16x8, factorChannel11_128_s_16x8);
6151 intermediateResults2_low_s_16x8 = vmlaq_s16(intermediateResults2_low_s_16x8, source1_low_s_16x8, factorChannel21_128_s_16x8);
6152
6153 intermediateResults0_high_s_16x8 = vmlaq_s16(intermediateResults0_high_s_16x8, source1_high_s_16x8, factorChannel01_128_s_16x8);
6154 intermediateResults1_high_s_16x8 = vmlaq_s16(intermediateResults1_high_s_16x8, source1_high_s_16x8, factorChannel11_128_s_16x8);
6155 intermediateResults2_high_s_16x8 = vmlaq_s16(intermediateResults2_high_s_16x8, source1_high_s_16x8, factorChannel21_128_s_16x8);
6156
6157
6158 intermediateResults0_low_s_16x8 = vmlaq_s16(intermediateResults0_low_s_16x8, source2_low_s_16x8, factorChannel02_128_s_16x8);
6159 intermediateResults1_low_s_16x8 = vmlaq_s16(intermediateResults1_low_s_16x8, source2_low_s_16x8, factorChannel12_128_s_16x8);
6160 intermediateResults2_low_s_16x8 = vmlaq_s16(intermediateResults2_low_s_16x8, source2_low_s_16x8, factorChannel22_128_s_16x8);
6161
6162 intermediateResults0_high_s_16x8 = vmlaq_s16(intermediateResults0_high_s_16x8, source2_high_s_16x8, factorChannel02_128_s_16x8);
6163 intermediateResults1_high_s_16x8 = vmlaq_s16(intermediateResults1_high_s_16x8, source2_high_s_16x8, factorChannel12_128_s_16x8);
6164 intermediateResults2_high_s_16x8 = vmlaq_s16(intermediateResults2_high_s_16x8, source2_high_s_16x8, factorChannel22_128_s_16x8);
6165
6166 // now we add the bias values (saturated)
6167
6168 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, biasChannel0_128_s_16x8);
6169 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, biasChannel0_128_s_16x8);
6170
6171 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, biasChannel1_128_s_16x8);
6172 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, biasChannel1_128_s_16x8);
6173
6174 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, biasChannel2_128_s_16x8);
6175 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, biasChannel2_128_s_16x8);
6176
6177
6178 uint8x16x3_t results_u_8x16x3;
6179
6180 // saturated narrow signed to unsigned shift with rounding
6181 results_u_8x16x3.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 7));
6182 results_u_8x16x3.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 7));
6183 results_u_8x16x3.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 7), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 7));
6184
6185 // and we can store the result
6186 vst3q_u8(target, results_u_8x16x3);
6187}
6188
6189OCEAN_FORCE_INLINE void FrameChannels::convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const int16x8_t& factorChannel00_64_s_16x8, const int16x8_t& factorChannel10_64_s_16x8, const int16x8_t& factorChannel20_64_s_16x8, const int16x8_t& factorChannel01_64_s_16x8, const int16x8_t& factorChannel11_64_s_16x8, const int16x8_t& factorChannel21_64_s_16x8, const int16x8_t& factorChannel02_64_s_16x8, const int16x8_t& factorChannel12_64_s_16x8, const int16x8_t& factorChannel22_64_s_16x8, const uint8x8_t& biasChannel0_u_8x8, const uint8x8_t& biasChannel1_u_8x8, const uint8x8_t& biasChannel2_u_8x8, const uint8x16_t& channelValue3_u_8x16)
6190{
6191 ocean_assert(source != nullptr && target != nullptr);
6192
6193 // the documentation of this function designed for YUV24 to RGB24 conversion
6194
6195 // precise color space conversion:
6196 // | R | | 1 0.0 1.370705 -175.45024 | | Y |
6197 // | G | = | 1 -0.3376335 -0.698001 132.561152 | * | U |
6198 // | B | | 1 1.732446 0.0 -221.753088 | | V |
6199 // | 1 |
6200
6201 // approximation:
6202 // R = 64 * Y + 0 * (U - 128) + 88 * (V - 128)
6203 // G = 64 * Y - 22 * (U - 128) - 45 * (V - 128)
6204 // B = 64 * Y + 111 * (U - 128) + 0 * (V - 128)
6205
6206 const uint8x16x3_t source_u_8x16x3 = vld3q_u8(source);
6207
6208 // Y' = Y - bias0, U' = U - bias1, V' = V - bias2
6209 const int16x8_t source0_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
6210 const int16x8_t source1_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
6211 const int16x8_t source2_low_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_low_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
6212
6213 const int16x8_t source0_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[0]), biasChannel0_u_8x8));
6214 const int16x8_t source1_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[1]), biasChannel1_u_8x8));
6215 const int16x8_t source2_high_s_16x8 = vreinterpretq_s16_u16(vsubl_u8(vget_high_u8(source_u_8x16x3.val[2]), biasChannel2_u_8x8));
6216
6217 // now we mulitply apply the 3x3 matrix multiplication
6218
6219 int16x8_t intermediateResults0_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel00_64_s_16x8);
6220 int16x8_t intermediateResults1_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel10_64_s_16x8);
6221 int16x8_t intermediateResults2_low_s_16x8 = vmulq_s16(source0_low_s_16x8, factorChannel20_64_s_16x8);
6222
6223 int16x8_t intermediateResults0_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel00_64_s_16x8);
6224 int16x8_t intermediateResults1_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel10_64_s_16x8);
6225 int16x8_t intermediateResults2_high_s_16x8 = vmulq_s16(source0_high_s_16x8, factorChannel20_64_s_16x8);
6226
6227 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel01_64_s_16x8)); // intermediateResults0 = saturated(intermediateResults0 + source10_low * factorChannel01)
6228 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel11_64_s_16x8));
6229 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source1_low_s_16x8, factorChannel21_64_s_16x8));
6230
6231 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel01_64_s_16x8));
6232 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel11_64_s_16x8));
6233 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source1_high_s_16x8, factorChannel21_64_s_16x8));
6234
6235 intermediateResults0_low_s_16x8 = vqaddq_s16(intermediateResults0_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel02_64_s_16x8));
6236 intermediateResults1_low_s_16x8 = vqaddq_s16(intermediateResults1_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel12_64_s_16x8));
6237 intermediateResults2_low_s_16x8 = vqaddq_s16(intermediateResults2_low_s_16x8, vmulq_s16(source2_low_s_16x8, factorChannel22_64_s_16x8));
6238
6239 intermediateResults0_high_s_16x8 = vqaddq_s16(intermediateResults0_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel02_64_s_16x8));
6240 intermediateResults1_high_s_16x8 = vqaddq_s16(intermediateResults1_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel12_64_s_16x8));
6241 intermediateResults2_high_s_16x8 = vqaddq_s16(intermediateResults2_high_s_16x8, vmulq_s16(source2_high_s_16x8, factorChannel22_64_s_16x8));
6242
6243 uint8x16x4_t results_u_8x16x4;
6244
6245 // saturated narrow signed to unsigned, normalized by 2^6
6246 results_u_8x16x4.val[0] = vcombine_u8(vqrshrun_n_s16(intermediateResults0_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults0_high_s_16x8, 6));
6247 results_u_8x16x4.val[1] = vcombine_u8(vqrshrun_n_s16(intermediateResults1_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults1_high_s_16x8, 6));
6248 results_u_8x16x4.val[2] = vcombine_u8(vqrshrun_n_s16(intermediateResults2_low_s_16x8, 6), vqrshrun_n_s16(intermediateResults2_high_s_16x8, 6));
6249 results_u_8x16x4.val[3] = channelValue3_u_8x16;
6250
6251 // and we can store the result
6252 vst4q_u8(target, results_u_8x16x4);
6253}
6254
6255template <bool tUseFactorChannel0, bool tUseFactorChannel1, bool tUseFactorChannel2, bool tUseFactorChannel3>
6256void FrameChannels::convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel0_128_u_8x8, const uint8x8_t& factorChannel1_128_u_8x8, const uint8x8_t& factorChannel2_128_u_8x8, const uint8x8_t& factorChannel3_128_u_8x8)
6257{
6258 static_assert(tUseFactorChannel0 || tUseFactorChannel1 || tUseFactorChannel2 || tUseFactorChannel3, "Invalid multiplication factors!");
6259
6260 ocean_assert(source != nullptr && target != nullptr);
6261
6262 // the documentation of this function designed for RGBA32 to Y8 conversion
6263
6264 // precise color space conversion:
6265 // Y = 0.299 * R + 0.587 * G + 0.114 * B
6266
6267 // approximation:
6268 // Y = (38 * R + 75 * G + 15 * B) / 128
6269
6270 // we expect the following input pattern (for here RGBA32):
6271 // FEDC BA98 7654 3210
6272 // ABGR ABGR ABGR ABGR
6273
6274 // we load 8 pixels (= 4 * 8 values) and directly deinterleave the 4 channels so that we receive the following patterns:
6275 // m4_64_pixels.val[0]: R R R R R R R R
6276 // m4_64_pixels.val[1]: G G G G G G G G
6277 // m4_64_pixels.val[2]: B B B B B B B B
6278 // m4_64_pixels.val[3]: A A A A A A A A
6279
6280 uint8x8x4_t pixels_u_8x8x4 = vld4_u8(source);
6281
6282 uint16x8_t intermediateResults_16x8;
6283
6284 // we multiply the first channel with the specified factor (unless zero)
6285
6286 if constexpr (tUseFactorChannel0)
6287 {
6288 intermediateResults_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel0_128_u_8x8);
6289 }
6290 else
6291 {
6292 intermediateResults_16x8 = vdupq_n_u16(0u);
6293 }
6294
6295 // we multiply the second channel with the specified factor (unless zero) and accumulate the results
6296
6297 if constexpr (tUseFactorChannel1)
6298 {
6299 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[1], factorChannel1_128_u_8x8);
6300 }
6301
6302 // we multiply the third channel with the specified factor (unless zero) and accumulate the results
6303
6304 if constexpr (tUseFactorChannel2)
6305 {
6306 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[2], factorChannel2_128_u_8x8);
6307 }
6308
6309 // we multiply the fourth channel with the specified factor (unless zero) and accumulate the results
6310
6311 if constexpr (tUseFactorChannel3)
6312 {
6313 intermediateResults_16x8 = vmlal_u8(intermediateResults_16x8, pixels_u_8x8x4.val[3], factorChannel3_128_u_8x8);
6314 }
6315
6316 // we shift the 16 bit values by 7 bits (= 128), apply rounding, and narrow the 16 bit integers to 8 bit integers within one operation
6317 uint8x8_t results_u_8x8 = vqrshrn_n_u16(intermediateResults_16x8, 7); // pixels_u_8x8x4 = (intermediateResults_16x8 + 2^6) >> 2^7
6318
6319 // and we can store the result
6320 vst1_u8(target, results_u_8x8);
6321}
6322
6323OCEAN_FORCE_INLINE void FrameChannels::convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t* const source, uint8_t* const target, const uint8x8_t& factorChannel00_128_u_8x8, const uint8x8_t& factorChannel10_128_u_8x8, const uint8x8_t& factorChannel01_128_u_8x8, const uint8x8_t& factorChannel11_128_u_8x8, const uint8x8_t& factorChannel02_128_u_8x8, const uint8x8_t& factorChannel12_128_u_8x8, const uint8x8_t& factorChannel03_128_u_8x8, const uint8x8_t& factorChannel13_128_u_8x8)
6324{
6325 ocean_assert(source != nullptr && target != nullptr);
6326
6327 // the documentation of this function designed for RGBA32 to YA16 conversion
6328
6329 // precise color space conversion:
6330 // Y = 0.299 * R + 0.587 * G + 0.114 * B + 0.0 * A
6331 // A = 0.0 * R + 0.0 * G + 0.0 * B + 1.0 * A
6332
6333 // approximation:
6334 // Y = (38 * R + 75 * G + 15 * B + 0 * A) / 128
6335 // A = (128 * A) / 128
6336
6337 // we expect the following input pattern (for here RGBA32):
6338 // FEDC BA98 7654 3210
6339 // ABGR ABGR ABGR ABGR
6340
6341 // we load 8 pixels (= 4 * 8 values) and directly deinterleave the 4 channels so that we receive the following patterns:
6342 // m4_64_pixels.val[0]: R R R R R R R R
6343 // m4_64_pixels.val[1]: G G G G G G G G
6344 // m4_64_pixels.val[2]: B B B B B B B B
6345 // m4_64_pixels.val[3]: A A A A A A A A
6346
6347 uint8x8x4_t pixels_u_8x8x4 = vld4_u8(source);
6348
6349 uint16x8_t intermediateResultsChannel0_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel00_128_u_8x8);
6350 uint16x8_t intermediateResultsChannel1_16x8 = vmull_u8(pixels_u_8x8x4.val[0], factorChannel10_128_u_8x8);
6351
6352 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[1], factorChannel01_128_u_8x8);
6353 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[1], factorChannel11_128_u_8x8);
6354
6355 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[2], factorChannel02_128_u_8x8);
6356 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[2], factorChannel12_128_u_8x8);
6357
6358 intermediateResultsChannel0_16x8 = vmlal_u8(intermediateResultsChannel0_16x8, pixels_u_8x8x4.val[3], factorChannel03_128_u_8x8);
6359 intermediateResultsChannel1_16x8 = vmlal_u8(intermediateResultsChannel1_16x8, pixels_u_8x8x4.val[3], factorChannel13_128_u_8x8);
6360
6361 uint8x8x2_t results_u_8x8x2;
6362
6363 // we shift the 16 bit values by 7 bits (= 128), apply rounding, and narrow the 16 bit integers to 8 bit integers within one operation
6364
6365 results_u_8x8x2.val[0] = vqrshrn_n_u16(intermediateResultsChannel0_16x8, 7); // results_u_8x8x2.val[0] = (intermediateResultsChannel0_16x8 + 2^6) >> 2^7
6366 results_u_8x8x2.val[1] = vqrshrn_n_u16(intermediateResultsChannel1_16x8, 7);
6367
6368 // and we can store the result
6369 vst2_u8(target, results_u_8x8x2);
6370}
6371
6372#endif // OCEAN_HARDWARE_NEON_VERSION
6373
6374}
6375
6376}
6377
6378#endif // META_OCEAN_CV_FRAME_CHANNELS_H
The following comfort class provides comfortable functions simplifying prototyping applications but a...
Definition FrameChannels.h:51
static bool premultipliedAlphaToStraightAlpha(const Frame &source, Frame &target, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
static bool zipChannels(const Frames &sourceFrames, Frame &targetFrame, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Zips/interleaves 1-channel images into one image with n-channels.
static bool separateTo1Channel(const Frame &sourceFrame, Frames &targetFrames, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
static bool premultipliedAlphaToStraightAlpha(Frame &frame, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
static bool separateTo1Channel(const Frame &sourceFrame, const std::initializer_list< Frame * > &targetFrames, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
static bool zipChannels(const std::initializer_list< Frame > &sourceFrames, Frame &targetFrame, const FrameType::PixelFormat targetPixelFormat=FrameType::FORMAT_UNDEFINED)
Zips/interleaves 1-channel images into one image with n-channels.
static bool straightAlphaToPremultipliedAlpha(Frame &frame, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
static bool straightAlphaToPremultipliedAlpha(const Frame &source, Frame &target, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
This class implements frame channel conversion, transformation and extraction functions.
Definition FrameChannels.h:31
static void reverseChannelOrder(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Reverses the order of the channels of a frame with zipped pixel format.
Definition FrameChannels.h:2840
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &factorChannel00_1024_s_16x8, const __m128i &factorChannel10_1024_s_16x8, const __m128i &factorChannel20_1024_s_16x8, const __m128i &factorChannel01_1024_s_16x8, const __m128i &factorChannel11_1024_s_16x8, const __m128i &factorChannel21_1024_s_16x8, const __m128i &factorChannel02_1024_s_16x8, const __m128i &factorChannel12_1024_s_16x8, const __m128i &factorChannel22_1024_s_16x8, const __m128i &biasChannel0_1024_s_32x4, const __m128i &biasChannel1_1024_s_32x4, const __m128i &biasChannel2_1024_s_32x4)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channel per pixel by a linear co...
Definition FrameChannels.h:5341
static void addChannelValueRow(const T *source, T *target, const size_t size, const void *channelValueParameter)
Adds a channel to a given row with generic (zipped) pixel format and sets all values to a specified v...
Definition FrameChannels.h:4289
static void shuffleRowChannelsAndSetLastChannelValue(const T *source, T *target, const size_t size, const void *options=nullptr)
Shuffles the channels of row pixels by application of a specified shuffle pattern and sets the last c...
Definition FrameChannels.h:3747
static void separateTo1Channel(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
Definition FrameChannels.h:1847
static OCEAN_FORCE_INLINE void convert3ChannelsTo4Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8, const uint8x16_t &channelValue3_u_8x16)
Converts 16 pixels with 3 channels per pixel to 16 pixels with 4 channels per pixel by a linear combi...
Definition FrameChannels.h:6189
static void addChannelRow(const void **sources, void **targets, const unsigned int multipleRowIndex, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const void *options)
Adds a channel to a given row with generic (zipped) pixel format and copies the information of the ne...
Definition FrameChannels.h:4189
static void shuffleChannelsAndSetLastChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Shuffles the channels of source frame and sets the last channel with constant value in the target fra...
Definition FrameChannels.h:3910
static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactors0_128_u_16x8, const __m128i &multiplicationFactors1_128_u_16x8, const __m128i &multiplicationFactors2_128_u_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with one channel per pixel by a linear comb...
Definition FrameChannels.h:5187
static void shuffleChannels(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Shuffles the channels of a frame by an arbitrary pattern.
Definition FrameChannels.h:3882
static void convertRow3ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *channelMultiplicationFactors_128)
Converts a row of pixels with 3 channels to pixels with one channel by a linear combination of the fo...
Definition FrameChannels.h:4877
static void convertRow4ChannelsTo1Channel8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *channelMultiplicationFactors_128)
Converts a row of pixels with 4 channels to pixels with one channel by a linear combination of the fo...
Definition FrameChannels.h:4950
static void copyChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Copies one channel from a given frame with zipped pixel format to another frame with zipped pixel for...
Definition FrameChannels.h:2799
static void zipChannels(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition FrameChannels.h:2598
static void straightAlphaToPremultipliedAlpha8BitPerChannel(uint8_t *const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker *worker=nullptr)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
Definition FrameChannels.h:4092
static void applyRowOperator(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const RowOperatorFunction< TSource, TTarget, tSourceChannels, tTargetChannels > &rowOperatorFunction, Worker *worker=nullptr)
Applies a row operator to all rows of a source image.
Definition FrameChannels.h:4007
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channel per pixel by a linear co...
Definition FrameChannels.h:5768
static void convertRow4ChannelsTo2Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *multiplicationFactors_128)
Converts a row of pixels with 4 channels to pixels with two channels by a linear combination of the fo...
static void setChannelSubset(T *frame, const unsigned int width, const T value, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Sets one channel of a frame with one unique value.
Definition FrameChannels.h:4488
static void applyBivariateOperatorSubset(const TSource0 *source0, const TSource1 *source1, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Generic bivariate pixel operations.
Definition FrameChannels.h:4721
static void applyAdvancedPixelModifier(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:3969
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel6BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_64_s_16x8, const int16x8_t &factorChannel10_64_s_16x8, const int16x8_t &factorChannel20_64_s_16x8, const int16x8_t &factorChannel01_64_s_16x8, const int16x8_t &factorChannel11_64_s_16x8, const int16x8_t &factorChannel21_64_s_16x8, const int16x8_t &factorChannel02_64_s_16x8, const int16x8_t &factorChannel12_64_s_16x8, const int16x8_t &factorChannel22_64_s_16x8, const uint8x8_t &biasChannel0_u_8x8, const uint8x8_t &biasChannel1_u_8x8, const uint8x8_t &biasChannel2_u_8x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channel per pixel by a linear comb...
Definition FrameChannels.h:5714
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x4_t &factorChannel00_1024_s_16x4, const int16x4_t &factorChannel10_1024_s_16x4, const int16x4_t &factorChannel20_1024_s_16x4, const int16x4_t &factorChannel01_1024_s_16x4, const int16x4_t &factorChannel11_1024_s_16x4, const int16x4_t &factorChannel21_1024_s_16x4, const int16x4_t &factorChannel02_1024_s_16x4, const int16x4_t &factorChannel12_1024_s_16x4, const int16x4_t &factorChannel22_1024_s_16x4, const int32x4_t &biasChannel0_1024_s_32x4, const int32x4_t &biasChannel1_1024_s_32x4, const int32x4_t &biasChannel2_1024_s_32x4)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channels per pixel by a linear com...
Definition FrameChannels.h:5981
static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel0_128_u_8x8, const uint8x8_t &factorChannel1_128_u_8x8, const uint8x8_t &factorChannel2_128_u_8x8, const uint8x8_t &factorChannel3_128_u_8x8)
Converts 8 pixels with 4 channels per pixel to 8 pixels with one channel per pixel by a linear combin...
static void addFirstChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be th...
Definition FrameChannels.h:2711
static void addLastChannel(const T *source, const T *sourceNewChannel, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the ba...
Definition FrameChannels.h:2731
static void removeFirstChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Removes the first channel from a given frame with zipped (generic) pixel format.
Definition FrameChannels.h:2767
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel10BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x4_t &factorChannel00_1024_s_16x4, const int16x4_t &factorChannel10_1024_s_16x4, const int16x4_t &factorChannel20_1024_s_16x4, const int16x4_t &factorChannel01_1024_s_16x4, const int16x4_t &factorChannel11_1024_s_16x4, const int16x4_t &factorChannel21_1024_s_16x4, const int16x4_t &factorChannel02_1024_s_16x4, const int16x4_t &factorChannel12_1024_s_16x4, const int16x4_t &factorChannel22_1024_s_16x4, const int32x4_t &biasChannel0_1024_s_32x4, const int32x4_t &biasChannel1_1024_s_32x4, const int32x4_t &biasChannel2_1024_s_32x4)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channel per pixel by a linear comb...
Definition FrameChannels.h:5890
static void addLastChannelValue(const T *source, const T newChannelValue, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the value of the new channel will be th...
Definition FrameChannels.h:2747
static void convertRow3ChannelsTo3Channels8BitPerChannel6BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static void reverseRowPixelOrderInPlace(T *data, const size_t size)
Reverses/mirrors the order of pixels in a given row (or a memory block in general) in place.
Definition FrameChannels.h:3017
static void applyRowOperatorSubset(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const RowOperatorFunction< TSource, TTarget, tSourceChannels, tTargetChannels > rowOperatorFunction, const unsigned int firstRow, const unsigned int numberRows)
Applies a row operator to a subset of all rows of a source image.
Definition FrameChannels.h:4854
static void applyPixelModifier(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:3954
static void narrowRow16BitPerChannelTo8BitPerChannel(const uint16_t *source, uint8_t *target, const size_t size, const void *unusedParameters=nullptr)
Narrows a row of pixels with 16 bit channels to pixels with 8 bit channels.
Definition FrameChannels.h:4130
static void applyAdvancedPixelModifierSubset(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:4615
static void convertRow4ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 4 channels to pixels with 3 channels by a linear combination of the thr...
static void shuffleRowChannels(const T *source, T *target, const size_t size, const void *unusedOptions=nullptr)
Shuffles the channels of row pixels by application of a specified shuffle pattern.
Definition FrameChannels.h:3387
static void premultipliedAlphaToStraightAlpha8BitPerChannel(uint8_t *const frame, const unsigned int width, const unsigned int height, const unsigned int framePaddingElements, Worker *worker=nullptr)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
Definition FrameChannels.h:4054
static void convertRow3ChannelsTo3Channels8BitPerChannel7BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_128_s_16x8, const int16x8_t &factorChannel10_128_s_16x8, const int16x8_t &factorChannel20_128_s_16x8, const int16x8_t &factorChannel01_128_s_16x8, const int16x8_t &factorChannel11_128_s_16x8, const int16x8_t &factorChannel21_128_s_16x8, const int16x8_t &factorChannel02_128_s_16x8, const int16x8_t &factorChannel12_128_s_16x8, const int16x8_t &factorChannel22_128_s_16x8, const int16x8_t &biasChannel0_128_s_16x8, const int16x8_t &biasChannel1_128_s_16x8, const int16x8_t &biasChannel2_128_s_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with 3 channels per pixel by a linear combi...
Definition FrameChannels.h:6107
static void convertRow3ChannelsTo4Channels8BitPerChannel6BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 4 channels by a linear combination of the thr...
static constexpr unsigned int CHANNELS_NOT_KNOWN_AT_COMPILE_TIME
Definition of a constant to specify that the number of channels are not known at compile time but at ...
Definition FrameChannels.h:37
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const int16x8_t &factorChannel00_128_s_16x8, const int16x8_t &factorChannel10_128_s_16x8, const int16x8_t &factorChannel20_128_s_16x8, const int16x8_t &factorChannel01_128_s_16x8, const int16x8_t &factorChannel11_128_s_16x8, const int16x8_t &factorChannel21_128_s_16x8, const int16x8_t &factorChannel02_128_s_16x8, const int16x8_t &factorChannel12_128_s_16x8, const int16x8_t &factorChannel22_128_s_16x8, const int16x8_t &biasChannel0_128_s_16x8, const int16x8_t &biasChannel1_128_s_16x8, const int16x8_t &biasChannel2_128_s_16x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with three channel per pixel by a linear comb...
Definition FrameChannels.h:5833
static void copyChannelRow(const T *source, T *target, const size_t size, const void *unusedParameters=nullptr)
Copies one channel from a source row to a target row with generic (zipped) pixel format.
Definition FrameChannels.h:4328
static void reverseRowPixelOrder(const T *source, T *target, const size_t size)
Reverses/mirrors the order of pixels in a given row (or a memory block in general).
Definition FrameChannels.h:2856
static OCEAN_FORCE_INLINE void convert3ChannelsTo1Channel8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel0_128_u_8x8, const uint8x8_t &factorChannel1_128_u_8x8, const uint8x8_t &factorChannel2_128_u_8x8)
Converts 8 pixels with 3 channels per pixel to 8 pixels with one channel per pixel by a linear combin...
static OCEAN_FORCE_INLINE void convert4ChannelsTo1Channel16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactors0123_128_s_32x)
Converts 16 pixels with 4 channels per pixel to 16 pixels with one channel per pixel by a linear comb...
Definition FrameChannels.h:5478
static void removeLastChannel(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Removes the last channel from a given frame with zipped (generic) pixel format.
Definition FrameChannels.h:2783
static void transformGeneric(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker)
Transforms a frame with generic pixel format (with zipped pixel information) like RGB24 or YUV24,...
Definition FrameChannels.h:4029
static void setChannel(T *frame, const unsigned int width, const unsigned int height, const T value, const unsigned int framePaddingElements, Worker *worker=nullptr)
Sets one channel of a frame with a specific unique value.
Definition FrameChannels.h:2821
static void straightAlphaToPremultipliedAlpha8BitPerChannelSubset(uint8_t *const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Converts an image with straight alpha (without premultiplied alpha) to an image with premultiplied al...
Definition FrameChannels.h:5114
static void narrow16BitPerChannelTo8BitPerChannel(const uint16_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Narrows 16 bit channels of a frame to 8 bit channels.
Definition FrameChannels.h:3938
static void transformGenericSubset(const uint8_t *source, uint8_t *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const RowReversePixelOrderFunction< void > rowReversePixelOrderFunction, const unsigned int bytesPerRow, const unsigned int sourceStrideBytes, const unsigned int targetStrideBytes, const unsigned int firstRow, const unsigned int numberRows)
Transforms a subset of a frame with generic pixel format (with zipped pixel information) like RGB24 o...
static OCEAN_FORCE_INLINE void convert3ChannelsTo3Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &factorChannel00_128_s_16x8, const __m128i &factorChannel10_128_s_16x8, const __m128i &factorChannel20_128_s_16x8, const __m128i &factorChannel01_128_s_16x8, const __m128i &factorChannel11_128_s_16x8, const __m128i &factorChannel21_128_s_16x8, const __m128i &factorChannel02_128_s_16x8, const __m128i &factorChannel12_128_s_16x8, const __m128i &factorChannel22_128_s_16x8, const __m128i &biasChannel0_s_16x8, const __m128i &biasChannel1_s_16x8, const __m128i &biasChannel2_s_16x8)
Converts 16 pixels with 3 channels per pixel to 16 pixels with three channel per pixel by a linear co...
Definition FrameChannels.h:5253
static void reverseRowChannelOrder(const T *source, T *target, const size_t size, const void *unusedOptions=nullptr)
Reverses/mirrors the order of channels in a given row (or a memory block in general).
Definition FrameChannels.h:3195
static void convertRow3ChannelsTo3Channels8BitPerChannel10BitPrecision(const uint8_t *source, uint8_t *target, const size_t size, const void *parameters)
Converts a row of pixels with 3 channels to pixels with 3 channels by a linear combination of the thr...
static void applyBivariateOperator(const TSource0 *source0, const TSource1 *source1, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int source0PaddingElements, const unsigned int source1PaddingElements, const unsigned int targetPaddingElements, const ConversionFlag conversionFlag, Worker *worker=nullptr)
Generic bivariate pixel operations. Applies bivariate per-pixel operators: C(y, x) = op(A(y,...
Definition FrameChannels.h:3988
static void addFirstChannel(const T *source, const T *sourceNewChannel, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int sourcePaddingElements, const unsigned int sourceNewChannelPaddingElements, const unsigned int targetPaddingElements, Worker *worker=nullptr)
Adds a new channel to a given frame with zipped pixel format, the new channel will be added to the fr...
Definition FrameChannels.h:2695
static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels8Pixels8BitPerChannel7BitPrecisionNEON(const uint8_t *const source, uint8_t *const target, const uint8x8_t &factorChannel00_128_u_8x8, const uint8x8_t &factorChannel10_128_u_8x8, const uint8x8_t &factorChannel01_128_u_8x8, const uint8x8_t &factorChannel11_128_u_8x8, const uint8x8_t &factorChannel02_128_u_8x8, const uint8x8_t &factorChannel12_128_u_8x8, const uint8x8_t &factorChannel03_128_u_8x8, const uint8x8_t &factorChannel13_128_u_8x8)
Converts 8 pixels with 4 channels per pixel to 8 pixels with two channels per pixel by a linear combi...
Definition FrameChannels.h:6323
static void separateTo1ChannelRuntime(const TSource *const sourceFrame, TTarget *const *const targetFrames, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int sourceFramePaddingElements, const unsigned int *targetFramesPaddingElements)
Separates a given frame with zipped pixel format e.g., FORMAT_RGB24, FORMAT_YUV24,...
Definition FrameChannels.h:4349
static void zipChannelsRuntime(const TSource *const *const sourceFrames, TTarget *const targetFrame, const unsigned int width, const unsigned int height, const unsigned int channels, const unsigned int *sourceFramesPaddingElements, const unsigned int targetFramePaddingElements)
Zips/interleaves 1-channel images into one image with n-channels.
Definition FrameChannels.h:4422
void(*)(const TSource *sourceRow, TTarget *targetRow, const unsigned int width, const unsigned int height, unsigned int rowIndex, const unsigned int sourceStrideElements, const unsigned int targetStrideElements) RowOperatorFunction
Definition of a function pointer to a function able to operate on an entire image row.
Definition FrameChannels.h:43
static void applyPixelModifierSubset(const T *source, T *target, const unsigned int width, const unsigned int height, const ConversionFlag conversionFlag, const unsigned int firstRow, const unsigned int numberRows)
Applies a specific modifier function on each pixel.
Definition FrameChannels.h:4511
static void premultipliedAlphaToStraightAlpha8BitPerChannelSubset(uint8_t *const frame, const unsigned int width, const unsigned int framePaddingElements, const unsigned int firstRow, const unsigned int numberRows)
Converts an image with premultiplied alpha to a straight image (without premultiplied alpha).
Definition FrameChannels.h:5025
static OCEAN_FORCE_INLINE void convert4ChannelsTo2Channels16Pixels8BitPerChannel7BitPrecisionSSE(const uint8_t *const source, uint8_t *const target, const __m128i &multiplicationFactorsChannel0_0123_128_s_16x8, const __m128i &multiplicationFactorsChannel1_0123_128_s_16x8)
Converts 16 pixels with 4 channels per pixel to 16 pixels with two channels per pixel by a linear comb...
Definition FrameChannels.h:5541
This is the base class for all frame converter classes.
Definition FrameConverter.h:32
ConversionFlag
Definition of individual conversion flags.
Definition FrameConverter.h:39
@ CONVERT_NORMAL
Normal conversion, neither flips nor mirrors the image.
Definition FrameConverter.h:49
@ CONVERT_FLIPPED_AND_MIRRORED
Rotated conversion, rotates the image by 180.0 degrees with anchor in the center of the image.
Definition FrameConverter.h:82
@ CONVERT_MIRRORED
Mirrored conversion, exchanges left and right of the image (like in a mirror, mirroring around the y-...
Definition FrameConverter.h:71
@ CONVERT_FLIPPED
Flipped conversion, exchanges top and bottom of the image (flipping around the x-axis).
Definition FrameConverter.h:60
static void convertGenericPixelFormat(const TSource *source, TTarget *target, const unsigned int width, const unsigned int height, const unsigned int sourceStrideElements, const unsigned int targetStrideElements, const ConversionFlag flag, const RowConversionFunction< TSource, TTarget > rowConversionFunction, const RowReversePixelOrderInPlaceFunction< TTarget > targetReversePixelOrderInPlaceFunction, const bool areContinuous, const void *options, Worker *worker)
Converts a frame with generic pixel format (e.g., RGBA32, BGR24, YUV24, ...) to a frame with generic ...
Definition FrameConverter.h:3225
void(*)(T *row, const size_t width) RowReversePixelOrderInPlaceFunction
Definition of a function pointer to a function able to reverse the order of pixels in an image row wi...
Definition FrameConverter.h:603
void(*)(const T *inputRow, T *targetRow, const size_t width) RowReversePixelOrderFunction
Definition of a function pointer to a function able to reverse the order of pixels in an image row wi...
Definition FrameConverter.h:594
static void convertArbitraryPixelFormat(const void **sources, void **targets, const unsigned int width, const unsigned int height, const ConversionFlag flag, const unsigned int multipleRowsPerIteration, const MultipleRowsConversionFunction multipleRowsConversionFunction, const void *options, Worker *worker)
Converts a frame with arbitrary pixel format (e.g., Y_UV12, Y_VU12, YUYV16, ...) to a frame with arbi...
Definition FrameConverter.h:3248
static OCEAN_FORCE_INLINE uint8x16_t cast16ElementsNEON(const float32x4_t &sourceA_f_32x4, const float32x4_t &sourceB_f_32x4, const float32x4_t &sourceC_f_32x4, const float32x4_t &sourceD_f_32x4)
Casts 16 float elements to 16 uint8_t elements.
Definition NEON.h:1208
static __m128i divideByRightShiftSigned32Bit(const __m128i &value, const unsigned int rightShifts)
Divides four signed 32 bit values by applying a right shift.
Definition SSE.h:3108
static __m128i load128i(const void *const buffer)
Loads a 128i value from the memory.
Definition SSE.h:3619
static void store128i(const __m128i &value, uint8_t *const buffer)
Stores a 128i value to the memory.
Definition SSE.h:3764
static __m128i divideByRightShiftSigned16Bit(const __m128i &value, const unsigned int rightShifts)
Divides eight signed 16 bit values by applying a right shift.
Definition SSE.h:3066
static OCEAN_FORCE_INLINE void multiplyInt8x16ToInt32x8AndAccumulate(const __m128i &values0, const __m128i &values1, __m128i &results0, __m128i &results1)
Multiplies 8 int16_t values with 8 int16_t values and adds the products to 8 int32_t values.
Definition SSE.h:3909
static OCEAN_FORCE_INLINE void interleave3Channel8Bit48Elements(const __m128i &channel0, const __m128i &channel1, const __m128i &channel2, __m128i &interleavedA, __m128i &interleavedB, __m128i &interleavedC)
Interleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
Definition SSE.h:3345
static OCEAN_FORCE_INLINE void reverseChannelOrder3Channel8Bit48Elements(const __m128i &interleaved0, const __m128i &interleaved1, const __m128i &interleaved2, __m128i &reversedInterleaved0, __m128i &reversedInterleaved1, __m128i &reversedInterleaved2)
Reverses the order of the first and last channel of 48 elements of an image with 3 interleaved channe...
Definition SSE.h:3387
static __m128i removeHighBits16_8(const __m128i &value)
Removes the higher 8 bits of eight 16 bit elements.
Definition SSE.h:3799
static OCEAN_FORCE_INLINE void deInterleave3Channel8Bit48Elements(const __m128i &interleavedA, const __m128i &interleavedB, const __m128i &interleavedC, __m128i &channel0, __m128i &channel1, __m128i &channel2)
Deinterleaves 48 elements of e.g., an image with 3 channels and 8 bit per element.
Definition SSE.h:3304
static __m128i set128i(const unsigned long long high64, const unsigned long long low64)
Sets a 128i value by two 64 bit values.
Definition SSE.h:3770
static OCEAN_FORCE_INLINE void reverseChannelOrder4Channel8Bit64Elements(const uint8_t *interleaved, uint8_t *reversedInterleaved)
Reverses the order of the channels of 16 pixels (64 elements) of an image with 4 interleaved channels...
Definition SSE.h:3412
static OCEAN_FORCE_INLINE void multiplyInt8x16ToInt32x8(const __m128i &values0, const __m128i &values1, __m128i &products0, __m128i &products1)
Multiplies 8 int16_t values with 8 int16_t values and returns the products as 8 int32_t results.
Definition SSE.h:3900
static OCEAN_FORCE_INLINE void reverseChannelOrder2Channel8Bit32Elements(const uint8_t *interleaved, uint8_t *reversedInterleaved)
Reverses the order of the channels of 16 pixels (32 elements) of an image with 2 interleaved channels...
Definition SSE.h:3372
static Caller< void > createStatic(typename StaticFunctionPointerMaker< void, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass, NullClass >::Type function)
Creates a new caller container for a static function with no function parameter.
Definition Caller.h:2877
This class implements Ocean's image class.
Definition Frame.h:1808
PixelFormat
Definition of all pixel formats available in the Ocean framework.
Definition Frame.h:183
TypeMapperBySize< sizeof(T)>::Type Type
Definition of an invalid mapped data type.
Definition DataType.h:508
This class implements a worker able to distribute function calls over different threads.
Definition Worker.h:33
bool executeFunction(const Function &function, const unsigned int first, const unsigned int size, const unsigned int firstIndex=(unsigned int)(-1), const unsigned int sizeIndex=(unsigned int)(-1), const unsigned int minimalIterations=1u, const unsigned int threadIndex=(unsigned int)(-1))
Executes a callback function separable by two function parameters.
std::vector< Frame > Frames
Definition of a vector holding frames.
Definition Frame.h:1771
std::vector< Index32 > Indices32
Definition of a vector holding 32 bit index values.
Definition Base.h:96
The namespace covering the entire Ocean framework.
Definition Accessor.h:15
Default definition of a type with tBytes bytes.
Definition DataType.h:32